From 5494524f921844b881a9062889ee24df7a706e61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Speglich?=
Date: Mon, 23 Jun 2025 19:11:01 +0000
Subject: [PATCH 1/2] feature: add OCIGenAIChatCompletions client

---
 pyproject.toml                        |  14 ++--
 src/llmperf/common.py                 |   3 +
 src/llmperf/ray_clients/oci_client.py | 113 ++++++++++++++++++++++++++
 3 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 src/llmperf/ray_clients/oci_client.py

diff --git a/pyproject.toml b/pyproject.toml
index 7687fb2..d5c015b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,13 +9,13 @@ description = "A framework for load testing LLM APIs"
 authors = [{name="Avnish Narayan", email="avnish@anyscale.com"}]
 license = {text= "Apache-2.0"}
 requires-python = ">=3.8, <3.11"
-dependencies = ["pydantic<2.5",
-                "ray",
-                "pytest>=6.0",
-                "seaborn>=0.11",
-                "awscli>=1.22",
-                "typer>=0.4",
-                "litellm>=0.1.738",
+dependencies = ["pydantic",
+                "ray",
+                "pytest>=6.0",
+                "seaborn>=0.11",
+                "awscli>=1.22",
+                "typer>=0.4",
+                "litellm>=0.1.738",
                 "num2words",
                 "transformers",
                 "tqdm",
diff --git a/src/llmperf/common.py b/src/llmperf/common.py
index 3efefa1..2003a02 100644
--- a/src/llmperf/common.py
+++ b/src/llmperf/common.py
@@ -6,6 +6,7 @@
 from llmperf.ray_clients.sagemaker_client import SageMakerClient
 from llmperf.ray_clients.vertexai_client import VertexAIClient
 from llmperf.ray_llm_client import LLMClient
+from llmperf.ray_clients.oci_client import OCIGenAIChatCompletionsClient
 
 SUPPORTED_APIS = ["openai", "anthropic", "litellm"]
 
@@ -28,6 +29,8 @@ def construct_clients(llm_api: str, num_clients: int) -> List[LLMClient]:
         clients = [SageMakerClient.remote() for _ in range(num_clients)]
     elif llm_api == "vertexai":
         clients = [VertexAIClient.remote() for _ in range(num_clients)]
+    elif llm_api == "oci":
+        clients = [OCIGenAIChatCompletionsClient.remote() for _ in range(num_clients)]
     elif llm_api in SUPPORTED_APIS:
         clients = [LiteLLMClient.remote() for _ in range(num_clients)]
     else:
diff --git a/src/llmperf/ray_clients/oci_client.py b/src/llmperf/ray_clients/oci_client.py
new file mode 100644
index 0000000..d2d1650
--- /dev/null
+++ b/src/llmperf/ray_clients/oci_client.py
@@ -0,0 +1,113 @@
+import json
+import os
+import time
+from typing import Any, Dict
+
+import ray
+import requests
+
+from llmperf.ray_llm_client import LLMClient
+from llmperf.models import RequestConfig
+from llmperf import common_metrics
+
+from langchain_community.chat_models import ChatOCIGenAI
+from langchain_core.messages import HumanMessage
+
+@ray.remote
+class OCIGenAIChatCompletionsClient(LLMClient):
+    """Client for the OCI Generative AI Chat Completions API."""
+
+    def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
+        prompt = request_config.prompt
+        max_tokens = request_config.sampling_params.get("max_tokens", 600)
+        prompt, prompt_len = prompt  # prompt is a (text, prompt_len_in_tokens) tuple
+
+        time_to_next_token = []
+        tokens_received = 0
+        ttft = 0
+        error_response_code = -1
+        generated_text = ""
+        error_msg = ""
+        output_throughput = 0
+        total_request_time = 0
+        character_count = 0
+
+        metrics = {}
+        metrics[common_metrics.ERROR_CODE] = None
+        metrics[common_metrics.ERROR_MSG] = ""
+
+        self.compartment_id = os.getenv("OCI_COMPARTMENT_ID")
+        self.auth_type = os.getenv("OCI_AUTH_TYPE")
+        self.config_profile = os.getenv("OCI_CONFIG_PROFILE")
+        self.endpoint = os.getenv("OCI_ENDPOINT")
+
+        self.model_id = os.getenv("OCI_MODEL_ID")
+        self.provider = os.getenv("OCI_PROVIDER")
+
+        if (not self.compartment_id or not self.auth_type or not self.config_profile or not self.endpoint
+                or not self.model_id or not self.provider):
+            print(self.compartment_id, self.auth_type, self.config_profile, self.endpoint, self.model_id, self.provider, flush=True)
+            raise ValueError(
+                "Environment variables OCI_COMPARTMENT_ID, OCI_AUTH_TYPE, "
+                "OCI_CONFIG_PROFILE, OCI_ENDPOINT, OCI_MODEL_ID and OCI_PROVIDER must be set."
+            )
+
+        self.chat = ChatOCIGenAI(
+            model_id=self.model_id,
+            service_endpoint=self.endpoint,
+            compartment_id=self.compartment_id,
+            provider=self.provider,
+            is_stream=True,  # stream tokens so TTFT and inter-token latency can be measured
+            model_kwargs={
+                "temperature": 1,
+                "frequency_penalty": 0,
+                "max_tokens": max_tokens,
+                "presence_penalty": 0,
+                "top_p": 0.75
+            },
+            auth_type=self.auth_type,
+            auth_profile=self.config_profile,
+        )
+
+        start_time = time.monotonic()
+        most_recent_received_token_time = time.monotonic()
+
+        try:
+            messages = [
+                HumanMessage(content=prompt),
+            ]
+
+            response_stream = self.chat.stream(messages)
+
+            for chunk in response_stream:
+                if hasattr(chunk, "content") and chunk.content:
+                    token = chunk.content
+                    if not ttft:
+                        ttft = time.monotonic() - start_time
+                    else:
+                        time_to_next_token.append(time.monotonic() - most_recent_received_token_time)
+                    most_recent_received_token_time = time.monotonic()
+
+                    generated_text += token
+                    tokens_received += 1
+                    character_count += len(token)
+
+            total_request_time = time.monotonic() - start_time
+
+            output_throughput = tokens_received / total_request_time
+
+        except Exception as e:
+            error_msg = str(e)
+            metrics[common_metrics.ERROR_MSG] = error_msg
+            metrics[common_metrics.ERROR_CODE] = error_response_code
+            print(f"Warning or error: {e}")
+
+        metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token)  # This should match metrics[common_metrics.E2E_LAT]; kept for now.
+        metrics[common_metrics.TTFT] = ttft
+        metrics[common_metrics.E2E_LAT] = total_request_time
+        metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput
+        metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len
+        metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received
+        metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len
+
+        return metrics, generated_text, request_config

From fe02b447cce9de953bb8f54848e616ba59a7dec5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Speglich?=
Date: Mon, 23 Jun 2025 19:18:06 +0000
Subject: [PATCH 2/2] fix: add missing oci and langchain-community dependencies

---
 pyproject.toml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d5c015b..428ca6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,4 +20,6 @@ dependencies = ["pydantic",
                 "transformers",
                 "tqdm",
                 "boto3",
-                "google-cloud-aiplatform"]
+                "google-cloud-aiplatform",
+                "oci",
+                "langchain-community"]
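
Reviewer note: below is a minimal smoke-test sketch for the new client. It is illustrative, not part of the patches: the compartment OCID, endpoint, model ID, and the prompt token count (8) are hypothetical placeholders, and it assumes llmperf is installed with both patches applied. The environment variables are forwarded through Ray's runtime_env so the remote actor process, where oci_client.py reads them via os.getenv, actually sees them.

import ray

from llmperf.common import construct_clients
from llmperf.models import RequestConfig

# Placeholder values -- replace with a real compartment OCID, regional
# endpoint, and model before running.
env_vars = {
    "OCI_COMPARTMENT_ID": "ocid1.compartment.oc1..example",
    "OCI_AUTH_TYPE": "API_KEY",
    "OCI_CONFIG_PROFILE": "DEFAULT",
    "OCI_ENDPOINT": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    "OCI_MODEL_ID": "cohere.command-r-plus",
    "OCI_PROVIDER": "cohere",
}

# Forward the variables to Ray workers so the actor's os.getenv calls succeed.
ray.init(runtime_env={"env_vars": env_vars})

# construct_clients dispatches llm_api="oci" to OCIGenAIChatCompletionsClient.
clients = construct_clients(llm_api="oci", num_clients=1)

# llm_request expects prompt as a (text, prompt_len_in_tokens) tuple; the
# length here is an approximate token count supplied by the caller.
request = RequestConfig(
    model=env_vars["OCI_MODEL_ID"],
    prompt=("Write one sentence about load testing.", 8),
    sampling_params={"max_tokens": 64},
)

metrics, generated_text, _ = ray.get(clients[0].llm_request.remote(request))
print(metrics)
print(generated_text)

If any of the six variables is unset, the actor raises the ValueError from the validation block above rather than failing deep inside ChatOCIGenAI.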