
Commit a0788f0

Merge branch 'main' into ODSC-76209/GPU-Shape-Recommendation

2 parents: 9beb848 + 054b2fc


41 files changed (+2920 -306 lines)

ads/aqua/common/entities.py

Lines changed: 194 additions & 50 deletions
@@ -3,7 +3,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import re
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from oci.data_science.models import Model
 from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -58,19 +58,76 @@ class ComputeRank(Serializable):
         None, description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]"
     )
 
-class GPUSpecs(Serializable):
+class ComputeRank(Serializable):
     """
-    Represents the GPU specifications for a compute instance.
+    Represents the cost and performance rankings for a specific compute shape.
+    These rankings help compare different shapes based on their relative pricing
+    and computational capabilities.
     """
 
-    gpu_memory_in_gbs: Optional[int] = Field(
-        default=None, description="The amount of GPU memory available (in GB)."
+    cost: Optional[int] = Field(
+        None,
+        description=(
+            "Relative cost ranking of the compute shape. "
+            "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
+            "Lower values indicate cheaper compute options."
+        ),
     )
+
+    performance: Optional[int] = Field(
+        None,
+        description=(
+            "Relative performance ranking of the compute shape. "
+            "Value ranges from 10 (lowest performance) to 110 (highest performance). "
+            "Higher values indicate better compute performance."
+        ),
+    )
+
+
+class GPUSpecs(Serializable):
+    """
+    Represents the specifications and capabilities of a GPU-enabled compute shape.
+    Includes details about GPU and CPU resources, supported quantization formats, and
+    relative rankings for cost and performance.
+    """
+
     gpu_count: Optional[int] = Field(
-        default=None, description="The number of GPUs available."
+        default=None,
+        description="Number of physical GPUs available on the compute shape.",
+    )
+
+    gpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total GPU memory available in gigabytes (GB)."
     )
+
     gpu_type: Optional[str] = Field(
-        default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
+        default=None,
+        description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
+    )
+
+    quantization: Optional[List[str]] = Field(
+        default_factory=list,
+        description=(
+            "List of supported quantization formats for the GPU. "
+            "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
+        ),
+    )
+
+    cpu_count: Optional[int] = Field(
+        default=None, description="Number of CPU cores available on the shape."
+    )
+
+    cpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total CPU memory available in gigabytes (GB)."
+    )
+
+    ranking: Optional[ComputeRank] = Field(
+        default=None,
+        description=(
+            "Relative cost and performance rankings of this shape. "
+            "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
+            "and performance from 10 (lowest) to 100+ (highest)."
+        ),
     )
     quantization: Optional[List[str]] = Field(
         default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
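
For reference, a minimal sketch (not part of this commit) of how the ComputeRank and GPUSpecs models defined above can be instantiated; the shape values are illustrative placeholders, not taken from the shapes index:

from ads.aqua.common.entities import ComputeRank, GPUSpecs

# Hypothetical A10-style shape; all field values below are placeholders.
a10_specs = GPUSpecs(
    gpu_count=2,
    gpu_memory_in_gbs=48,
    gpu_type="A10",
    quantization=["fp16", "bitsandbytes"],
    cpu_count=30,
    cpu_memory_in_gbs=480,
    ranking=ComputeRank(cost=40, performance=50),
)
print(a10_specs.ranking.performance)  # -> 50
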
@@ -97,50 +154,50 @@ class GPUShapesIndex(Serializable):
 
 class ComputeShapeSummary(Serializable):
     """
-    Represents the specifications of a compute instance shape,
-    including CPU, memory, and optional GPU characteristics.
+    Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
     """
 
     available: Optional[bool] = Field(
-        default = False,
-        description="True if shape is available on user tenancy, "
+        default=False,
+        description="True if the shape is available in the user's tenancy/region.",
     )
+
+
     core_count: Optional[int] = Field(
-        default=None,
-        description="Total number of CPU cores available for the compute shape.",
+        default=None, description="Number of vCPUs available for the compute shape."
     )
+
     memory_in_gbs: Optional[int] = Field(
-        default=None,
-        description="Amount of memory (in GB) available for the compute shape.",
+        default=None, description="Total CPU memory available for the shape (in GB)."
     )
+
     name: Optional[str] = Field(
-        default=None,
-        description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
+        default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
     )
+
     shape_series: Optional[str] = Field(
         default=None,
-        description="Shape family or series, e.g., 'GPU', 'Standard', etc.",
+        description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
     )
+
     gpu_specs: Optional[GPUSpecs] = Field(
-        default=None,
-        description="Optional GPU specifications associated with the shape.",
+        default=None, description="GPU configuration for the shape, if applicable."
     )
 
     @model_validator(mode="after")
     @classmethod
-    def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
+    def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
         """
-        Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
-
-        - If the shape_series contains "GPU", the validator first checks if the shape name exists
-          in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
-        - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
-          using a regex pattern (looking for a number following a dot at the end of the name).
+        Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
 
-        The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
+        Logic:
+        - If `shape_series` includes 'GPU' and `gpu_specs` is None:
+            - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
+            - Fallback is based on suffix numeric group (e.g., '.2' → gpu_count=2).
+        - If extraction fails, logs debug-level error but does not raise.
 
         Returns:
-            ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
+            ComputeShapeSummary: The updated model instance.
         """
         try:
             if (
@@ -149,16 +206,15 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
                 and model.name
                 and not model.gpu_specs
             ):
-                # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
                 match = re.search(r"\.(\d+)$", model.name)
                 if match:
                     gpu_count = int(match.group(1))
                     model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
         except Exception as err:
             logger.debug(
-                f"Error occurred in attempt to extract GPU specification for the f{model.name}. "
-                f"Details: {err}"
+                f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
             )
+
         return model
 
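
As an illustration of the populate_gpu_specs fallback above (a sketch, not part of this commit; the shape names are examples only):

from ads.aqua.common.entities import ComputeShapeSummary

# A trailing ".<N>" in the shape name is parsed into gpu_count when no GPU specs are given.
shape = ComputeShapeSummary(name="VM.GPU.A10.2", shape_series="GPU")
print(shape.gpu_specs.gpu_count)  # -> 2, extracted from the ".2" suffix

# Without a numeric suffix the regex finds nothing and gpu_specs stays None.
shape_no_suffix = ComputeShapeSummary(name="BM.GPU.H100", shape_series="GPU")
print(shape_no_suffix.gpu_specs)  # -> None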

@@ -207,55 +263,71 @@ class AquaMultiModelRef(Serializable):
     """
     Lightweight model descriptor used for multi-model deployment.
 
-    This class only contains essential details
-    required to fetch complete model metadata and deploy models.
+    This class holds essential details required to fetch model metadata and deploy
+    individual models as part of a multi-model deployment group.
 
     Attributes
     ----------
     model_id : str
-        The unique identifier of the model.
+        The unique identifier (OCID) of the base model.
     model_name : Optional[str]
-        The name of the model.
+        Optional name for the model.
     gpu_count : Optional[int]
-        Number of GPUs required for deployment.
+        Number of GPUs required to allocate for this model during deployment.
     model_task : Optional[str]
-        The task that model operates on. Supported tasks are in MultiModelSupportedTaskType
+        The machine learning task this model performs (e.g., text-generation, summarization).
+        Supported values are listed in `MultiModelSupportedTaskType`.
     env_var : Optional[Dict[str, Any]]
-        Optional environment variables to override during deployment.
+        Optional dictionary of environment variables to inject into the runtime environment
+        of the model container.
+    params : Optional[Dict[str, Any]]
+        Optional dictionary of container-specific inference parameters to override.
+        These are typically framework-level flags required by the runtime backend.
+        For example, in vLLM containers, valid params may include:
+        `--tensor-parallel-size`, `--enforce-eager`, `--max-model-len`, etc.
     artifact_location : Optional[str]
-        Artifact path of model in the multimodel group.
+        Relative path or URI of the model artifact inside the multi-model group folder.
     fine_tune_weights : Optional[List[LoraModuleSpec]]
-        For fine tuned models, the artifact path of the modified model weights
+        List of fine-tuned weight artifacts (e.g., LoRA modules) associated with this model.
     """
 
     model_id: str = Field(..., description="The model OCID to deploy.")
-    model_name: Optional[str] = Field(None, description="The name of model.")
+    model_name: Optional[str] = Field(None, description="The name of the model.")
     gpu_count: Optional[int] = Field(
-        None, description="The gpu count allocation for the model."
+        None, description="The number of GPUs allocated for the model."
     )
     model_task: Optional[str] = Field(
         None,
-        description="The task that model operates on. Supported tasks are in MultiModelSupportedTaskType",
+        description="The task this model performs. See `MultiModelSupportedTaskType` for supported values.",
    )
     env_var: Optional[dict] = Field(
-        default_factory=dict, description="The environment variables of the model."
+        default_factory=dict,
+        description="Environment variables to override during container startup.",
+    )
+    params: Optional[dict] = Field(
+        default_factory=dict,
+        description=(
+            "Framework-specific startup parameters required by the container runtime. "
+            "For example, vLLM models may use flags like `--tensor-parallel-size`, `--enforce-eager`, etc."
+        ),
     )
     artifact_location: Optional[str] = Field(
-        None, description="Artifact path of model in the multimodel group."
+        None,
+        description="Path to the model artifact relative to the multi-model base folder.",
     )
     fine_tune_weights: Optional[List[LoraModuleSpec]] = Field(
         None,
-        description="For fine tuned models, the artifact path of the modified model weights",
+        description="List of fine-tuned weight modules (e.g., LoRA) associated with this base model.",
     )
 
     def all_model_ids(self) -> List[str]:
         """
-        Returns all associated model OCIDs, including the base model and any fine-tuned models.
+        Returns all model OCIDs associated with this reference, including fine-tuned weights.
 
         Returns
         -------
         List[str]
-            A list of all model OCIDs associated with this multi-model reference.
+            A list containing the base model OCID and any fine-tuned module OCIDs.
         """
         ids = {self.model_id}
         if self.fine_tune_weights:
@@ -264,8 +336,80 @@ def all_model_ids(self) -> List[str]:
             )
         return list(ids)
 
+    @model_validator(mode="before")
+    @classmethod
+    def extract_params_from_env_var(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        A model-level validator that extracts `PARAMS` from the `env_var` dictionary
+        and injects them into the `params` field as a dictionary.
+
+        This is useful for backward compatibility where users pass CLI-style
+        parameters via environment variables, e.g.:
+            env_var = { "PARAMS": "--max-model-len 65536 --enable-streaming" }
+
+        If `params` is already set, values from `PARAMS` in `env_var` are added
+        only if they do not override existing keys.
+        """
+        env = values.get("env_var", {})
+        param_string = env.pop("PARAMS", None)
+
+        if param_string:
+            parsed_params = cls._parse_params(params=param_string)
+            existing_params = values.get("params", {}) or {}
+            # Avoid overriding existing keys
+            for k, v in parsed_params.items():
+                if k not in existing_params:
+                    existing_params[k] = v
+            values["params"] = existing_params
+        values["env_var"] = env  # cleaned up version without PARAMS
+
+        return values
+
+    @staticmethod
+    def _parse_params(params: Union[str, List[str]]) -> Dict[str, str]:
+        """
+        Parses CLI-style parameters into a dictionary format.
+
+        This method accepts either:
+        - A single string of parameters (e.g., "--key1 val1 --key2 val2")
+        - A list of strings (e.g., ["--key1", "val1", "--key2", "val2"])
+
+        Returns a dictionary of the form { "key1": "val1", "key2": "val2" }.
+
+        Parameters
+        ----------
+        params : Union[str, List[str]]
+            The parameters to parse. Can be a single string or a list of strings.
+
+        Returns
+        -------
+        Dict[str, str]
+            Dictionary with parameter names as keys and their corresponding values as strings.
+        """
+        if not params or not isinstance(params, (str, list)):
+            return {}
+
+        # Normalize string to list of "--key value" strings
+        if isinstance(params, str):
+            params_list = [
+                f"--{param.strip()}" for param in params.split("--") if param.strip()
+            ]
+        else:
+            params_list = params
+
+        parsed = {}
+        for item in params_list:
+            parts = item.strip().split()
+            if not parts:
+                continue
+            key = parts[0]
+            value = " ".join(parts[1:]) if len(parts) > 1 else ""
+            parsed[key] = value
+
+        return parsed
+
     class Config:
-        extra = "ignore"
+        extra = "allow"
         protected_namespaces = ()
 
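
To illustrate the new PARAMS handling above (a sketch, not part of this commit; the OCID and flag values are placeholders):

from ads.aqua.common.entities import AquaMultiModelRef

ref = AquaMultiModelRef(
    model_id="ocid1.datasciencemodel.oc1..example",  # placeholder OCID
    env_var={"PARAMS": "--max-model-len 65536 --enforce-eager"},
    params={"--max-model-len": "32768"},
)
# PARAMS is parsed into `params`; keys already present in `params` are not overridden.
print(ref.params)   # -> {'--max-model-len': '32768', '--enforce-eager': ''}
print(ref.env_var)  # -> {} (PARAMS removed after extraction)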

ads/aqua/common/enums.py

Lines changed: 6 additions & 0 deletions
@@ -123,6 +123,12 @@ class Platform(ExtendedEnum):
 # - Key: The preferred container family to use when multiple compatible families are selected.
 # - Value: A list of all compatible families (including the preferred one).
 CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
+    InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
     InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
         InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
         InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
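
For context, a sketch of how the compatibility mapping can be consulted; the is_compatible helper below is hypothetical and not part of this commit:

from ads.aqua.common.enums import (
    CONTAINER_FAMILY_COMPATIBILITY,
    InferenceContainerTypeFamily,
)

def is_compatible(preferred: str, selected: str) -> bool:
    # A selected family is acceptable if it equals the preferred family or
    # appears in the preferred family's compatibility list.
    return selected == preferred or selected in CONTAINER_FAMILY_COMPATIBILITY.get(
        preferred, []
    )

print(
    is_compatible(
        InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
    )
)  # -> True, per the new entry added above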
