33# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
44
55import re
6- from typing import Any , Dict , List , Optional
6+ from typing import Any , Dict , List , Optional , Union
77
88from oci .data_science .models import Model
99from pydantic import BaseModel , ConfigDict , Field , model_validator
@@ -58,19 +58,76 @@ class ComputeRank(Serializable):
5858 None , description = "The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]"
5959 )
6060
61- class GPUSpecs (Serializable ):
61+ class ComputeRank (Serializable ):
6262 """
63- Represents the GPU specifications for a compute instance.
63+ Represents the cost and performance rankings for a specific compute shape.
64+ These rankings help compare different shapes based on their relative pricing
65+ and computational capabilities.
6466 """
6567
66- gpu_memory_in_gbs : Optional [int ] = Field (
67- default = None , description = "The amount of GPU memory available (in GB)."
68+ cost : Optional [int ] = Field (
69+ None ,
70+ description = (
71+ "Relative cost ranking of the compute shape. "
72+ "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
73+ "Lower values indicate cheaper compute options."
74+ ),
6875 )
76+
77+ performance : Optional [int ] = Field (
78+ None ,
79+ description = (
80+ "Relative performance ranking of the compute shape. "
81+ "Value ranges from 10 (lowest performance) to 110 (highest performance). "
82+ "Higher values indicate better compute performance."
83+ ),
84+ )
85+
86+
87+ class GPUSpecs (Serializable ):
88+ """
89+ Represents the specifications and capabilities of a GPU-enabled compute shape.
90+ Includes details about GPU and CPU resources, supported quantization formats, and
91+ relative rankings for cost and performance.
92+ """
93+
6994 gpu_count : Optional [int ] = Field (
70- default = None , description = "The number of GPUs available."
95+ default = None ,
96+ description = "Number of physical GPUs available on the compute shape." ,
97+ )
98+
99+ gpu_memory_in_gbs : Optional [int ] = Field (
100+ default = None , description = "Total GPU memory available in gigabytes (GB)."
71101 )
102+
72103 gpu_type : Optional [str ] = Field (
73- default = None , description = "The type of GPU (e.g., 'V100, A100, H100')."
104+ default = None ,
105+ description = "Type of GPU and architecture. Example: 'H100', 'GB200'." ,
106+ )
107+
108+ quantization : Optional [List [str ]] = Field (
109+ default_factory = list ,
110+ description = (
111+ "List of supported quantization formats for the GPU. "
112+ "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
113+ ),
114+ )
115+
116+ cpu_count : Optional [int ] = Field (
117+ default = None , description = "Number of CPU cores available on the shape."
118+ )
119+
120+ cpu_memory_in_gbs : Optional [int ] = Field (
121+ default = None , description = "Total CPU memory available in gigabytes (GB)."
122+ )
123+
124+ ranking : Optional [ComputeRank ] = Field (
125+ default = None ,
126+ description = (
127+ "Relative cost and performance rankings of this shape. "
128+ "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
129+ "and performance from 10 (lowest) to 100+ (highest)."
130+ ),
74131 )
75132 quantization : Optional [List [str ]] = Field (
76133 default_factory = list , description = "The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
@@ -97,50 +154,50 @@ class GPUShapesIndex(Serializable):
97154
98155class ComputeShapeSummary (Serializable ):
99156 """
100- Represents the specifications of a compute instance shape,
101- including CPU, memory, and optional GPU characteristics.
157+ Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
102158 """
103159
104160 available : Optional [bool ] = Field (
105- default = False ,
106- description = "True if shape is available on user tenancy, "
161+ default = False ,
162+ description = "True if the shape is available in the user's tenancy/region." ,
107163 )
164+
165+
108166 core_count : Optional [int ] = Field (
109- default = None ,
110- description = "Total number of CPU cores available for the compute shape." ,
167+ default = None , description = "Number of vCPUs available for the compute shape."
111168 )
169+
112170 memory_in_gbs : Optional [int ] = Field (
113- default = None ,
114- description = "Amount of memory (in GB) available for the compute shape." ,
171+ default = None , description = "Total CPU memory available for the shape (in GB)."
115172 )
173+
116174 name : Optional [str ] = Field (
117- default = None ,
118- description = "Full name of the compute shape, e.g., 'VM.GPU.A10.2'." ,
175+ default = None , description = "Name of the compute shape, e.g., 'VM.GPU.A10.2'."
119176 )
177+
120178 shape_series : Optional [str ] = Field (
121179 default = None ,
122- description = "Shape family or series , e.g., 'GPU', 'Standard', etc ." ,
180+ description = "Series or family of the shape , e.g., 'GPU', 'Standard'." ,
123181 )
182+
124183 gpu_specs : Optional [GPUSpecs ] = Field (
125- default = None ,
126- description = "Optional GPU specifications associated with the shape." ,
184+ default = None , description = "GPU configuration for the shape, if applicable."
127185 )
128186
129187 @model_validator (mode = "after" )
130188 @classmethod
131- def set_gpu_specs (cls , model : "ComputeShapeSummary" ) -> "ComputeShapeSummary" :
189+ def populate_gpu_specs (cls , model : "ComputeShapeSummary" ) -> "ComputeShapeSummary" :
132190 """
133- Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
134-
135- - If the shape_series contains "GPU", the validator first checks if the shape name exists
136- in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
137- - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
138- using a regex pattern (looking for a number following a dot at the end of the name).
191+ Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
139192
140- The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
193+ Logic:
194+ - If `shape_series` includes 'GPU' and `gpu_specs` is None:
195+ - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
196+ - Fallback is based on suffix numeric group (e.g., '.2' → gpu_count=2).
197+ - If extraction fails, logs debug-level error but does not raise.
141198
142199 Returns:
143- ComputeShapeSummary: The updated instance with gpu_specs populated if applicable .
200+ ComputeShapeSummary: The updated model instance .
144201 """
145202 try :
146203 if (
@@ -149,16 +206,15 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
149206 and model .name
150207 and not model .gpu_specs
151208 ):
152- # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
153209 match = re .search (r"\.(\d+)$" , model .name )
154210 if match :
155211 gpu_count = int (match .group (1 ))
156212 model .gpu_specs = GPUSpecs (gpu_count = gpu_count )
157213 except Exception as err :
158214 logger .debug (
159- f"Error occurred in attempt to extract GPU specification for the f{ model .name } . "
160- f"Details: { err } "
215+ f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{ model .name } ': { err } "
161216 )
217+
162218 return model
163219
164220
@@ -207,55 +263,71 @@ class AquaMultiModelRef(Serializable):
207263 """
208264 Lightweight model descriptor used for multi-model deployment.
209265
210- This class only contains essential details
211- required to fetch complete model metadata and deploy models .
266+ This class holds essential details required to fetch model metadata and deploy
267+ individual models as part of a multi-model deployment group .
212268
213269 Attributes
214270 ----------
215271 model_id : str
216- The unique identifier of the model.
272+ The unique identifier (OCID) of the base model.
217273 model_name : Optional[str]
218- The name of the model.
274+ Optional name for the model.
219275 gpu_count : Optional[int]
220- Number of GPUs required for deployment.
276+ Number of GPUs required to allocate for this model during deployment.
221277 model_task : Optional[str]
222- The task that model operates on. Supported tasks are in MultiModelSupportedTaskType
278+ The machine learning task this model performs (e.g., text-generation, summarization).
279+ Supported values are listed in `MultiModelSupportedTaskType`.
223280 env_var : Optional[Dict[str, Any]]
224- Optional environment variables to override during deployment.
281+ Optional dictionary of environment variables to inject into the runtime environment
282+ of the model container.
283+ params : Optional[Dict[str, Any]]
284+ Optional dictionary of container-specific inference parameters to override.
285+ These are typically framework-level flags required by the runtime backend.
286+ For example, in vLLM containers, valid params may include:
287+ `--tensor-parallel-size`, `--enforce-eager`, `--max-model-len`, etc.
225288 artifact_location : Optional[str]
226- Artifact path of model in the multimodel group.
289+ Relative path or URI of the model artifact inside the multi-model group folder .
227290 fine_tune_weights : Optional[List[LoraModuleSpec]]
228- For fine tuned models, the artifact path of the modified model weights
291+ List of fine- tuned weight artifacts (e.g., LoRA modules) associated with this model.
229292 """
230293
231294 model_id : str = Field (..., description = "The model OCID to deploy." )
232- model_name : Optional [str ] = Field (None , description = "The name of model." )
295+ model_name : Optional [str ] = Field (None , description = "The name of the model." )
233296 gpu_count : Optional [int ] = Field (
234- None , description = "The gpu count allocation for the model."
297+ None , description = "The number of GPUs allocated for the model."
235298 )
236299 model_task : Optional [str ] = Field (
237300 None ,
238- description = "The task that model operates on. Supported tasks are in MultiModelSupportedTaskType " ,
301+ description = "The task this model performs. See `MultiModelSupportedTaskType` for supported values. " ,
239302 )
240303 env_var : Optional [dict ] = Field (
241- default_factory = dict , description = "The environment variables of the model."
304+ default_factory = dict ,
305+ description = "Environment variables to override during container startup." ,
306+ )
307+ params : Optional [dict ] = Field (
308+ default_factory = dict ,
309+ description = (
310+ "Framework-specific startup parameters required by the container runtime. "
311+ "For example, vLLM models may use flags like `--tensor-parallel-size`, `--enforce-eager`, etc."
312+ ),
242313 )
243314 artifact_location : Optional [str ] = Field (
244- None , description = "Artifact path of model in the multimodel group."
315+ None ,
316+ description = "Path to the model artifact relative to the multi-model base folder." ,
245317 )
246318 fine_tune_weights : Optional [List [LoraModuleSpec ]] = Field (
247319 None ,
248- description = "For fine tuned models, the artifact path of the modified model weights " ,
320+ description = "List of fine- tuned weight modules (e.g., LoRA) associated with this base model. " ,
249321 )
250322
251323 def all_model_ids (self ) -> List [str ]:
252324 """
253- Returns all associated model OCIDs, including the base model and any fine-tuned models .
325+ Returns all model OCIDs associated with this reference, including fine-tuned weights .
254326
255327 Returns
256328 -------
257329 List[str]
258- A list of all model OCIDs associated with this multi-model reference .
330+ A list containing the base model OCID and any fine-tuned module OCIDs .
259331 """
260332 ids = {self .model_id }
261333 if self .fine_tune_weights :
@@ -264,8 +336,80 @@ def all_model_ids(self) -> List[str]:
264336 )
265337 return list (ids )
266338
@model_validator(mode="before")
@classmethod
def extract_params_from_env_var(cls, values: Any) -> Any:
    """
    Move CLI-style `PARAMS` out of `env_var` and into the `params` field.

    This keeps backward compatibility with callers that pass container
    parameters through environment variables, e.g.:
        env_var = { "PARAMS": "--max-model-len 65536 --enable-streaming" }

    Entries parsed from `PARAMS` are added to `params` only when the key is
    not already present, so explicitly supplied `params` always win.
    The `PARAMS` key is removed from the resulting `env_var`.

    The input mapping and its nested `env_var` / `params` dictionaries are
    never mutated; updated copies are returned instead, so the same input
    can safely be reused by the caller.

    Parameters
    ----------
    values : Any
        Raw input handed to the model before field validation. Anything that
        is not a dict is returned untouched.

    Returns
    -------
    Any
        The input itself when there is nothing to extract, otherwise a
        shallow copy with `env_var` cleaned up and `params` merged.
    """
    # A "before" validator sees the raw input, which is not guaranteed to be a dict.
    if not isinstance(values, dict):
        return values

    env = values.get("env_var")
    # `env_var` may be omitted or explicitly None; both mean "nothing to extract".
    if not isinstance(env, dict) or "PARAMS" not in env:
        return values

    param_string = env["PARAMS"]

    # Work on copies so the caller's dictionaries stay intact.
    updated = dict(values)
    updated["env_var"] = {
        name: value for name, value in env.items() if name != "PARAMS"
    }

    if param_string:
        merged_params = dict(values.get("params") or {})
        for key, value in cls._parse_params(params=param_string).items():
            # Explicit params take precedence over the ones found in PARAMS.
            merged_params.setdefault(key, value)
        updated["params"] = merged_params

    return updated
367+
368+ @staticmethod
369+ def _parse_params (params : Union [str , List [str ]]) -> Dict [str , str ]:
370+ """
371+ Parses CLI-style parameters into a dictionary format.
372+
373+ This method accepts either:
374+ - A single string of parameters (e.g., "--key1 val1 --key2 val2")
375+ - A list of strings (e.g., ["--key1", "val1", "--key2", "val2"])
376+
377+ Returns a dictionary of the form { "key1": "val1", "key2": "val2" }.
378+
379+ Parameters
380+ ----------
381+ params : Union[str, List[str]]
382+ The parameters to parse. Can be a single string or a list of strings.
383+
384+ Returns
385+ -------
386+ Dict[str, str]
387+ Dictionary with parameter names as keys and their corresponding values as strings.
388+ """
389+ if not params or not isinstance (params , (str , list )):
390+ return {}
391+
392+ # Normalize string to list of "--key value" strings
393+ if isinstance (params , str ):
394+ params_list = [
395+ f"--{ param .strip ()} " for param in params .split ("--" ) if param .strip ()
396+ ]
397+ else :
398+ params_list = params
399+
400+ parsed = {}
401+ for item in params_list :
402+ parts = item .strip ().split ()
403+ if not parts :
404+ continue
405+ key = parts [0 ]
406+ value = " " .join (parts [1 :]) if len (parts ) > 1 else ""
407+ parsed [key ] = value
408+
409+ return parsed
410+
267411 class Config :
268- extra = "ignore "
412+ extra = "allow "
269413 protected_namespaces = ()
270414
271415
0 commit comments