
Commit a0788f0

Merge branch 'main' into ODSC-76209/GPU-Shape-Recommendation

2 parents: 9beb848 + 054b2fc


41 files changed (+2920 -306 lines)

ads/aqua/common/entities.py

Lines changed: 194 additions & 50 deletions
@@ -3,7 +3,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import re
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from oci.data_science.models import Model
 from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -58,19 +58,76 @@ class ComputeRank(Serializable):
         None, description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]"
     )
 
-class GPUSpecs(Serializable):
+class ComputeRank(Serializable):
     """
-    Represents the GPU specifications for a compute instance.
+    Represents the cost and performance rankings for a specific compute shape.
+    These rankings help compare different shapes based on their relative pricing
+    and computational capabilities.
     """
 
-    gpu_memory_in_gbs: Optional[int] = Field(
-        default=None, description="The amount of GPU memory available (in GB)."
+    cost: Optional[int] = Field(
+        None,
+        description=(
+            "Relative cost ranking of the compute shape. "
+            "Value ranges from 10 (most cost-effective) to 100 (most expensive). "
+            "Lower values indicate cheaper compute options."
+        ),
     )
+
+    performance: Optional[int] = Field(
+        None,
+        description=(
+            "Relative performance ranking of the compute shape. "
+            "Value ranges from 10 (lowest performance) to 110 (highest performance). "
+            "Higher values indicate better compute performance."
+        ),
+    )
+
+
+class GPUSpecs(Serializable):
+    """
+    Represents the specifications and capabilities of a GPU-enabled compute shape.
+    Includes details about GPU and CPU resources, supported quantization formats, and
+    relative rankings for cost and performance.
+    """
+
     gpu_count: Optional[int] = Field(
-        default=None, description="The number of GPUs available."
+        default=None,
+        description="Number of physical GPUs available on the compute shape.",
+    )
+
+    gpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total GPU memory available in gigabytes (GB)."
     )
+
     gpu_type: Optional[str] = Field(
-        default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
+        default=None,
+        description="Type of GPU and architecture. Example: 'H100', 'GB200'.",
+    )
+
+    quantization: Optional[List[str]] = Field(
+        default_factory=list,
+        description=(
+            "List of supported quantization formats for the GPU. "
+            "Examples: 'fp16', 'int8', 'bitsandbytes', 'bf16', 'fp4', etc."
+        ),
+    )
+
+    cpu_count: Optional[int] = Field(
+        default=None, description="Number of CPU cores available on the shape."
+    )
+
+    cpu_memory_in_gbs: Optional[int] = Field(
+        default=None, description="Total CPU memory available in gigabytes (GB)."
+    )
+
+    ranking: Optional[ComputeRank] = Field(
+        default=None,
+        description=(
+            "Relative cost and performance rankings of this shape. "
+            "Cost is ranked from 10 (least expensive) to 100+ (most expensive), "
+            "and performance from 10 (lowest) to 100+ (highest)."
+        ),
     )
     quantization: Optional[List[str]] = Field(
         default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
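
For reference, a minimal sketch (not part of this commit) of how the ComputeRank and GPUSpecs models defined above can be instantiated; the shape values are illustrative placeholders, not taken from the shapes index:

from ads.aqua.common.entities import ComputeRank, GPUSpecs

# Hypothetical A10-style shape; all field values below are placeholders.
a10_specs = GPUSpecs(
    gpu_count=2,
    gpu_memory_in_gbs=48,
    gpu_type="A10",
    quantization=["fp16", "bitsandbytes"],
    cpu_count=30,
    cpu_memory_in_gbs=480,
    ranking=ComputeRank(cost=40, performance=50),
)
print(a10_specs.ranking.performance)  # -> 50
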
@@ -97,50 +154,50 @@ class GPUShapesIndex(Serializable):
 
 class ComputeShapeSummary(Serializable):
     """
-    Represents the specifications of a compute instance shape,
-    including CPU, memory, and optional GPU characteristics.
+    Represents a compute shape's specification including CPU, memory, and (if applicable) GPU configuration.
     """
 
     available: Optional[bool] = Field(
-        default = False,
-        description="True if shape is available on user tenancy, "
+        default=False,
+        description="True if the shape is available in the user's tenancy/region.",
     )
+
+
     core_count: Optional[int] = Field(
-        default=None,
-        description="Total number of CPU cores available for the compute shape.",
+        default=None, description="Number of vCPUs available for the compute shape."
     )
+
     memory_in_gbs: Optional[int] = Field(
-        default=None,
-        description="Amount of memory (in GB) available for the compute shape.",
+        default=None, description="Total CPU memory available for the shape (in GB)."
     )
+
     name: Optional[str] = Field(
-        default=None,
-        description="Full name of the compute shape, e.g., 'VM.GPU.A10.2'.",
+        default=None, description="Name of the compute shape, e.g., 'VM.GPU.A10.2'."
     )
+
     shape_series: Optional[str] = Field(
         default=None,
-        description="Shape family or series, e.g., 'GPU', 'Standard', etc.",
+        description="Series or family of the shape, e.g., 'GPU', 'Standard'.",
     )
+
     gpu_specs: Optional[GPUSpecs] = Field(
-        default=None,
-        description="Optional GPU specifications associated with the shape.",
+        default=None, description="GPU configuration for the shape, if applicable."
     )
 
     @model_validator(mode="after")
     @classmethod
-    def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
+    def populate_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
         """
-        Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
-
-        - If the shape_series contains "GPU", the validator first checks if the shape name exists
-          in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
-        - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
-          using a regex pattern (looking for a number following a dot at the end of the name).
+        Attempts to populate GPU specs if the shape is GPU-based and no GPU specs are explicitly set.
 
-        The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
+        Logic:
+        - If `shape_series` includes 'GPU' and `gpu_specs` is None:
+            - Tries to parse the shape name to extract GPU count (e.g., from 'VM.GPU.A10.2').
+            - Fallback is based on suffix numeric group (e.g., '.2' → gpu_count=2).
+        - If extraction fails, logs debug-level error but does not raise.
 
         Returns:
-            ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
+            ComputeShapeSummary: The updated model instance.
         """
         try:
             if (
@@ -149,16 +206,15 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
                 and model.name
                 and not model.gpu_specs
             ):
-                # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
                 match = re.search(r"\.(\d+)$", model.name)
                 if match:
                     gpu_count = int(match.group(1))
                     model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
         except Exception as err:
             logger.debug(
-                f"Error occurred in attempt to extract GPU specification for the f{model.name}. "
-                f"Details: {err}"
+                f"[populate_gpu_specs] Failed to auto-populate GPU specs for shape '{model.name}': {err}"
             )
+
         return model
 
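
As an illustration of the populate_gpu_specs fallback above (a sketch, not part of this commit; the shape names are examples only):

from ads.aqua.common.entities import ComputeShapeSummary

# A trailing ".<N>" in the shape name is parsed into gpu_count when no GPU specs are given.
shape = ComputeShapeSummary(name="VM.GPU.A10.2", shape_series="GPU")
print(shape.gpu_specs.gpu_count)  # -> 2, extracted from the ".2" suffix

# Without a numeric suffix the regex finds nothing and gpu_specs stays None.
shape_no_suffix = ComputeShapeSummary(name="BM.GPU.H100", shape_series="GPU")
print(shape_no_suffix.gpu_specs)  # -> None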

@@ -207,55 +263,71 @@ class AquaMultiModelRef(Serializable):
     """
     Lightweight model descriptor used for multi-model deployment.
 
-    This class only contains essential details
-    required to fetch complete model metadata and deploy models.
+    This class holds essential details required to fetch model metadata and deploy
+    individual models as part of a multi-model deployment group.
 
     Attributes
     ----------
     model_id : str
-        The unique identifier of the model.
+        The unique identifier (OCID) of the base model.
     model_name : Optional[str]
-        The name of the model.
+        Optional name for the model.
     gpu_count : Optional[int]
-        Number of GPUs required for deployment.
+        Number of GPUs required to allocate for this model during deployment.
     model_task : Optional[str]
-        The task that model operates on. Supported tasks are in MultiModelSupportedTaskType
+        The machine learning task this model performs (e.g., text-generation, summarization).
+        Supported values are listed in `MultiModelSupportedTaskType`.
     env_var : Optional[Dict[str, Any]]
-        Optional environment variables to override during deployment.
+        Optional dictionary of environment variables to inject into the runtime environment
+        of the model container.
+    params : Optional[Dict[str, Any]]
+        Optional dictionary of container-specific inference parameters to override.
+        These are typically framework-level flags required by the runtime backend.
+        For example, in vLLM containers, valid params may include:
+        `--tensor-parallel-size`, `--enforce-eager`, `--max-model-len`, etc.
     artifact_location : Optional[str]
-        Artifact path of model in the multimodel group.
+        Relative path or URI of the model artifact inside the multi-model group folder.
     fine_tune_weights : Optional[List[LoraModuleSpec]]
-        For fine tuned models, the artifact path of the modified model weights
+        List of fine-tuned weight artifacts (e.g., LoRA modules) associated with this model.
     """
 
     model_id: str = Field(..., description="The model OCID to deploy.")
-    model_name: Optional[str] = Field(None, description="The name of model.")
+    model_name: Optional[str] = Field(None, description="The name of the model.")
     gpu_count: Optional[int] = Field(
-        None, description="The gpu count allocation for the model."
+        None, description="The number of GPUs allocated for the model."
     )
     model_task: Optional[str] = Field(
         None,
-        description="The task that model operates on. Supported tasks are in MultiModelSupportedTaskType",
+        description="The task this model performs. See `MultiModelSupportedTaskType` for supported values.",
    )
     env_var: Optional[dict] = Field(
-        default_factory=dict, description="The environment variables of the model."
+        default_factory=dict,
+        description="Environment variables to override during container startup.",
+    )
+    params: Optional[dict] = Field(
+        default_factory=dict,
+        description=(
+            "Framework-specific startup parameters required by the container runtime. "
+            "For example, vLLM models may use flags like `--tensor-parallel-size`, `--enforce-eager`, etc."
+        ),
     )
     artifact_location: Optional[str] = Field(
-        None, description="Artifact path of model in the multimodel group."
+        None,
+        description="Path to the model artifact relative to the multi-model base folder.",
     )
     fine_tune_weights: Optional[List[LoraModuleSpec]] = Field(
         None,
-        description="For fine tuned models, the artifact path of the modified model weights",
+        description="List of fine-tuned weight modules (e.g., LoRA) associated with this base model.",
     )
 
     def all_model_ids(self) -> List[str]:
         """
-        Returns all associated model OCIDs, including the base model and any fine-tuned models.
+        Returns all model OCIDs associated with this reference, including fine-tuned weights.
 
         Returns
         -------
         List[str]
-            A list of all model OCIDs associated with this multi-model reference.
+            A list containing the base model OCID and any fine-tuned module OCIDs.
         """
         ids = {self.model_id}
         if self.fine_tune_weights:
@@ -264,8 +336,80 @@ def all_model_ids(self) -> List[str]:
             )
         return list(ids)
 
+    @model_validator(mode="before")
+    @classmethod
+    def extract_params_from_env_var(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        A model-level validator that extracts `PARAMS` from the `env_var` dictionary
+        and injects them into the `params` field as a dictionary.
+
+        This is useful for backward compatibility where users pass CLI-style
+        parameters via environment variables, e.g.:
+            env_var = { "PARAMS": "--max-model-len 65536 --enable-streaming" }
+
+        If `params` is already set, values from `PARAMS` in `env_var` are added
+        only if they do not override existing keys.
+        """
+        env = values.get("env_var", {})
+        param_string = env.pop("PARAMS", None)
+
+        if param_string:
+            parsed_params = cls._parse_params(params=param_string)
+            existing_params = values.get("params", {}) or {}
+            # Avoid overriding existing keys
+            for k, v in parsed_params.items():
+                if k not in existing_params:
+                    existing_params[k] = v
+            values["params"] = existing_params
+        values["env_var"] = env  # cleaned up version without PARAMS
+
+        return values
+
+    @staticmethod
+    def _parse_params(params: Union[str, List[str]]) -> Dict[str, str]:
+        """
+        Parses CLI-style parameters into a dictionary format.
+
+        This method accepts either:
+        - A single string of parameters (e.g., "--key1 val1 --key2 val2")
+        - A list of strings (e.g., ["--key1", "val1", "--key2", "val2"])
+
+        Returns a dictionary of the form { "key1": "val1", "key2": "val2" }.
+
+        Parameters
+        ----------
+        params : Union[str, List[str]]
+            The parameters to parse. Can be a single string or a list of strings.
+
+        Returns
+        -------
+        Dict[str, str]
+            Dictionary with parameter names as keys and their corresponding values as strings.
+        """
+        if not params or not isinstance(params, (str, list)):
+            return {}
+
+        # Normalize string to list of "--key value" strings
+        if isinstance(params, str):
+            params_list = [
+                f"--{param.strip()}" for param in params.split("--") if param.strip()
+            ]
+        else:
+            params_list = params
+
+        parsed = {}
+        for item in params_list:
+            parts = item.strip().split()
+            if not parts:
+                continue
+            key = parts[0]
+            value = " ".join(parts[1:]) if len(parts) > 1 else ""
+            parsed[key] = value
+
+        return parsed
+
     class Config:
-        extra = "ignore"
+        extra = "allow"
         protected_namespaces = ()
 
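
To illustrate the new PARAMS handling above (a sketch, not part of this commit; the OCID and flag values are placeholders):

from ads.aqua.common.entities import AquaMultiModelRef

ref = AquaMultiModelRef(
    model_id="ocid1.datasciencemodel.oc1..example",  # placeholder OCID
    env_var={"PARAMS": "--max-model-len 65536 --enforce-eager"},
    params={"--max-model-len": "32768"},
)
# PARAMS is parsed into `params`; keys already present in `params` are not overridden.
print(ref.params)   # -> {'--max-model-len': '32768', '--enforce-eager': ''}
print(ref.env_var)  # -> {} (PARAMS removed after extraction)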

ads/aqua/common/enums.py

Lines changed: 6 additions & 0 deletions
@@ -123,6 +123,12 @@ class Platform(ExtendedEnum):
 # - Key: The preferred container family to use when multiple compatible families are selected.
 # - Value: A list of all compatible families (including the preferred one).
 CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
+    InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
     InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
         InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
         InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
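
For context, a sketch of how the compatibility mapping can be consulted; the is_compatible helper below is hypothetical and not part of this commit:

from ads.aqua.common.enums import (
    CONTAINER_FAMILY_COMPATIBILITY,
    InferenceContainerTypeFamily,
)

def is_compatible(preferred: str, selected: str) -> bool:
    # A selected family is acceptable if it equals the preferred family or
    # appears in the preferred family's compatibility list.
    return selected == preferred or selected in CONTAINER_FAMILY_COMPATIBILITY.get(
        preferred, []
    )

print(
    is_compatible(
        InferenceContainerTypeFamily.AQUA_VLLM_OPENAI_CONTAINER_FAMILY,
        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
    )
)  # -> True, per the new entry added above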
