diff --git a/ads/aqua/config/config.py b/ads/aqua/config/config.py index 1cabc203c..1ccf2c703 100644 --- a/ads/aqua/config/config.py +++ b/ads/aqua/config/config.py @@ -40,6 +40,7 @@ def get_finetuning_config_defaults(): "VM.GPU.A10.2": {"batch_size": 1, "replica": "1-10"}, "BM.GPU.A10.4": {"batch_size": 1, "replica": 1}, "BM.GPU4.8": {"batch_size": 4, "replica": 1}, + "BM.GPU.L40S-NC.4": {"batch_size": 4, "replica": 1}, "BM.GPU.A100-v2.8": {"batch_size": 6, "replica": 1}, "BM.GPU.H100.8": {"batch_size": 6, "replica": 1}, } diff --git a/ads/aqua/config/deployment_config_defaults.json b/ads/aqua/config/deployment_config_defaults.json index 21572ee99..9caa8ef11 100644 --- a/ads/aqua/config/deployment_config_defaults.json +++ b/ads/aqua/config/deployment_config_defaults.json @@ -30,6 +30,7 @@ "VM.GPU.A10.2", "BM.GPU.A10.4", "BM.GPU4.8", + "BM.GPU.L40S-NC.4", "BM.GPU.A100-v2.8", "BM.GPU.H100.8", "VM.Standard.A1.Flex" diff --git a/ads/aqua/config/resource_limit_names.json b/ads/aqua/config/resource_limit_names.json index d3e23370e..3aabcfaee 100644 --- a/ads/aqua/config/resource_limit_names.json +++ b/ads/aqua/config/resource_limit_names.json @@ -2,6 +2,7 @@ "BM.GPU.A10.4": "ds-gpu-a10-count", "BM.GPU.A100-v2.8": "ds-gpu-a100-v2-count", "BM.GPU.H100.8": "ds-gpu-h100-count", + "BM.GPU.L40S-NC.4": "ds-gpu-l40s-nc-count", "BM.GPU4.8": "ds-gpu4-count", "VM.GPU.A10.1": "ds-gpu-a10-count", "VM.GPU.A10.2": "ds-gpu-a10-count" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 654e00dc8..8ce6ab320 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -46,7 +46,6 @@ from ads.config import ( AQUA_CONFIG_FOLDER, AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME, - AQUA_DEPLOYMENT_CONTAINER_OVERRIDE_FLAG_METADATA_NAME, AQUA_MODEL_DEPLOYMENT_CONFIG, AQUA_MODEL_DEPLOYMENT_CONFIG_DEFAULTS, COMPARTMENT_OCID, @@ -87,26 +86,27 @@ class AquaDeploymentApp(AquaApp): @telemetry(entry_point="plugin=deployment&action=create", name="aqua") def create( - self, - model_id: str, - instance_shape: str, - display_name: str, - instance_count: int = None, - log_group_id: str = None, - access_log_id: str = None, - predict_log_id: str = None, - compartment_id: str = None, - project_id: str = None, - description: str = None, - bandwidth_mbps: int = None, - web_concurrency: int = None, - server_port: int = None, - health_check_port: int = None, - env_var: Dict = None, - container_family: str = None, - memory_in_gbs: Optional[float] = None, - ocpus: Optional[float] = None, - model_file: Optional[str] = None, + self, + model_id: str, + instance_shape: str, + display_name: str, + instance_count: int = None, + log_group_id: str = None, + access_log_id: str = None, + predict_log_id: str = None, + compartment_id: str = None, + project_id: str = None, + description: str = None, + bandwidth_mbps: int = None, + web_concurrency: int = None, + server_port: int = None, + health_check_port: int = None, + env_var: Dict = None, + container_family: str = None, + memory_in_gbs: Optional[float] = None, + ocpus: Optional[float] = None, + model_file: Optional[str] = None, + cmd_var: List[str] = None, ) -> "AquaDeployment": """ Creates a new Aqua deployment @@ -153,6 +153,8 @@ def create( The ocpu count for the shape selected. model_file: str The file used for model deployment. + cmd_var: List[str] + The cmd of model deployment container runtime. Returns ------- AquaDeployment @@ -231,8 +233,7 @@ def create( env_var.update({"FT_MODEL": f"{fine_tune_output_path}"}) container_type_key = self._get_container_type_key( - model=aqua_model, - container_family=container_family + model=aqua_model, container_family=container_family ) # fetch image name from config @@ -248,7 +249,11 @@ def create( model_format = model_formats_str.split(",") # Figure out a better way to handle this in future release - if ModelFormat.GGUF.value in model_format and container_type_key.lower() == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY: + if ( + ModelFormat.GGUF.value in model_format + and container_type_key.lower() + == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY + ): if model_file is not None: logger.info( f"Overriding {model_file} as model_file for model {aqua_model.id}." @@ -299,8 +304,8 @@ def create( if user_params: # todo: remove this check in the future version, logic to be moved to container_index if ( - container_type_key.lower() - == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY + container_type_key.lower() + == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY ): # AQUA_LLAMA_CPP_CONTAINER_FAMILY container uses uvicorn that required model/server params # to be set as env vars @@ -369,6 +374,8 @@ def create( .with_overwrite_existing_artifact(True) .with_remove_existing_artifact(True) ) + if cmd_var: + container_runtime.with_cmd(cmd_var) # configure model deployment and deploy model on container runtime deployment = ( @@ -422,9 +429,8 @@ def _get_container_type_key(model: DataScienceModel, container_family: str) -> s f"for model {model.id}. For unverified Aqua models, {AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME} should be" f"set and value can be one of {', '.join(InferenceContainerTypeFamily.values())}." ) from err - + return container_type_key - @telemetry(entry_point="plugin=deployment&action=list", name="aqua") def list(self, **kwargs) -> List["AquaDeployment"]: @@ -453,8 +459,8 @@ def list(self, **kwargs) -> List["AquaDeployment"]: for model_deployment in model_deployments: oci_aqua = ( ( - Tags.AQUA_TAG in model_deployment.freeform_tags - or Tags.AQUA_TAG.lower() in model_deployment.freeform_tags + Tags.AQUA_TAG in model_deployment.freeform_tags + or Tags.AQUA_TAG.lower() in model_deployment.freeform_tags ) if model_deployment.freeform_tags else False @@ -508,8 +514,8 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": oci_aqua = ( ( - Tags.AQUA_TAG in model_deployment.freeform_tags - or Tags.AQUA_TAG.lower() in model_deployment.freeform_tags + Tags.AQUA_TAG in model_deployment.freeform_tags + or Tags.AQUA_TAG.lower() in model_deployment.freeform_tags ) if model_deployment.freeform_tags else False @@ -526,8 +532,8 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": log_group_name = "" logs = ( - model_deployment.category_log_details.access - or model_deployment.category_log_details.predict + model_deployment.category_log_details.access + or model_deployment.category_log_details.predict ) if logs: log_id = logs.log_id @@ -582,9 +588,9 @@ def get_deployment_config(self, model_id: str) -> Dict: return config def get_deployment_default_params( - self, - model_id: str, - instance_shape: str, + self, + model_id: str, + instance_shape: str, ) -> List[str]: """Gets the default params set in the deployment configs for the given model and instance shape. @@ -616,8 +622,8 @@ def get_deployment_default_params( ) if ( - container_type_key - and container_type_key in InferenceContainerTypeFamily.values() + container_type_key + and container_type_key in InferenceContainerTypeFamily.values() ): deployment_config = self.get_deployment_config(model_id) config_params = ( @@ -640,10 +646,10 @@ def get_deployment_default_params( return default_params def validate_deployment_params( - self, - model_id: str, - params: List[str] = None, - container_family: str = None, + self, + model_id: str, + params: List[str] = None, + container_family: str = None, ) -> Dict: """Validate if the deployment parameters passed by the user can be overridden. Parameter values are not validated, only param keys are validated. @@ -666,8 +672,7 @@ def validate_deployment_params( if params: model = DataScienceModel.from_id(model_id) container_type_key = self._get_container_type_key( - model=model, - container_family=container_family + model=model, container_family=container_family ) container_config = get_container_config() @@ -689,9 +694,9 @@ def validate_deployment_params( @staticmethod def _find_restricted_params( - default_params: Union[str, List[str]], - user_params: Union[str, List[str]], - container_family: str, + default_params: Union[str, List[str]], + user_params: Union[str, List[str]], + container_family: str, ) -> List[str]: """Returns a list of restricted params that user chooses to override when creating an Aqua deployment. The default parameters coming from the container index json file cannot be overridden. diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index c600dee05..5d61acc16 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -2,6 +2,12 @@ Release Notes ============= +2.12.2 +------- +Release date: October 16, 2024 + +* Introduced enhancements for AI Quick Actions. + 2.12.1 ------- Release date: October 10, 2024 diff --git a/pyproject.toml b/pyproject.toml index 1b8f15ade..d1bd85131 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi" # Required name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below -version = "2.12.1" +version = "2.12.2" # Optional description = "Oracle Accelerated Data Science SDK"