pyproject.toml (6 changes: 5 additions & 1 deletion)
@@ -32,7 +32,8 @@ dependencies = [
     "python-multipart>=0.0.16",
     "filelock",
     "psutil",
-    "gpuhunt==0.1.11",
+    # TODO: release and pin new version
+    "gpuhunt @ https://github.com/dstackai/gpuhunt/archive/refs/heads/hotaisle_store_specs_in_provider_data.zip",
    "argcomplete>=3.5.0",
    "ignore-python>=0.2.0",
    "orjson",
@@ -67,6 +68,9 @@ artifacts = [
     "src/dstack/_internal/server/statics/**",
 ]
 
+[tool.hatch.metadata]
+allow-direct-references = true  # TODO: unset
+
 [tool.hatch.metadata.hooks.fancy-pypi-readme]
 content-type = "text/markdown"
 
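Note: hatchling rejects direct (URL) references in `dependencies` by default, which is why the temporary gpuhunt archive dependency needs the `[tool.hatch.metadata]` override above; both TODOs should go away once a new gpuhunt release is pinned. A quick, non-authoritative way to confirm which gpuhunt distribution actually got installed:

```python
# Sanity check for the temporary direct-reference install of gpuhunt.
# The reported version comes from the branch's own package metadata,
# so treat the exact string as indicative only.
from importlib.metadata import version

print(version("gpuhunt"))
```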
src/dstack/_internal/core/backends/hotaisle/compute.py (108 changes: 25 additions & 83 deletions)
@@ -2,7 +2,7 @@
 import subprocess
 import tempfile
 from threading import Thread
-from typing import List, Optional
+from typing import Any, List, Optional
 
 import gpuhunt
 from gpuhunt.providers.hotaisle import HotAisleProvider
@@ -22,6 +22,7 @@
 from dstack._internal.core.models.instances import (
     InstanceAvailability,
     InstanceConfiguration,
+    InstanceOffer,
     InstanceOfferWithAvailability,
 )
 from dstack._internal.core.models.placement import PlacementGroup
@@ -31,48 +32,7 @@
 logger = get_logger(__name__)
 
 
-INSTANCE_TYPE_SPECS = {
-    "1x MI300X 8x Xeon Platinum 8462Y+": {
-        "cpu_model": "Xeon Platinum 8462Y+",
-        "cpu_frequency": 2800000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "1x MI300X 13x Xeon Platinum 8470": {
-        "cpu_model": "Xeon Platinum 8470",
-        "cpu_frequency": 2000000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "2x MI300X 26x Xeon Platinum 8470": {
-        "cpu_model": "Xeon Platinum 8470",
-        "cpu_frequency": 2000000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "2x MI300X 26x Xeon Platinum 8462Y+": {
-        "cpu_model": "Xeon Platinum 8462Y+",
-        "cpu_frequency": 2800000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "4x MI300X 52x Xeon Platinum 8470": {
-        "cpu_model": "Xeon Platinum 8470",
-        "cpu_frequency": 2000000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "4x MI300X 52x Xeon Platinum 8462Y+": {
-        "cpu_model": "Xeon Platinum 8462Y+",
-        "cpu_frequency": 2800000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "8x MI300X 104x Xeon Platinum 8470": {
-        "cpu_model": "Xeon Platinum 8470",
-        "cpu_frequency": 2000000000,
-        "cpu_manufacturer": "Intel",
-    },
-    "8x MI300X 104x Xeon Platinum 8462Y+": {
-        "cpu_model": "Xeon Platinum 8462Y+",
-        "cpu_frequency": 2800000000,
-        "cpu_manufacturer": "Intel",
-    },
-}
+SUPPORTED_GPUS = ["MI300X"]
 
 
 class HotAisleCompute(
@@ -95,45 +55,15 @@ def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
             backend=BackendType.HOTAISLE,
             locations=self.config.regions or None,
             catalog=self.catalog,
+            extra_filter=_supported_instances,
         )
-        supported_offers = []
-        for offer in offers:
-            if offer.instance.name in INSTANCE_TYPE_SPECS:
-                supported_offers.append(
-                    InstanceOfferWithAvailability(
-                        **offer.dict(), availability=InstanceAvailability.AVAILABLE
-                    )
-                )
-            else:
-                logger.warning(
-                    f"Skipping unsupported Hot Aisle instance type: {offer.instance.name}"
-                )
-        return supported_offers
-
-    def get_payload_from_offer(self, instance_type) -> dict:
-        instance_type_name = instance_type.name
-        cpu_specs = INSTANCE_TYPE_SPECS[instance_type_name]
-        cpu_cores = instance_type.resources.cpus
-
-        return {
-            "cpu_cores": cpu_cores,
-            "cpus": {
-                "count": 1,
-                "manufacturer": cpu_specs["cpu_manufacturer"],
-                "model": cpu_specs["cpu_model"],
-                "cores": cpu_cores,
-                "frequency": cpu_specs["cpu_frequency"],
-            },
-            "disk_capacity": instance_type.resources.disk.size_mib * 1024**2,
-            "ram_capacity": instance_type.resources.memory_mib * 1024**2,
-            "gpus": [
-                {
-                    "count": len(instance_type.resources.gpus),
-                    "manufacturer": instance_type.resources.gpus[0].vendor,
-                    "model": instance_type.resources.gpus[0].name,
-                }
-            ],
-        }
+        return [
+            InstanceOfferWithAvailability(
+                **offer.dict(),
+                availability=InstanceAvailability.AVAILABLE,
+            )
+            for offer in offers
+        ]
 
     def create_instance(
         self,
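For reference, the deleted get_payload_from_offer assembled a VM-creation payload of the following shape (capacities in bytes). With this PR, gpuhunt's Hot Aisle provider ships an equivalent dict in each offer's provider data, surfaced below as HotAisleOfferBackendData.vm_specs. The concrete values here are illustrative, not taken from a real offer:

```python
# Shape of the Hot Aisle VM payload formerly built by get_payload_from_offer.
# All values are illustrative; capacities are in bytes, frequency in Hz.
example_vm_specs = {
    "cpu_cores": 13,
    "cpus": {
        "count": 1,
        "manufacturer": "Intel",
        "model": "Xeon Platinum 8470",
        "cores": 13,
        "frequency": 2_000_000_000,
    },
    "disk_capacity": 100 * 1024**3,
    "ram_capacity": 224 * 1024**3,
    "gpus": [
        {"count": 1, "manufacturer": "AMD", "model": "MI300X"},
    ],
}
```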
@@ -143,8 +73,10 @@
     ) -> JobProvisioningData:
         project_ssh_key = instance_config.ssh_keys[0]
         self.api_client.upload_ssh_key(project_ssh_key.public)
-        vm_payload = self.get_payload_from_offer(instance_offer.instance)
-        vm_data = self.api_client.create_virtual_machine(vm_payload)
+        offer_backend_data: HotAisleOfferBackendData = (
+            HotAisleOfferBackendData.__response__.parse_obj(instance_offer.backend_data)
+        )
+        vm_data = self.api_client.create_virtual_machine(offer_backend_data.vm_specs)
         return JobProvisioningData(
             backend=instance_offer.backend,
             instance_type=instance_offer.instance,
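A minimal sketch of the new round trip, using plain pydantic as a stand-in for dstack's CoreModel/__response__ machinery: the offer's backend_data (a dict produced by gpuhunt's catalog) is validated into a typed model, and its vm_specs dict is handed to the Hot Aisle API unchanged. The raw payload below is hypothetical:

```python
# Stand-in for the backend_data -> vm_specs -> create_virtual_machine flow.
# Plain pydantic replaces dstack's CoreModel here; payload values are made up.
from typing import Any, Dict

from pydantic import BaseModel


class OfferBackendData(BaseModel):
    vm_specs: Dict[str, Any]


raw_backend_data = {
    "vm_specs": {"cpu_cores": 13, "gpus": [{"count": 1, "model": "MI300X"}]},
}

data = OfferBackendData.parse_obj(raw_backend_data)  # validate the catalog-provided dict
assert data.vm_specs["cpu_cores"] == 13
# In compute.py, this dict then goes straight to the API:
# vm_data = api_client.create_virtual_machine(data.vm_specs)
```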
@@ -240,10 +172,20 @@ def _run_ssh_command(hostname: str, ssh_private_key: str, command: str):
     )
 
 
+def _supported_instances(offer: InstanceOffer) -> bool:
+    return len(offer.instance.resources.gpus) > 0 and all(
+        gpu.name in SUPPORTED_GPUS for gpu in offer.instance.resources.gpus
+    )
+
+
 class HotAisleInstanceBackendData(CoreModel):
     ip_address: str
 
     @classmethod
     def load(cls, raw: Optional[str]) -> "HotAisleInstanceBackendData":
         assert raw is not None
         return cls.__response__.parse_raw(raw)
+
+
+class HotAisleOfferBackendData(CoreModel):
+    vm_specs: dict[str, Any]
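The _supported_instances predicate replaces the old INSTANCE_TYPE_SPECS name lookup: offers with no GPUs, or with any GPU outside SUPPORTED_GPUS, never leave the catalog. A self-contained toy check of those semantics, with minimal stand-ins for the dstack offer models:

```python
# Toy check of the _supported_instances semantics. Gpu/Resources/Instance/Offer
# are simplified stand-ins for dstack's models, not the real classes.
from dataclasses import dataclass, field
from typing import List

SUPPORTED_GPUS = ["MI300X"]


@dataclass
class Gpu:
    name: str


@dataclass
class Resources:
    gpus: List[Gpu] = field(default_factory=list)


@dataclass
class Instance:
    resources: Resources


@dataclass
class Offer:
    instance: Instance


def _supported_instances(offer: Offer) -> bool:
    return len(offer.instance.resources.gpus) > 0 and all(
        gpu.name in SUPPORTED_GPUS for gpu in offer.instance.resources.gpus
    )


assert _supported_instances(Offer(Instance(Resources([Gpu("MI300X")]))))
assert not _supported_instances(Offer(Instance(Resources([]))))  # CPU-only offer
assert not _supported_instances(Offer(Instance(Resources([Gpu("H100")]))))  # unsupported GPU
```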