fix: clear model cache when run.yaml model list changes

Ygnas · Ygnas · commit 41b55467577e · 2025-09-18T11:30:12.000+01:00
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py
@@ -40,6 +40,7 @@
 class RegistryEntrySource(StrEnum):
     via_register_api = "via_register_api"
     listed_from_provider = "listed_from_provider"
+    from_config = "from_config"  # model declared in run.yaml; passed as `source` by register_resources
 
 
 class User(BaseModel):
diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py
@@ -254,6 +254,13 @@ async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) ->
     if model is not None:
         return model
 
+    # Check from_config models if this is a ModelsRoutingTable
+    if hasattr(routing_table, "_generate_from_config_models"):
+        from_config_models = routing_table._generate_from_config_models()
+        for from_config_model in from_config_models:
+            if from_config_model.identifier == model_id:
+                return from_config_model
+
     logger.warning(
         f"WARNING: model identifier '{model_id}' not found in routing table. Falling back to "
         "searching in all providers. This is only for backwards compatibility and will stop working "
diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py
@@ -12,6 +12,7 @@
 from llama_stack.core.datatypes import (
     ModelWithOwner,
     RegistryEntrySource,
+    StackRunConfig,
 )
 from llama_stack.log import get_logger
 
@@ -22,6 +23,7 @@
 
 class ModelsRoutingTable(CommonRoutingTableImpl, Models):
     listed_providers: set[str] = set()
+    current_run_config: "StackRunConfig | None" = None
 
     async def refresh(self) -> None:
         for provider_id, provider in self.impls_by_provider_id.items():
@@ -43,10 +45,26 @@ async def refresh(self) -> None:
             await self.update_registered_models(provider_id, models)
 
     async def list_models(self) -> ListModelsResponse:
-        return ListModelsResponse(data=await self.get_all_with_type("model"))
+        """List registry models plus run-config models that the registry does not already hold."""
+        persistent_models = await self.get_all_with_type("model")
+
+        # register_resources also registers run.yaml models (source=from_config), so the
+        # registry can already contain them; concatenating blindly would list them twice.
+        known_identifiers = {model.identifier for model in persistent_models}
+        config_only_models = [
+            model for model in self._generate_from_config_models() if model.identifier not in known_identifiers
+        ]
+        return ListModelsResponse(data=persistent_models + config_only_models)
 
     async def openai_list_models(self) -> OpenAIListModelsResponse:
-        models = await self.get_all_with_type("model")
+        # Get persistent models from registry
+        persistent_models = await self.get_all_with_type("model")
+
+        # Generate from_config models dynamically
+        from_config_models = self._generate_from_config_models()
+
+        # Combine both lists
+        models = persistent_models + from_config_models
         openai_models = [
             OpenAIModel(
                 id=model.identifier,
@@ -74,6 +92,7 @@ async def register_model(
         provider_id: str | None = None,
         metadata: dict[str, Any] | None = None,
         model_type: ModelType | None = None,
+        source: RegistryEntrySource = RegistryEntrySource.via_register_api,
     ) -> Model:
         if provider_id is None:
             # If provider_id not specified, use the only provider if it supports this model
@@ -106,7 +125,7 @@ async def register_model(
             provider_id=provider_id,
             metadata=metadata,
             model_type=model_type,
-            source=RegistryEntrySource.via_register_api,
+            source=source,
         )
         registered_model = await self.register_object(model)
         return registered_model
@@ -117,6 +136,39 @@ async def unregister_model(self, model_id: str) -> None:
             raise ModelNotFoundError(model_id)
         await self.unregister_object(existing_model)
 
+    def set_run_config(self, run_config: "StackRunConfig") -> None:
+        """Store run_config on this table; _generate_from_config_models() reads its `models` list."""
+        self.current_run_config = run_config
+
+    def _generate_from_config_models(self) -> list[ModelWithOwner]:
+        """Build ModelWithOwner entries (source=from_config) for the models in the current run config."""
+        run_config = self.current_run_config
+        if not run_config:
+            return []
+
+        generated: list[ModelWithOwner] = []
+        for entry in run_config.models:
+            # No provider, or the "__disabled__" placeholder provider: nothing to expose.
+            if not entry.provider_id or entry.provider_id == "__disabled__":
+                continue
+
+            resource_id = entry.provider_model_id or entry.model_id
+            # A model_id that differs from the provider-side id is used as the identifier as-is;
+            # otherwise the identifier is qualified with the provider id.
+            identifier = entry.model_id if entry.model_id != resource_id else f"{entry.provider_id}/{resource_id}"
+
+            generated.append(
+                ModelWithOwner(
+                    identifier=identifier,
+                    provider_resource_id=resource_id,
+                    provider_id=entry.provider_id,
+                    metadata=entry.metadata,
+                    model_type=entry.model_type or ModelType.llm,
+                    source=RegistryEntrySource.from_config,
+                )
+            )
+        return generated
+
     async def update_registered_models(
         self,
         provider_id: str,
@@ -133,6 +185,10 @@ async def update_registered_models(
             if model.source == RegistryEntrySource.via_register_api:
                 model_ids[model.provider_resource_id] = model.identifier
                 continue
+            # Also preserve from_config models - they should not be unregistered during refresh
+            if model.source == RegistryEntrySource.from_config:
+                model_ids[model.provider_resource_id] = model.identifier
+                continue
 
             logger.debug(f"unregistering model {model.identifier}")
             await self.unregister_object(model)
diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py
@@ -35,7 +35,7 @@
 from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.core.datatypes import Provider, StackRunConfig
+from llama_stack.core.datatypes import Provider, RegistryEntrySource, StackRunConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
 from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
@@ -101,6 +101,11 @@ class LlamaStack(
 
 
 async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
+    # Set the run config on the models routing table for generating from_config models
+    if Api.models in impls:
+        models_impl = impls[Api.models]
+        models_impl.set_run_config(run_config)
+
     for rsrc, api, register_method, list_method in RESOURCES:
         objects = getattr(run_config, rsrc)
         if api not in impls:
@@ -118,7 +123,13 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
             # we want to maintain the type information in arguments to method.
             # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
             # we use model_dump() to find all the attrs and then getattr to get the still typed value.
-            await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
+            kwargs = {k: getattr(obj, k) for k in obj.model_dump().keys()}
+
+            # For models, add source=from_config to indicate they come from run.yaml
+            if rsrc == "models":
+                kwargs["source"] = RegistryEntrySource.from_config
+
+            await method(**kwargs)
 
         method = getattr(impls[api], list_method)
         response = await method()