fix: clear model cache when run.yaml model list changes

Ygnas · Ygnas · commit 62d1d5964f3e · 2025-08-19T11:39:31.000+01:00
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py
@@ -39,6 +39,7 @@
 class RegistryEntrySource(StrEnum):
     via_register_api = "via_register_api"
     listed_from_provider = "listed_from_provider"
+    from_config = "from_config"
 
 
 class User(BaseModel):
diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py
@@ -74,6 +74,7 @@ async def register_model(
         provider_id: str | None = None,
         metadata: dict[str, Any] | None = None,
         model_type: ModelType | None = None,
+        source: RegistryEntrySource | None = None,
     ) -> Model:
         if provider_id is None:
             # If provider_id not specified, use the only provider if it supports this model
@@ -100,13 +101,15 @@ async def register_model(
         else:
             identifier = f"{provider_id}/{provider_model_id}"
 
+        source = source or RegistryEntrySource.via_register_api
+
         model = ModelWithOwner(
             identifier=identifier,
             provider_resource_id=provider_model_id,
             provider_id=provider_id,
             metadata=metadata,
             model_type=model_type,
-            source=RegistryEntrySource.via_register_api,
+            source=source,
         )
         registered_model = await self.register_object(model)
         return registered_model
@@ -130,7 +133,7 @@ async def update_registered_models(
         for model in existing_models:
             if model.provider_id != provider_id:
                 continue
-            if model.source == RegistryEntrySource.via_register_api:
+            if model.source in [RegistryEntrySource.via_register_api, RegistryEntrySource.from_config]:
                 model_ids[model.provider_resource_id] = model.identifier
                 continue
 
@@ -156,3 +159,44 @@ async def update_registered_models(
                     source=RegistryEntrySource.listed_from_provider,
                 )
             )
+
+    async def _unregister_by_source(self, source: RegistryEntrySource, description: str) -> None:
+        """Unregister every registered model whose ``source`` matches.
+
+        :param source: value compared against each model's ``source`` field.
+        :param description: label interpolated into the debug log message.
+        """
+        for model in await self.get_all_with_type("model"):
+            if model.source != source:
+                continue
+            logger.debug(f"Cleaning up {description}: {model.identifier}")
+            await self.unregister_object(model)
+
+    async def cleanup_ephemeral_models(self) -> None:
+        """Clean up models that should not persist across sessions.
+
+        These are the models whose source is ``listed_from_provider``.
+        """
+        await self._unregister_by_source(RegistryEntrySource.listed_from_provider, "ephemeral provider model")
+
+    async def cleanup_config_models(self) -> None:
+        """Clean up models that came from configuration (run.yaml)."""
+        await self._unregister_by_source(RegistryEntrySource.from_config, "config model")
+
+    async def initialize(self) -> None:
+        """Initialize the models routing table with cleanup."""
+        # Clean up provider models from previous sessions
+        await self.cleanup_ephemeral_models()
+
+        # Also clean up config models from previous sessions
+        # This ensures we start with a clean slate for config models
+        await self.cleanup_config_models()
+
+        await super().initialize()
+
+    async def shutdown(self) -> None:
+        """Shutdown with cleanup of ephemeral models."""
+        # Same cleanup as at startup; delegate instead of repeating the loop
+        await self.cleanup_ephemeral_models()
+
+        await super().shutdown()
diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py
@@ -34,7 +34,7 @@
 from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.core.datatypes import Provider, StackRunConfig
+from llama_stack.core.datatypes import Provider, RegistryEntrySource, StackRunConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
 from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
@@ -112,10 +112,25 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
                 logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
                 continue
 
-            # we want to maintain the type information in arguments to method.
-            # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
-            # we use model_dump() to find all the attrs and then getattr to get the still typed value.
-            await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
+            # For models, use the register_model method with config source
+            if rsrc == "models":
+                logger.debug(
+                    f"Registering model from config: {obj.model_id} -> {obj.provider_model_id} via {obj.provider_id}"
+                )
+                await impls[api].register_model(
+                    model_id=obj.model_id,
+                    provider_model_id=obj.provider_model_id,
+                    provider_id=obj.provider_id,
+                    metadata=obj.metadata,
+                    model_type=obj.model_type,
+                    source=RegistryEntrySource.from_config,
+                )
+                logger.debug(f"Model registration completed for: {obj.model_id}")
+            else:
+                # we want to maintain the type information in arguments to method.
+                # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
+                # we use model_dump() to find all the attrs and then getattr to get the still typed value.
+                await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
 
         method = getattr(impls[api], list_method)
         response = await method()
@@ -303,6 +318,14 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
     impls[Api.providers] = providers_impl
 
 
+async def cleanup_provider_models_on_startup(impls: dict[Api, Any]) -> None:
+    """Run ``cleanup_ephemeral_models`` on each routing table in ``impls`` that defines it (startup cleanup)."""
+    routing_tables = [v for v in impls.values() if isinstance(v, CommonRoutingTableImpl)]
+    for routing_table in routing_tables:
+        if hasattr(routing_table, "cleanup_ephemeral_models"):
+            await routing_table.cleanup_ephemeral_models()
+
+
 # Produces a stack of providers for the given run config. Not all APIs may be
 # asked for in the run config.
 async def construct_stack(
@@ -328,6 +351,8 @@ async def construct_stack(
 
     await register_resources(run_config, impls)
 
+    # Clean up ephemeral models from previous sessions before first refresh
+    await cleanup_provider_models_on_startup(impls)
     await refresh_registry_once(impls)
 
     global REGISTRY_REFRESH_TASK