-
Notifications
You must be signed in to change notification settings - Fork 202
Add SGLang Router Support #3267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
3c08dc7
f191132
23d3e69
e6c2bcb
7285b30
e905541
1206531
b278b1a
6510956
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| from enum import Enum | ||
| from typing import Union | ||
|
|
||
| from pydantic import Field | ||
| from typing_extensions import Annotated, Literal | ||
|
|
||
| from dstack._internal.core.models.common import CoreModel | ||
|
|
||
|
|
||
class RouterType(str, Enum):
    """String identifiers for router implementations.

    Members also subclass ``str``, so each member compares equal to its raw
    string value (e.g. ``RouterType.SGLANG == "sglang"``).
    """

    SGLANG = "sglang"
    SGLANG_DEPRECATED = "sglang_deprecated"
    # NOTE(review): no router config class in this module declares
    # type "sglang_new" -- confirm this member is still needed.
    SGLANG_NEW = "sglang_new"
    # NOTE(review): a reviewer states vLLM routers are not yet supported --
    # confirm whether this member should be user-visible.
    VLLM = "vllm"
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. vLLM routers are not yet supported, yet vLLM settings will be visible to users in things like the automatically generated reference and IDE hints. I'd suggest removing anything vLLM-related to avoid confusing users. |
||
|
|
||
|
|
||
class SGLangRouterConfig(CoreModel):
    """Router configuration selected by ``type: "sglang_deprecated"``."""

    # Discriminator value; the Literal annotation pins it to "sglang_deprecated".
    type: Literal["sglang_deprecated"] = "sglang_deprecated"
    # Routing policy name, defaulting to "cache_aware". The annotation is a
    # plain str, so it does not by itself restrict the value to known policies.
    policy: str = "cache_aware"
|
|
||
|
|
||
class SGLangNewRouterConfig(CoreModel):
    """Router configuration selected by ``type: "sglang"``."""

    # Discriminator value; the Literal annotation pins it to "sglang".
    type: Literal["sglang"] = "sglang"
    # Routing policy name, defaulting to "cache_aware". The annotation is a
    # plain str, so it does not by itself restrict the value to known policies.
    policy: str = "cache_aware"
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently, setting […] I can suggest making this field a […]. Then the user will see a detailed configuration validation error when trying to create the gateway. |
||
|
|
||
|
|
||
class VLLMRouterConfig(CoreModel):
    """Router configuration selected by ``type: "vllm"``.

    NOTE(review): a reviewer states vLLM routers are not yet supported --
    confirm whether this model should be exposed to users.
    """

    # Discriminator value; the Literal annotation pins it to "vllm".
    type: Literal["vllm"] = "vllm"
    # Routing policy name, defaulting to "cache_aware". The annotation is a
    # plain str, so it does not by itself restrict the value to known policies.
    policy: str = "cache_aware"
|
|
||
|
|
||
# Union of every router config model. The "type" field is declared as the
# discriminator, so its value selects which concrete model is used.
AnyRouterConfig = Annotated[
    Union[SGLangRouterConfig, SGLangNewRouterConfig, VLLMRouterConfig], Field(discriminator="type")
]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| from typing import Optional | ||
|
|
||
| from dstack._internal.core.models.routers import AnyRouterConfig | ||
| from dstack._internal.proxy.gateway.model_routers.sglang import SglangRouter | ||
| from dstack._internal.proxy.gateway.model_routers.sglang_new import SglangRouterNew | ||
|
|
||
| from .base import Replica, Router, RouterContext | ||
|
|
||
|
|
||
def get_router(router: AnyRouterConfig, context: Optional[RouterContext] = None) -> Router:
    """Build the router implementation that matches ``router.type``.

    Args:
        router: Router configuration; its ``type`` field selects the implementation.
        context: Optional runtime context, forwarded unchanged to the router constructor.

    Returns:
        A ``SglangRouterNew`` for the types ``"sglang"`` and ``"sglang_new"``,
        or a ``SglangRouter`` for ``"sglang_deprecated"``.

    Raises:
        ValueError: If ``router.type`` is none of the values listed above.
    """
    kind = router.type
    if kind == "sglang_deprecated":
        implementation = SglangRouter
    elif kind in ("sglang", "sglang_new"):
        # Both spellings resolve to the same implementation.
        implementation = SglangRouterNew
    else:
        raise ValueError(f"Router type '{kind}' is not available")
    return implementation(router=router, context=context)
|
|
||
|
|
||
# Public names exported from this module.
__all__ = [
    "Router",
    "RouterContext",
    "Replica",
    "get_router",
]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| from abc import ABC, abstractmethod | ||
| from pathlib import Path | ||
| from typing import List, Literal, Optional | ||
|
|
||
| from pydantic import BaseModel | ||
|
|
||
| from dstack._internal.core.models.routers import AnyRouterConfig | ||
|
|
||
|
|
||
class RouterContext(BaseModel):
    """Context for router initialization and configuration."""

    class Config:
        # pydantic model setting: instances are treated as immutable.
        frozen = True

    # presumably the address the router process is reached at -- confirm against the implementations
    host: str = "127.0.0.1"
    port: int = 3000
    # Directory for router logs; the default is relative to the working directory.
    log_dir: Path = Path("./router_logs")
    # Restricted to the four listed level names; defaults to "info".
    log_level: Literal["debug", "info", "warning", "error"] = "info"
|
|
||
|
|
||
class Replica(BaseModel):
    """Represents a single replica (worker) endpoint managed by the router.

    The model field identifies which model this replica serves.
    In SGLang, model = model_id (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct").
    """

    url: str  # HTTP URL where the replica is accessible (e.g., "http://127.0.0.1:10001")
    model: str  # (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct")
|
|
||
|
|
||
| class Router(ABC): | ||
| """Abstract base class for router implementations (e.g., SGLang, vLLM). | ||
| A router manages the lifecycle of worker replicas and handles request routing. | ||
| Different router implementations may have different mechanisms for managing | ||
| replicas. | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
| router: Optional[AnyRouterConfig] = None, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (nit) It seems a bit confusing to me that to construct a […] Maybe […] |
||
| context: Optional[RouterContext] = None, | ||
| ): | ||
| """Initialize router with context. | ||
| Args: | ||
| router: Optional router configuration (implementation-specific) | ||
| context: Runtime context for the router (host, port, logging, etc.) | ||
| """ | ||
| self.context = context or RouterContext() | ||
|
|
||
@abstractmethod
def start(self) -> None:
    """Start the router process.

    Raises:
        Exception: If the router fails to start.
    """
    ...
|
|
||
@abstractmethod
def stop(self) -> None:
    """Stop the router process.

    Raises:
        Exception: If the router fails to stop.
    """
    ...
|
Comment on lines
+63
to
+70
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (nit) Unused, can delete here and in […] |
||
|
|
||
@abstractmethod
def is_running(self) -> bool:
    """Check if the router is currently running and responding.

    Returns:
        True if the router is running and healthy, False otherwise.
    """
    ...
|
|
||
| @abstractmethod | ||
| def register_replicas( | ||
| self, domain: str, num_replicas: int, model_id: Optional[str] = None | ||
| ) -> List[Replica]: | ||
| """Register replicas to a domain (allocate ports/URLs for workers). | ||
| Args: | ||
| domain: The domain name for this service. | ||
| num_replicas: The number of replicas to allocate for this domain. | ||
| model_id: Optional model identifier (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct"). | ||
| Required only for routers that support IGW (Inference Gateway) mode for multi-model serving. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (nit) This method is supposed to return […] |
||
| Returns: | ||
| List of Replica objects with allocated URLs and model_id set (if provided). | ||
| Raises: | ||
| Exception: If allocation fails. | ||
| """ | ||
| ... | ||
|
|
||
@abstractmethod
def unregister_replicas(self, domain: str) -> None:
    """Unregister replicas for a domain (remove model and unassign all its replicas).

    Args:
        domain: The domain name for this service.

    Raises:
        Exception: If removal fails or domain is not found.
    """
    ...
|
|
||
@abstractmethod
def add_replicas(self, replicas: List[Replica]) -> None:
    """Register replicas with the router (actual API calls to add workers).

    Args:
        replicas: The list of replicas to add to router.

    Raises:
        Exception: If adding replicas fails.
    """
    ...
|
|
||
@abstractmethod
def remove_replicas(self, replicas: List[Replica]) -> None:
    """Unregister replicas from the router (actual API calls to remove workers).

    Args:
        replicas: The list of replicas to remove from router.

    Raises:
        Exception: If removing replicas fails.
    """
    ...
|
Comment on lines
+113
to
+135
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (nit) Unused, can delete here and in […] |
||
|
|
||
@abstractmethod
def update_replicas(self, replicas: List[Replica]) -> None:
    """Update replicas for service, replacing the current set.

    Args:
        replicas: The new list of replicas for this service.

    Raises:
        Exception: If updating replicas fails.
    """
    ...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(nit) It should be possible to install the correct extra with one pip command:
$ pip install "dstack-gateway[sglang] @ https://bihan-test-bucket.s3.eu-west-1.amazonaws.com/dstack_gateway-0.0.1-py3-none-any.whl"
This should be slightly faster than calling pip twice.