Skip to content

Commit f3f764e

Browse files
authored
fix: use hf id in dsr1 recipe to support DGDR (#4481)
Signed-off-by: hongkuanz <[email protected]>
1 parent 473cb57 commit f3f764e

File tree

2 files changed: 22 additions (+22) and 60 deletions (-60)

recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml

Lines changed: 10 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment
66
metadata:
77
name: sgl-dsr1-16gpu
88
spec:
9+
envs:
10+
- name: HF_HOME
11+
value: /opt/model
912
pvcs:
10-
- name: model-cache-pvc
13+
- name: model-cache
1114
create: false
1215
services:
1316
Frontend:
@@ -16,13 +19,6 @@ spec:
1619
replicas: 1
1720
extraPodSpec:
1821
mainContainer:
19-
startupProbe:
20-
httpGet:
21-
path: /health
22-
port: 8000
23-
periodSeconds: 10
24-
timeoutSeconds: 1800
25-
failureThreshold: 60
2622
image: my-registry/sglang-runtime:my-tag
2723
decode:
2824
dynamoNamespace: sgl-dsr1-16gpu
@@ -34,19 +30,12 @@ spec:
3430
limits:
3531
gpu: "8"
3632
volumeMounts:
37-
- name: model-cache-pvc
38-
mountPoint: /model-cache
33+
- name: model-cache
34+
mountPoint: /opt/model
3935
sharedMemory:
4036
size: 80Gi
4137
extraPodSpec:
4238
mainContainer:
43-
startupProbe:
44-
httpGet:
45-
path: /health
46-
port: 9090
47-
periodSeconds: 10
48-
timeoutSeconds: 10
49-
failureThreshold: 600
5039
image: my-registry/sglang-runtime:my-tag
5140
workingDir: /sgl-workspace/dynamo
5241
command:
@@ -55,7 +44,7 @@ spec:
5544
- dynamo.sglang
5645
args:
5746
- --model-path
58-
- /model-cache/deepseek-r1
47+
- deepseek-ai/DeepSeek-R1
5948
- --served-model-name
6049
- deepseek-ai/DeepSeek-R1
6150
- --tp
@@ -86,19 +75,12 @@ spec:
8675
limits:
8776
gpu: "8"
8877
volumeMounts:
89-
- name: model-cache-pvc
90-
mountPoint: /model-cache
78+
- name: model-cache
79+
mountPoint: /opt/model
9180
sharedMemory:
9281
size: 80Gi
9382
extraPodSpec:
9483
mainContainer:
95-
startupProbe:
96-
httpGet:
97-
path: /health
98-
port: 9090
99-
periodSeconds: 10
100-
timeoutSeconds: 10
101-
failureThreshold: 600
10284
image: my-registry/sglang-runtime:my-tag
10385
workingDir: /sgl-workspace/dynamo
10486
command:
@@ -107,7 +89,7 @@ spec:
10789
- dynamo.sglang
10890
args:
10991
- --model-path
110-
- /model-cache/deepseek-r1
92+
- deepseek-ai/DeepSeek-R1
11193
- --served-model-name
11294
- deepseek-ai/DeepSeek-R1
11395
- --tp

recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@ kind: DynamoGraphDeployment
66
metadata:
77
name: sgl-dsr1-8gpu
88
spec:
9+
envs:
10+
- name: HF_HOME
11+
value: /opt/model
912
pvcs:
10-
- name: model-cache-pvc
13+
- name: model-cache
1114
create: false
1215
services:
1316
Frontend:
@@ -16,13 +19,6 @@ spec:
1619
replicas: 1
1720
extraPodSpec:
1821
mainContainer:
19-
startupProbe:
20-
httpGet:
21-
path: /health
22-
port: 8000
23-
periodSeconds: 10
24-
timeoutSeconds: 1800
25-
failureThreshold: 60
2622
image: my-registry/sglang-runtime:my-tag
2723
decode:
2824
dynamoNamespace: sgl-dsr1-8gpu
@@ -32,28 +28,21 @@ spec:
3228
limits:
3329
gpu: "8"
3430
volumeMounts:
35-
- name: model-cache-pvc
36-
mountPoint: /model-cache
31+
- name: model-cache
32+
mountPoint: /opt/model
3733
sharedMemory:
3834
size: 80Gi
3935
extraPodSpec:
4036
mainContainer:
41-
startupProbe:
42-
httpGet:
43-
path: /health
44-
port: 9090
45-
periodSeconds: 10
46-
timeoutSeconds: 10
47-
failureThreshold: 600
4837
image: my-registry/sglang-runtime:my-tag
49-
workingDir: /sgl-workspace/dynamo
38+
workingDir: /workspace
5039
command:
5140
- python3
5241
- -m
5342
- dynamo.sglang
5443
args:
5544
- --model-path
56-
- /model-cache/deepseek-r1
45+
- deepseek-ai/DeepSeek-R1
5746
- --served-model-name
5847
- deepseek-ai/DeepSeek-R1
5948
- --tp
@@ -64,7 +53,6 @@ spec:
6453
- --ep-size
6554
- "8"
6655
- --trust-remote-code
67-
- --skip-tokenizer-init
6856
- --disaggregation-mode
6957
- decode
7058
- --disaggregation-bootstrap-port
@@ -80,36 +68,28 @@ spec:
8068
limits:
8169
gpu: "8"
8270
volumeMounts:
83-
- name: model-cache-pvc
84-
mountPoint: /model-cache
71+
- name: model-cache
72+
mountPoint: /opt/model
8573
sharedMemory:
8674
size: 80Gi
8775
extraPodSpec:
8876
mainContainer:
89-
startupProbe:
90-
httpGet:
91-
path: /health
92-
port: 9090
93-
periodSeconds: 10
94-
timeoutSeconds: 10
95-
failureThreshold: 600
9677
image: my-registry/sglang-runtime:my-tag
97-
workingDir: /sgl-workspace/dynamo
78+
workingDir: /workspace
9879
command:
9980
- python3
10081
- -m
10182
- dynamo.sglang
10283
args:
10384
- --model-path
104-
- /model-cache/deepseek-r1
85+
- deepseek-ai/DeepSeek-R1
10586
- --served-model-name
10687
- deepseek-ai/DeepSeek-R1
10788
- --tp
10889
- "8"
10990
- --ep-size
11091
- "8"
11192
- --trust-remote-code
112-
- --skip-tokenizer-init
11393
- --disaggregation-mode
11494
- prefill
11595
- --disaggregation-bootstrap-port

0 commit comments

Comments
 (0)