@@ -17,100 +17,100 @@ on:
1717 type : string
1818
1919jobs :
20- e2e :
21- name : singlecard
22- runs-on : ${{ inputs.runner }}-1
23- container :
24- image : ${{ inputs.image }}
25- env :
26- VLLM_LOGGING_LEVEL : ERROR
27- VLLM_USE_MODELSCOPE : True
28- steps :
29- - name : Check npu and CANN info
30- run : |
31- npu-smi info
32- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
33-
34- - name : Config mirrors
35- run : |
36- sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
37- pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
38- pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
39- apt-get update -y
40- apt install git -y
41-
42- - name : Checkout vllm-project/vllm-ascend repo
43- uses : actions/checkout@v6
44-
45- - name : Install system dependencies
46- run : |
47- apt-get -y install `cat packages.txt`
48- apt-get -y install gcc g++ cmake libnuma-dev
49-
50- - name : Checkout vllm-project/vllm repo
51- uses : actions/checkout@v6
52- with :
53- repository : vllm-project/vllm
54- ref : ${{ inputs.vllm }}
55- path : ./vllm-empty
56- fetch-depth : 1
57-
58- - name : Install vllm-project/vllm from source
59- working-directory : ./vllm-empty
60- run : |
61- VLLM_TARGET_DEVICE=empty pip install -e .
62-
63- - name : Install vllm-project/vllm-ascend
64- env :
65- PIP_EXTRA_INDEX_URL : https://mirrors.huaweicloud.com/ascend/repos/pypi
66- run : |
67- pip install -r requirements-dev.txt
68- pip install -v -e .
69-
70- - name : Run vllm-project/vllm-ascend test
71- env :
72- VLLM_WORKER_MULTIPROC_METHOD : spawn
73- VLLM_USE_MODELSCOPE : True
74- PYTORCH_NPU_ALLOC_CONF : max_split_size_mb:256
75- if : ${{ inputs.type == 'light' }}
76- run : |
77- # pytest -sv tests/e2e/singlecard/test_aclgraph.py
78- # pytest -sv tests/e2e/singlecard/test_quantization.py
79- pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
80-
81- - name : Run e2e test
82- env :
83- VLLM_WORKER_MULTIPROC_METHOD : spawn
84- VLLM_USE_MODELSCOPE : True
85- PYTORCH_NPU_ALLOC_CONF : max_split_size_mb:256
86- if : ${{ inputs.type == 'full' }}
87- run : |
88- # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
89- # the test separately.
90-
91- pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
92- pytest -sv tests/e2e/singlecard/test_aclgraph.py
93- pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
94- pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
95- pytest -sv tests/e2e/singlecard/test_bge_model.py
96- pytest -sv tests/e2e/singlecard/test_camem.py
97- pytest -sv tests/e2e/singlecard/test_chunked.py
98- pytest -sv tests/e2e/singlecard/test_embedding.py
99- # pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
100- pytest -sv tests/e2e/singlecard/test_guided_decoding.py
101- # torch 2.8 doesn't work with lora, fix me
102- #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
103- pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
104- pytest -sv tests/e2e/singlecard/test_quantization.py
105- pytest -sv tests/e2e/singlecard/test_sampler.py
106- pytest -sv tests/e2e/singlecard/test_vlm.py
107- pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
108-
109- # ------------------------------------ v1 spec decode test ------------------------------------ #
110- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
111- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
112- # Fix me: test_eagle_correctness OOM error
113- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
20+ # e2e:
21+ # name: singlecard
22+ # runs-on: ${{ inputs.runner }}-1
23+ # container:
24+ # image: ${{ inputs.image }}
25+ # env:
26+ # VLLM_LOGGING_LEVEL: ERROR
27+ # VLLM_USE_MODELSCOPE: True
28+ # steps:
29+ # - name: Check npu and CANN info
30+ # run: |
31+ # npu-smi info
32+ # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
33+
34+ # - name: Config mirrors
35+ # run: |
36+ # sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
37+ # pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
38+ # pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
39+ # apt-get update -y
40+ # apt install git -y
41+
42+ # - name: Checkout vllm-project/vllm-ascend repo
43+ # uses: actions/checkout@v6
44+
45+ # - name: Install system dependencies
46+ # run: |
47+ # apt-get -y install `cat packages.txt`
48+ # apt-get -y install gcc g++ cmake libnuma-dev
49+
50+ # - name: Checkout vllm-project/vllm repo
51+ # uses: actions/checkout@v6
52+ # with:
53+ # repository: vllm-project/vllm
54+ # ref: ${{ inputs.vllm }}
55+ # path: ./vllm-empty
56+ # fetch-depth: 1
57+
58+ # - name: Install vllm-project/vllm from source
59+ # working-directory: ./vllm-empty
60+ # run: |
61+ # VLLM_TARGET_DEVICE=empty pip install -e .
62+
63+ # - name: Install vllm-project/vllm-ascend
64+ # env:
65+ # PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
66+ # run: |
67+ # pip install -r requirements-dev.txt
68+ # pip install -v -e .
69+
70+ # - name: Run vllm-project/vllm-ascend test
71+ # env:
72+ # VLLM_WORKER_MULTIPROC_METHOD: spawn
73+ # VLLM_USE_MODELSCOPE: True
74+ # PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
75+ # if: ${{ inputs.type == 'light' }}
76+ # run: |
77+ # # pytest -sv tests/e2e/singlecard/test_aclgraph.py
78+ # # pytest -sv tests/e2e/singlecard/test_quantization.py
79+ # pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
80+
81+ # - name: Run e2e test
82+ # env:
83+ # VLLM_WORKER_MULTIPROC_METHOD: spawn
84+ # VLLM_USE_MODELSCOPE: True
85+ # PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
86+ # if: ${{ inputs.type == 'full' }}
87+ # run: |
88+ # # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
89+ # # the test separately.
90+
91+ # pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
92+ # pytest -sv tests/e2e/singlecard/test_aclgraph.py
93+ # pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
94+ # pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
95+ # pytest -sv tests/e2e/singlecard/test_bge_model.py
96+ # pytest -sv tests/e2e/singlecard/test_camem.py
97+ # pytest -sv tests/e2e/singlecard/test_chunked.py
98+ # pytest -sv tests/e2e/singlecard/test_embedding.py
99+ # # pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
100+ # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
101+ # # torch 2.8 doesn't work with lora, fix me
102+ # #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
103+ # pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
104+ # pytest -sv tests/e2e/singlecard/test_quantization.py
105+ # pytest -sv tests/e2e/singlecard/test_sampler.py
106+ # pytest -sv tests/e2e/singlecard/test_vlm.py
107+ # pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
108+
109+ # # ------------------------------------ v1 spec decode test ------------------------------------ #
110+ # pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
111+ # pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
112+ # # Fix me: test_eagle_correctness OOM error
113+ # pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
114114
115115 e2e-2-cards :
116116 name : multicard-2
@@ -180,9 +180,9 @@ jobs:
180180 VLLM_USE_MODELSCOPE : True
181181 if : ${{ inputs.type == 'full' }}
182182 run : |
183- pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
184- pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
185- pytest -sv tests/e2e/multicard/test_full_graph_mode.py
183+ # pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
184+ # pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
185+ # pytest -sv tests/e2e/multicard/test_full_graph_mode.py
186186 pytest -sv tests/e2e/multicard/test_data_parallel.py
187187 pytest -sv tests/e2e/multicard/test_expert_parallel.py
188188 pytest -sv tests/e2e/multicard/test_external_launcher.py
@@ -207,84 +207,84 @@ jobs:
207207 pytest -sv tests/e2e/multicard/test_prefix_caching.py
208208 pytest -sv tests/e2e/multicard/test_qwen3_moe.py
209209
210- e2e-4-cards :
211- name : multicard-4
212- needs : [e2e, e2e-2-cards]
213- if : ${{ needs.e2e.result == 'success' && needs.e2e-2-cards.result == 'success' && inputs.type == 'full' }}
214- runs-on : linux-aarch64-a3-4
215- container :
216- image : m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
217- env :
218- VLLM_LOGGING_LEVEL : ERROR
219- VLLM_USE_MODELSCOPE : True
220- steps :
221- - name : Check npu and CANN info
222- run : |
223- npu-smi info
224- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
225-
226- - name : Config mirrors
227- run : |
228- sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
229- pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
230- apt-get update -y
231- apt install git wget curl -y
232- git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
233-
234- - name : Checkout vllm-project/vllm-ascend repo
235- uses : actions/checkout@v6
236- with :
237- path : ./vllm-ascend
238-
239- - name : Install system dependencies
240- run : |
241- apt-get -y install `cat packages.txt`
242- apt-get -y install gcc g++ cmake libnuma-dev
243-
244- - name : Checkout vllm-project/vllm repo
245- uses : actions/checkout@v6
246- with :
247- repository : vllm-project/vllm
248- ref : ${{ inputs.vllm }}
249- path : ./vllm-empty
250-
251- - name : Install vllm-project/vllm from source
252- working-directory : ./vllm-empty
253- run : |
254- VLLM_TARGET_DEVICE=empty pip install -e .
255-
256- - name : Install vllm-project/vllm-ascend
257- working-directory : ./vllm-ascend
258- run : |
259- export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
260- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
261- pip install -r requirements-dev.txt
262- pip install -v -e .
263-
264- - name : Run vllm-project/vllm-ascend test for V1 Engine
265- working-directory : ./vllm-ascend
266- env :
267- VLLM_WORKER_MULTIPROC_METHOD : spawn
268- VLLM_USE_MODELSCOPE : True
269- run : |
270- pytest -sv \
271- tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe \
272- tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
273- # tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP \
274- # tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
275-
276- - name : Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
277- shell : bash -l {0}
278- run : |
279- . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
280- python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
281-
282- - name : Run vllm-project/vllm-ascend Qwen3 Next test
283- working-directory : ./vllm-ascend
284- shell : bash -el {0}
285- env :
286- VLLM_WORKER_MULTIPROC_METHOD : spawn
287- VLLM_USE_MODELSCOPE : True
288- run : |
289- . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
290- pytest -sv tests/e2e/multicard/test_qwen3_next.py
210+ # e2e-4-cards:
211+ # name: multicard-4
212+ # needs: [e2e, e2e-2-cards]
213+ # if: ${{ needs.e2e.result == 'success' && needs.e2e-2-cards.result == 'success' && inputs.type == 'full' }}
214+ # runs-on: linux-aarch64-a3-4
215+ # container:
216+ # image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
217+ # env:
218+ # VLLM_LOGGING_LEVEL: ERROR
219+ # VLLM_USE_MODELSCOPE: True
220+ # steps:
221+ # - name: Check npu and CANN info
222+ # run: |
223+ # npu-smi info
224+ # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
225+
226+ # - name: Config mirrors
227+ # run: |
228+ # sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
229+ # pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
230+ # apt-get update -y
231+ # apt install git wget curl -y
232+ # git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
233+
234+ # - name: Checkout vllm-project/vllm-ascend repo
235+ # uses: actions/checkout@v6
236+ # with:
237+ # path: ./vllm-ascend
238+
239+ # - name: Install system dependencies
240+ # run: |
241+ # apt-get -y install `cat packages.txt`
242+ # apt-get -y install gcc g++ cmake libnuma-dev
243+
244+ # - name: Checkout vllm-project/vllm repo
245+ # uses: actions/checkout@v6
246+ # with:
247+ # repository: vllm-project/vllm
248+ # ref: ${{ inputs.vllm }}
249+ # path: ./vllm-empty
250+
251+ # - name: Install vllm-project/vllm from source
252+ # working-directory: ./vllm-empty
253+ # run: |
254+ # VLLM_TARGET_DEVICE=empty pip install -e .
255+
256+ # - name: Install vllm-project/vllm-ascend
257+ # working-directory: ./vllm-ascend
258+ # run: |
259+ # export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
260+ # export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
261+ # pip install -r requirements-dev.txt
262+ # pip install -v -e .
263+
264+ # - name: Run vllm-project/vllm-ascend test for V1 Engine
265+ # working-directory: ./vllm-ascend
266+ # env:
267+ # VLLM_WORKER_MULTIPROC_METHOD: spawn
268+ # VLLM_USE_MODELSCOPE: True
269+ # run: |
270+ # pytest -sv \
271+ # tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe \
272+ # tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
273+ # # tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP \
274+ # # tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
275+
276+ # - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
277+ # shell: bash -l {0}
278+ # run: |
279+ # . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
280+ # python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
281+
282+ # - name: Run vllm-project/vllm-ascend Qwen3 Next test
283+ # working-directory: ./vllm-ascend
284+ # shell: bash -el {0}
285+ # env:
286+ # VLLM_WORKER_MULTIPROC_METHOD: spawn
287+ # VLLM_USE_MODELSCOPE: True
288+ # run: |
289+ # . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
290+ # pytest -sv tests/e2e/multicard/test_qwen3_next.py
0 commit comments