
Commit 5468bab

ci: integrate vLLM inference tests with GitHub Actions workflows
Add vLLM provider support to the integration-test CI workflows alongside the existing Ollama support. Configure provider-specific test execution: vLLM runs only the inference-specific tests (excluding vision tests), while Ollama continues to run the full test suite. This gives CI coverage of both inference providers while keeping the vLLM footprint small; it can be expanded later if it proves not to be too disruptive. Also update test skips that were marked with "inline::vllm" to the correct "remote::vllm" provider type. This causes some failing log-probs tests to be skipped and should be revisited.

Signed-off-by: Derek Higgins <[email protected]>
1 parent 4c6693a commit 5468bab

File tree: 4 files changed (+13, −9 lines)

  .github/actions/run-and-record-tests/action.yml
  .github/workflows/integration-tests.yml
  tests/integration/inference/test_openai_completion.py
  tests/integration/suites.py

.github/actions/run-and-record-tests/action.yml

Lines changed: 4 additions & 2 deletions
@@ -68,7 +68,8 @@ runs:
          echo "New recordings detected, committing and pushing"
          git add tests/integration/recordings/

-         git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+         git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
          git fetch origin ${{ github.ref_name }}
          git rebase origin/${{ github.ref_name }}
          echo "Rebased successfully"
@@ -82,7 +83,8 @@
      if: ${{ always() }}
      shell: bash
      run: |
-       sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+       sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+       sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true

    - name: Upload logs
      if: ${{ always() }}
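
The 2>&1 added above matters because docker logs replays the container's stderr stream on its own stderr, so without the redirect most of the server's output never lands in the uploaded artifact. A minimal Python sketch of the same step, assuming the container names (ollama, vllm) and the <container>-<inference-mode>.log naming used by this action; everything else is illustrative:

import subprocess

def dump_container_logs(container: str, inference_mode: str) -> None:
    """Write a container's stdout *and* stderr to one log file, never failing the step."""
    with open(f"{container}-{inference_mode}.log", "wb") as f:
        subprocess.run(
            ["docker", "logs", container],
            stdout=f,
            stderr=subprocess.STDOUT,  # shell equivalent of 2>&1
            check=False,               # shell equivalent of || true: a missing container must not fail the step
        )

for name in ("ollama", "vllm"):
    dump_container_logs(name, "record")  # "record" stands in for ${{ inputs.inference-mode }}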

.github/workflows/integration-tests.yml

Lines changed: 2 additions & 5 deletions
@@ -21,7 +21,6 @@ on:
  schedule:
    # If changing the cron schedule, update the provider in the test-matrix job
    - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
-   - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
  workflow_dispatch:
    inputs:
      test-all-client-versions:
@@ -57,11 +56,9 @@ jobs:
        # Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
        config: >-
          ${{
-           github.event.schedule == '1 0 * * 0'
-           && fromJSON('[{"setup": "vllm", "suite": "base"}]')
-           || github.event.inputs.test-setup == 'ollama-vision'
+           github.event.inputs.test-setup == 'ollama-vision'
            && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-           || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+           || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "vllm", "suite": "base-vllm-subset"}]')
          }}

      steps:
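
To make the ternary chain above easier to read, here is a rough Python rendering of how the matrix config now resolves; the trigger handling is simplified and the real logic is the GitHub expression in the diff:

def matrix_config(test_setup: str | None) -> list[dict[str, str]]:
    # Manual dispatch asking specifically for the vision setup.
    if test_setup == "ollama-vision":
        return [{"setup": "ollama-vision", "suite": "vision"}]
    # Everything else, including the daily cron: full Ollama coverage plus the
    # small vLLM inference-only subset added by this commit. The weekly
    # vLLM-only cron branch is gone.
    return [
        {"setup": "ollama", "suite": "base"},
        {"setup": "ollama-vision", "suite": "vision"},
        {"setup": "vllm", "suite": "base-vllm-subset"},
    ]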

tests/integration/inference/test_openai_completion.py

Lines changed: 2 additions & 2 deletions
@@ -39,7 +39,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id):
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
-       "inline::vllm",
+       "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        # Technically Nvidia does support OpenAI completions, but none of their hosted models
@@ -119,7 +119,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, model_id):
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
-       "inline::vllm",
+       "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        "remote::cerebras",

tests/integration/suites.py

Lines changed: 5 additions & 0 deletions
@@ -168,6 +168,11 @@ class Setup(BaseModel):
        roots=base_roots,
        default_setup="ollama",
    ),
+   "base-vllm-subset": Suite(
+       name="base-vllm-subset",
+       roots=["tests/integration/inference"],
+       default_setup="vllm",
+   ),
    "responses": Suite(
        name="responses",
        roots=["tests/integration/responses"],
