-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #466 from instructlab/port_old_tests
Add simple and full knowledge pipeline functional tests
- Loading branch information
Showing 10 changed files with 346 additions and 62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
# SPDX-License-Identifier: Apache-2.0

# Functional GPU test workflow: provisions a single NVIDIA Tesla T4 EC2
# runner, runs the GPU-marked functional tests on it, then tears it down.
name: Functional GPU (NVIDIA Tesla T4 x1)

on:
  # run against every merge commit to 'main' and release branches
  push:
    branches:
      - main
      - release-*
  # only run on PRs that touch certain regex paths
  pull_request_target:
    branches:
      - main
      - release-*
    paths:
      # note this should match the merging criteria in 'mergify.yml'
      - "**.py"
      - "pyproject.toml"
      - "requirements**.txt"
      - "tox.ini"
      - ".github/workflows/functional-gpu-nvidia-t4-x1.yml" # This workflow

# Cancel superseded runs of the same PR/ref to save GPU-runner cost.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  LC_ALL: en_US.UTF-8

defaults:
  run:
    shell: bash

permissions:
  contents: read

jobs:
  start-small-ec2-runner:
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.AWS_REGION }}

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ vars.AWS_EC2_AMI }}
          ec2-instance-type: g4dn.2xlarge
          subnet-id: subnet-02d230cffd9385bd4
          security-group-id: sg-06300447c4a5fbef3
          iam-role-name: instructlab-ci-runner
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "instructlab-ci-github-small-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
            ]

  functional-gpu-small-test:
    needs:
      - start-small-ec2-runner
    runs-on: ${{ needs.start-small-ec2-runner.outputs.label }}

    # It is important that this job has no write permissions and has
    # no access to any secrets. This part is where we are running
    # untrusted code from PRs.
    permissions: {}

    steps:
      - name: Install Packages
        run: |
          cat /etc/os-release
          sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

      - name: Checkout instructlab/sdg
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # https://github.com/actions/checkout/issues/249
          fetch-depth: 0

      - name: Fetch and checkout PR
        if: github.event_name == 'pull_request_target'
        run: |
          git fetch origin pull/${{ github.event.pull_request.number }}/merge:pr-merge-${{ github.event.pull_request.number }}
          git checkout pr-merge-${{ github.event.pull_request.number }}
          git log -1 --format="%H %s"

      - name: Install instructlab/sdg
        run: |
          export PATH="/home/ec2-user/.local/bin:/usr/local/cuda/bin:$PATH"
          python3.11 -m venv --upgrade-deps venv
          . venv/bin/activate
          nvidia-smi
          # BUGFIX: the requirement must be quoted — an unquoted '>' is shell
          # output redirection, which silently skipped installing tox-gh and
          # created a stray file named '=1.2' in the workspace.
          python3.11 -m pip install tox "tox-gh>=1.2"
          python3.11 -m pip cache remove llama_cpp_python
          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -r requirements-dev.txt

      - name: Check disk before tests
        run: |
          df -h

      - name: Run functional gpu tests with tox
        run: |
          . venv/bin/activate
          tox -e py3-functional-gpu

      - name: Check disk after tests
        run: |
          df -h

  stop-small-ec2-runner:
    needs:
      - start-small-ec2-runner
      - functional-gpu-small-test
    runs-on: ubuntu-latest
    # always run cleanup, even when the test job fails or is cancelled
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.AWS_REGION }}

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-small-ec2-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-small-ec2-runner.outputs.ec2-instance-id }}

  functional-gpu-small-workflow-complete:
    # we don't want to block PRs on failed EC2 cleanup
    # so not requiring "stop-small-ec2-runner" as well
    needs: ["start-small-ec2-runner", "functional-gpu-small-test"]
    runs-on: ubuntu-latest
    steps:
      - name: Functional GPU Workflow Complete
        run: echo "Functional GPU Workflow Complete"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Standard | ||
from importlib import resources | ||
import pathlib | ||
import typing | ||
|
||
# Third Party | ||
from llama_cpp.server.app import create_app | ||
from llama_cpp.server.settings import ModelSettings, ServerSettings | ||
from openai import OpenAI | ||
from starlette.testclient import TestClient | ||
|
||
|
||
def llama_cpp_openai_client(model, model_repo_id):
    """Build an OpenAI-compatible client backed by an in-process llama-cpp server.

    The llama-cpp-python FastAPI app is served through a Starlette
    ``TestClient``, so no real network socket is opened; the ``base_url``
    is nominal and only satisfies the OpenAI client's URL requirement.

    Args:
        model: GGUF model filename to load.
        model_repo_id: Hugging Face repo id the model file is fetched from.

    Returns:
        An ``openai.OpenAI`` client whose HTTP transport is the in-process app.
    """
    model_settings_list = [
        ModelSettings(
            model=model,
            hf_model_repo_id=model_repo_id,
            verbose=True,
        )
    ]
    app = create_app(
        server_settings=ServerSettings(),
        model_settings=model_settings_list,
    )

    # Friendly landing page on the server root.
    @app.get("/")
    def read_root():
        return {"message": "Hello from InstructLab! Visit us at https://instructlab.ai"}

    in_process_transport = TestClient(app)
    return OpenAI(
        api_key="EMPTY",
        base_url="http://localhost:8000/v1",
        http_client=in_process_transport,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Standard | ||
from importlib import resources | ||
import unittest | ||
|
||
# Third Party | ||
import pytest | ||
|
||
# First Party | ||
from src.instructlab.sdg.datamixing import _get_question_hack, _get_response_hack | ||
from src.instructlab.sdg.pipeline import ( | ||
FULL_PIPELINES_PACKAGE, | ||
Pipeline, | ||
PipelineContext, | ||
) | ||
|
||
# Local | ||
from .llama_cpp_helpers import llama_cpp_openai_client | ||
|
||
|
||
@pytest.mark.gpu
class TestFullPipeline(unittest.TestCase):
    """Functional test of the full knowledge pipeline using a local GGUF teacher model."""

    @pytest.fixture(autouse=True)
    def _setup_fixtures(self, tonsils_knowledge_dataset):
        # Quantized Mistral 7B Instruct served in-process via llama-cpp.
        teacher_client = llama_cpp_openai_client(
            "mistral-7b-instruct-v0.2.Q5_K_M.gguf",
            "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        )
        teacher_model_id = teacher_client.models.list().data[0].id
        # Keep generation small so the GPU run stays fast.
        context = PipelineContext(
            client=teacher_client,
            model_family="mixtral",
            model_id=teacher_model_id,
            num_instructions_to_generate=2,
            max_num_tokens=1024,
        )
        yaml_path = resources.files(FULL_PIPELINES_PACKAGE).joinpath("knowledge.yaml")
        self.knowledge_dataset = tonsils_knowledge_dataset
        self.knowledge_pipeline = Pipeline.from_file(context, yaml_path)

    def test_knowledge(self):
        """Generated samples must be non-empty with usable question/response pairs."""
        samples = self.knowledge_pipeline.generate(self.knowledge_dataset)
        assert len(samples) > 0
        assert "question" in samples.column_names
        assert "response" in samples.column_names
        for generated in samples:
            question_text = _get_question_hack(generated)
            response_text = _get_response_hack(generated)
            assert len(question_text) > 0
            assert len(response_text) > 0
Oops, something went wrong.