From ff76c3190afea368ca39dc6998dfa9a436f59345 Mon Sep 17 00:00:00 2001
From: Siddharth Venkatesan
Date: Tue, 4 Mar 2025 15:32:22 -0800
Subject: [PATCH] [ci] disable and delete workflows that are no longer in use

---
 .github/workflows/benchmark-nightly.yml       | 120 ----------
 .github/workflows/instant_benchmark.yml       | 218 ------------------
 .github/workflows/llm_integration_p4d.yml     |   2 -
 .github/workflows/lmi-no-code.yml             |   2 -
 .github/workflows/nightly-docker-ecr-sync.yml |   2 -
 5 files changed, 344 deletions(-)
 delete mode 100644 .github/workflows/benchmark-nightly.yml
 delete mode 100644 .github/workflows/instant_benchmark.yml

diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml
deleted file mode 100644
index 87178e3c6..000000000
--- a/.github/workflows/benchmark-nightly.yml
+++ /dev/null
@@ -1,120 +0,0 @@
-name: Benchmark Nightly
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: '0 1 * * *'
-
-permissions:
-  id-token: write
-  contents: read
-
-jobs:
-  create-runners:
-    runs-on: [ self-hosted, scheduler ]
-    steps:
-      - name: Create new G6 instance
-        id: create_gpu
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_g6 $token djl-serving
-    outputs:
-      gpu_instance_id: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
-
-  g5-2xl:
-    uses: ./.github/workflows/instant_benchmark.yml
-    secrets: inherit
-    with:
-      running_template: ./benchmark/nightly/g5-2xl.txt
-      instance: g5.2xlarge
-      record: cloudwatch
-  g5-12xl:
-    uses: ./.github/workflows/instant_benchmark.yml
-    secrets: inherit
-    with:
-      running_template: ./benchmark/nightly/g5-12xl.txt
-      instance: g5.12xlarge
-      record: cloudwatch
-  g5-48xl:
-    uses: ./.github/workflows/instant_benchmark.yml
-    secrets: inherit
-    with:
-      running_template: ./benchmark/nightly/g5-48xl.txt
-      instance: g5.48xlarge
-      record: cloudwatch
-
-  handler-performance-test:
-    runs-on: [ self-hosted, g6 ]
-    timeout-minutes: 60
-    needs: create-runners
-    strategy:
-      fail-fast: false
-      matrix:
-        test:
-          - test: TestGPUHandlerPerformance
-            instance: g6
-    steps:
-      - uses: actions/checkout@v4
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Install pip dependencies
-        run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
-      - name: Install torch
-        # Use torch to get cuda capability of current device to selectively run tests
-        # Torch version doesn't really matter that much
-        run: |
-          pip3 install torch==2.3.0
-      - name: Install awscurl
-        working-directory: tests/integration
-        run: |
-          wget https://publish.djl.ai/awscurl/awscurl
-          chmod +x awscurl
-          mkdir outputs
-      - name: Test
-        working-directory: tests/integration
-        env:
-          TEST_DJL_VERSION: nightly
-        run: |
-          python -m pytest -k ${{ matrix.test.test }} tests.py
-      - name: Cleanup
-        working-directory: tests/integration
-        run: |
-          rm -rf outputs
-          rm awscurl
-      - name: On Failure
-        if: ${{ failure() }}
-        working-directory: tests/integration
-        run: |
-          for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
-          sudo rm -rf outputs && sudo rm -rf models
-          rm awscurl
-          ./remove_container.sh
-      - name: Upload test logs
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: test-${{ matrix.test.test }}-logs
-          path: tests/integration/all_logs/
-
-  stop-g6-runners:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, handler-performance-test ]
-    steps:
-      - name: Stop g6 instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
-          ./stop_instance.sh $instance_id
diff --git a/.github/workflows/instant_benchmark.yml b/.github/workflows/instant_benchmark.yml
deleted file mode 100644
index 7b96cdd09..000000000
--- a/.github/workflows/instant_benchmark.yml
+++ /dev/null
@@ -1,218 +0,0 @@
-name: instant benchmark tooling
-
-on:
-  workflow_dispatch:
-    inputs:
-      running_template:
-        description: 'A json file that contains benchmark plans'
-        required: true
-      instance:
-        description: 'Instance used for benchmark'
-        required: true
-        default: 'g5.12xlarge'
-        type: choice
-        options:
-          - g5.2xlarge
-          - g5.12xlarge
-          - g5.48xlarge
-          - g6.2xlarge
-          - g6.12xlarge
-          - g6.48xlarge
-          - g4dn.12xlarge
-          - g4dn.2xlarge
-          - p4d.24xlarge
-          - p4de.24xlarge
-          - p5.24xlarge
-          - inf2.8xlarge
-          - inf2.24xlarge
-          - trn1.2xlarge
-          - trn1.32xlarge
-      container:
-        description: 'The container used to run benchmark (overrides the template). Should be a full docker path such as deepjavalibrary/djl-serving:0.28.0-lmi'
-        required: false
-        default: ''
-      record:
-        description: 'Whether to record the results'
-        default: 'none'
-        type: choice
-        options:
-          - none
-          - table
-          - cloudwatch
-      repo:
-        description: '[Do not change] The repo for runner registration'
-        required: false
-        type: string
-        default: 'djl-serving'
-  workflow_call:
-    inputs:
-      running_template:
-        description: 'A json file that contains benchmark plans'
-        required: true
-        type: string
-      instance:
-        description: 'Instance used for benchmark'
-        required: true
-        type: string
-      container:
-        description: 'The container used to run benchmark (overrides the template). Should be a full docker path such as deepjavalibrary/djl-serving:0.27.0-deepspeed'
-        required: false
-        type: string
-        default: ''
-      record:
-        description: 'Whether to record the results'
-        required: false
-        type: string
-        default: 'none'
-      repo:
-        description: 'The repo for runner registration'
-        required: false
-        type: string
-        default: 'djl-serving'
-
-permissions:
-  id-token: write
-  contents: read
-
-jobs:
-  create-runners:
-    runs-on: [self-hosted, scheduler]
-    steps:
-      - name: Create new instance
-        id: create_instance
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/${{ inputs.repo }}/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_ib_${{ inputs.instance }} $token ${{ inputs.repo }}
-    outputs:
-      gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }}
-
-  environment-setup:
-    runs-on: [ self-hosted, "${{ inputs.instance }}" ]
-    timeout-minutes: 15
-    needs: [ create-runners ]
-    steps:
-      - uses: actions/checkout@v4
-        if: ${{ inputs.repo == 'djl-serving' }}
-      - name: Setup DJLServing for other repo
-        uses: actions/checkout@v4
-        if: ${{ inputs.repo != 'djl-serving' }}
-        with:
-          repository: deepjavalibrary/djl-serving
-          ref: master
-      - name: Setup for other repo
-        uses: actions/checkout@v4
-        if: ${{ inputs.repo != 'djl-serving' }}
-        with:
-          path: ${{ inputs.repo }}
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-      - name: install deps
-        run: |
-          pip3 install boto3 awscli
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
-          aws-region: us-east-1
-      - name: Parse job schema
-        working-directory: tests/integration
-        id: generate_matrix
-        run: |
-          python3 instant_benchmark.py --parse ${{ inputs.running_template }} \
-          --container "${{ inputs.container }}"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: template-${{ inputs.instance }}
-          path: tests/integration/template_tmp.json
-    outputs:
-      jobs: ${{ steps.generate_matrix.outputs.jobs }}
-
-  benchmark_run:
-    runs-on: [ self-hosted, "${{ inputs.instance }}" ]
-    timeout-minutes: 120
-    needs: [ environment-setup ]
-    strategy:
-      matrix:
-        job: ${{ fromJSON(needs.environment-setup.outputs.jobs) }}
-    steps:
-      - uses: actions/checkout@v4
-        if: ${{ inputs.repo == 'djl-serving' }}
-      - name: Setup DJLServing for other repo
-        uses: actions/checkout@v4
-        if: ${{ inputs.repo != 'djl-serving' }}
-        with:
-          repository: deepjavalibrary/djl-serving
-          ref: master
-      - name: Setup for other repo
-        uses: actions/checkout@v4
-        if: ${{ inputs.repo != 'djl-serving' }}
-        with:
-          path: ${{ inputs.repo }}
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: install deps
-        run: |
-          pip3 install boto3 awscli
-      - name: Setup awscurl
-        working-directory: tests/integration
-        run: |
-          wget https://publish.djl.ai/awscurl/awscurl
-          chmod +x awscurl
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
-          aws-region: us-east-1
-          role-duration-seconds: 7200 # 2 hour
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
-        with:
-          registries: "125045733377,875423407011"
-      - name: Download template
-        uses: actions/download-artifact@v4
-        with:
-          path: tests/integration
-      - name: Run benchmark job
-        working-directory: tests/integration
-        run: |
-          cp template-${{ inputs.instance }}/template_tmp.json template.json
-          python3 instant_benchmark.py --template template.json \
-          --job ${{ matrix.job }} --instance ${{ inputs.instance }} \
-          --record ${{ inputs.record }}
-
-          bash instant_benchmark.sh
-      - name: Get serving logs
-        if: always()
-        working-directory: tests/integration
-        run: |
-          ./remove_container.sh || true
-          cat logs/serving.log || true
-      - name: Upload test artifacts
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: ${{ matrix.job }}
-          path: tests/integration
-
-  stop-runners:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, environment-setup, benchmark_run ]
-    steps:
-      - name: Stop instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
-          ./stop_instance.sh $instance_id
diff --git a/.github/workflows/llm_integration_p4d.yml b/.github/workflows/llm_integration_p4d.yml
index 23ff5e672..420b10bbf 100644
--- a/.github/workflows/llm_integration_p4d.yml
+++ b/.github/workflows/llm_integration_p4d.yml
@@ -11,8 +11,6 @@ on:
         description: 'Run only the tests you need [aiccl]'
         required: false
         default: ''
-  schedule:
-    - cron: '0 15 * * *'
 
 jobs:
diff --git a/.github/workflows/lmi-no-code.yml b/.github/workflows/lmi-no-code.yml
index 1ef756abf..5628f9df4 100644
--- a/.github/workflows/lmi-no-code.yml
+++ b/.github/workflows/lmi-no-code.yml
@@ -7,8 +7,6 @@ on:
         description: 'The released version of DJL'
         required: false
         default: ''
-  schedule:
-    - cron: '0 8 * * *'
 
 jobs:
   create-runners:
diff --git a/.github/workflows/nightly-docker-ecr-sync.yml b/.github/workflows/nightly-docker-ecr-sync.yml
index ad2ad7a8b..f38939517 100644
--- a/.github/workflows/nightly-docker-ecr-sync.yml
+++ b/.github/workflows/nightly-docker-ecr-sync.yml
@@ -7,8 +7,6 @@ on:
         description: 'version string like 0.27.0, default is nightly'
         required: true
         default: 'nightly'
-  schedule:
-    - cron: '0 14 * * *'
 
 jobs:
   create-aarch64-runner: