Skip to content

Update data

Update data #27

Workflow file for this run

# Pulls the DVC-tracked data on pull requests that change DVC metadata,
# generates a data-statistics report, and posts it as a PR comment via CML.
name: DVC Workflow

# Run only for pull requests targeting `main` that touch DVC pointer files
# or the DVC configuration directory.
on:
  pull_request:
    branches:
      - main
    paths:
      - '**/*.dvc'
      - '.dvc/**'

jobs:
  format:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string; an unquoted
          # value such as 3.10 would be parsed as the float 3.1.
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # Quoted so the shell never treats the brackets as a glob pattern.
          pip install 'dvc[gdrive]'

      - name: Authenticate with Google Cloud
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}

      - name: Prepare DVC Authentication
        # Moves the gha-creds-*.json file found under the workspace to a
        # fixed location outside the checkout.
        # NOTE(review): the destination is hard-coded; presumably it matches
        # the credential path in the DVC remote config - confirm.
        run: |
          mv "$(find "$GITHUB_WORKSPACE" -name "gha-creds-*.json")" /home/runner/work/my_project/credentials.json

      - name: Pull data from DVC
        run: |
          dvc pull --no-run-cache -v

      - name: Debug paths
        # Prints the working directory and a recursive listing to the job log.
        run: |
          pwd
          ls -R

      - name: Check data statistics
        # The script's stdout becomes the report body; the image links are
        # appended afterwards.
        # NOTE(review): presumably data.py writes the three PNG files
        # referenced below - verify.
        run: |
          python src/my_project/data.py > data_statistics.md
          echo '![](./mnist_images.png "MNIST images")' >> data_statistics.md
          echo '![](./train_label_distribution.png "Train label distribution")' >> data_statistics.md
          echo '![](./test_label_distribution.png "Test label distribution")' >> data_statistics.md

      - name: Setup CML
        uses: iterative/setup-cml@v2

      - name: Comment on PR
        # NOTE(review): posting a comment may require the token to have
        # `pull-requests: write` permission - confirm the repository's
        # default GITHUB_TOKEN permissions.
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          cml comment create data_statistics.md --watermark-title="Data Checker"