Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 35 additions & 12 deletions .github/workflows/docker_build.yaml
Original file line number Diff line number Diff line change
@@ -1,58 +1,81 @@
# This is boilerplate for publishing a Docker image via GitHub Actions.

name: Docker
name: Docker workflows

on:
workflow_dispatch:
workflow_call:

push:
branches: ["main"]
# Publish semver tags as releases.
tags:
- "v[0-9]+.[0-9]+.[0-9]+"
- "[0-9]+.[0-9]+.[0-9]+"
- "[0-9]+.[0-9]+.[0-9]+-*"
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
release:
types: [published]

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
TEST_TAG: test_container

jobs:
build:
build_test_push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write

steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
uses: docker/login-action@v4
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Checkout repository
uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v6

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
id: meta
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5
- name: Build test Docker image
uses: docker/build-push-action@v6
id: build-test-image
with:
context: .
load: true
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.TEST_TAG }}
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Run non-spark tests on the built image
id: run-tests-docker
continue-on-error: true
run: |
docker run --rm ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.TEST_TAG }} test

- name: Build and push
uses: docker/build-push-action@v6
id: build-push-image
if: ${{ github.event_name == 'release' || github.event_name == 'push' }}
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64/v8
cache-from: type=gha
cache-to: type=gha,mode=max
10 changes: 5 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Install uv
uses: astral-sh/setup-uv@v7.1.2
Expand All @@ -31,7 +31,6 @@ jobs:

- name: Run code formatting checks
shell: bash
continue-on-error: false
run: uv run ruff format --check

code_linting:
Expand All @@ -40,7 +39,7 @@ jobs:
needs: code_format
steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Install uv
uses: astral-sh/setup-uv@v7.1.2
Expand All @@ -51,6 +50,7 @@ jobs:
run: uv sync

- name: Run code linting checks
continue-on-error: true
run: uv run ruff check --output-format=github .

spark_tests:
Expand All @@ -60,7 +60,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Run spark tests
shell: bash
Expand Down Expand Up @@ -92,7 +92,7 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Install uv
uses: astral-sh/setup-uv@v7.1.2
Expand Down
17 changes: 9 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,19 @@ ENV DEBIAN_FRONTEND=noninteractive
# Install tini and git
RUN apt-get update -y && apt-get install -y --no-install-recommends tini git

# Set up a non-root user
RUN groupadd --system --gid 999 nonroot \
&& useradd --system --gid 999 --uid 999 --create-home nonroot

# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1

# Copy from the cache instead of linking since it's a mounted volume
ENV UV_LINK_MODE=copy

# Omit development dependencies
ENV UV_NO_DEV=1
# ENV UV_NO_DEV=1
ENV UV_NO_SYNC=1

# Ensure installed tools can be executed out of the box
ENV UV_TOOL_BIN_DIR=/usr/local/bin
Expand All @@ -31,21 +36,17 @@ WORKDIR /app
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --locked --no-install-project
uv sync --locked --no-install-project --no-editable

# Then, add the rest of the project source code and install it
# Installing separately from its dependencies allows optimal layer caching
COPY . /app
COPY --chown=nonroot:nonroot . /app
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked
uv sync --locked --no-editable

# Place executables in the environment at the front of the path
ENV PATH="/app/.venv/bin:$PATH"

# Set up a non-root user
RUN groupadd --system --gid 999 nonroot \
&& useradd --system --gid 999 --uid 999 --create-home nonroot

COPY --chmod=+x ./scripts/entrypoint.sh /app/
# Use the non-root user to run our application
USER nonroot
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ authors = [

dependencies = [
"bioregistry>=0.13.20",
"boto3[crt]>=1.42.55",
"click>=8.3.1",
"defusedxml>=0.7.1",
"delta-spark>=4.1.0",
"dlt[deltalake,filesystem,parquet]>=1.22.2",
"lxml>=6.0.2",
"pydantic>=2.12.5",
Expand All @@ -25,7 +28,7 @@ uniref_pipeline = "cdm_data_loader_utils.pipelines.uniref_pipeline:cli"

[dependency-groups]
dev = [
"berdl-notebook-utils>=0.0.1",
"berdl-notebook-utils",
"biopython>=1.86",
"pytest>=9.0.2",
"pytest-asyncio>=1.3.0",
Expand Down
13 changes: 10 additions & 3 deletions scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -euo pipefail

# Ensure at least one argument is provided
if [ "$#" -eq 0 ]; then
echo "Usage: $0 {uniref|uniprot} [args...]"
echo "Usage: $0 {uniref|uniprot|test} [args...]"
exit 1
fi

Expand All @@ -13,11 +13,18 @@ shift
case "$cmd" in
uniref)
# Run the uniref pipeline with any additional arguments via tini
exec /usr/bin/tini -- uv run uniref_pipeline "$@"
exec /usr/bin/tini -- uv run --no-sync uniref_pipeline "$@"
;;
uniprot)
# Run the uniprot pipeline with any additional arguments via tini
exec /usr/bin/tini -- uv run uniprot_pipeline "$@"
exec /usr/bin/tini -- uv run --no-sync uniprot_pipeline "$@"
;;
test)
# Run the tests, excluding those marked "requires_spark", via tini
exec /usr/bin/tini -- uv run --no-sync pytest -m "not requires_spark"
;;
bash)
exec /usr/bin/tini -- /bin/bash
;;
*)
echo "Error: unknown command '$cmd'; valid commands are 'uniref' or 'uniprot'." >&2
Expand Down
Loading
Loading