`.github/workflows/publish.yml` — 33 changes: 30 additions & 3 deletions
```diff
@@ -1,12 +1,13 @@
-name: Create GitHub Release
+name: Publish release

 on:
   push:
     tags:
-      - 'v*.*.*'
+      - "v*.*.*"

 jobs:
-  create-release:
+  build:
+    name: Build distributions and create GitHub release
     runs-on: ubuntu-latest
     permissions:
       contents: write
@@ -21,6 +22,12 @@ jobs:
       - name: Build package
         run: uv build

+      - name: Upload distribution artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
       - name: Create GitHub Release
         uses: softprops/action-gh-release@v2
         with:
@@ -30,3 +37,23 @@ jobs:
             dist/*.tar.gz
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  publish-to-pypi:
+    name: Publish to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/every-eval-ever
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Download distribution artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Publish distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
```
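With this change, the whole pipeline (build, GitHub release, PyPI publish) is driven by pushing a version tag that matches the `on.push.tags` filter. A minimal sketch of triggering it from a clone of the repo — the `v0.1.0` tag name is illustrative:

```shell
# Cut a release by pushing a tag that matches the "v*.*.*"
# filter in publish.yml (the version number here is illustrative)
git tag v0.1.0
git push origin v0.1.0   # the tag push starts the "Publish release" workflow

# Preview which existing tags match the filter
# (git's glob syntax is similar to the workflow's tag pattern)
git tag -l 'v*.*.*'
```

Note that `publish-to-pypi` needs no password or API token: the `id-token: write` permission lets `pypa/gh-action-pypi-publish` authenticate via PyPI's trusted-publishing (OIDC) flow, scoped to the `pypi` environment.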
`README.md` — 3 changes: 3 additions & 0 deletions
```diff
@@ -1,5 +1,8 @@
 # Every Eval Ever

+[![PyPI version](https://img.shields.io/pypi/v/every-eval-ever.svg)](https://pypi.org/project/every-eval-ever/)
+[![PyPI downloads](https://img.shields.io/pypi/dm/every-eval-ever.svg)](https://pypistats.org/packages/every-eval-ever)
+
 > [EvalEval Coalition](https://evalevalai.com) — "We are a researcher community developing scientifically grounded research outputs and robust deployment infrastructure for broader impact evaluations."

 **Every Eval Ever** is a shared schema and crowdsourced eval database. It defines a standardized metadata format for storing AI evaluation results — from leaderboard scrapes and research papers to local evaluation runs — so that results from different frameworks can be compared, reproduced, and reused. The three components that make it work:
```