add package manager option #75

daviddkkim · 2025-07-07T19:47:47Z

Should we define a default? It looks like if `package_manager` is "", it goes into an empty case statement.

Yeah, it's a bit odd, but it's for backward compatibility. I could try without the default `""`, but ultimately the code will fall back to `""` anyway because of the zod parsing.

ahh gotchu! makes sense

daviddkkim · 2025-07-07T19:48:28Z

Wondering if default should be defined and this should be removed -- what happens in this empty case?

Yeah, for backward compatibility it'll be whatever we were doing before: for node it'll be `npx`, and for python it'll be the pip-installed `braintrust`. Is that what you had in mind? Right now the empty case is just a switch case aliased to `npm`.

Ah, I forgot that a case with no return falls through to the next one -- this makes sense.

-Original file line number
+Diff line change
@@ -0,0 +1,50 @@
+    name: Run Python evals
+    on:
+      push:
+        # files:
+        #   - 'test-eval/**'
+    permissions:
+      pull-requests: write
+      contents: read
+    jobs:
+      eval:
+        name: Run Python evals
+        runs-on: ubuntu-latest
+        steps:
+          - name: Checkout
+            id: checkout
+            uses: actions/checkout@v4
+            with:
+              fetch-depth: 0
+              submodules: "recursive"
+          - name: Install uv
+            uses: astral-sh/setup-uv@v5
+          - name: Set up Python
+            uses: actions/setup-python@v4
+            with:
+              python-version: "3.12" # TODO: Matrix test different versions
+          - name: Install dependencies
+            run: |
+              cd test-eval-py
+              uv lock --check
+              uv sync --no-dev
+          - name: Run Evals
+            uses: ./
+            with:
+              api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+              root: test-eval-py
+              runtime: python
+              package_manager: uv
+          # - name: Start terminal session
+          #   uses: mxschmitt/action-tmate@v3
+          #   with:
+          #     limit-access-to-actor: true

-Original file line number
+Diff line change
@@ Expand Up / @@ -22,11 +22,13 @@ You can configure the following variables: @@
     - `paths`: Specific paths, relative to the root, containing evals you'd like to
       run.
     - `runtime`: Either `node` or `python`
+    - `package_manager`: Either `npm` or `pnpm` for a `node` runtime, or `pip` or
+      `uv` for a `python` runtime. If omitted, `node` uses `npx` and `python`
+      runs the `braintrust` CLI directly.
     - `use_proxy`: Either `true` or `false`. If set, `OPENAI_BASE_URL` will be set
       to `https://braintrustproxy.com/v1`, which will automatically cache repetitive
       LLM calls and run your evals faster. Defaults to `true`.
-    - `terminate_on_failure`: Either `true` or `false`. If set to `true`, the evaluation
-      process will stop when an error occurs. Defaults to `false`.
+    - `terminate_on_failure`: Either `true` or `false`. If set to `true`, the
+      evaluation process will stop when an error occurs. Defaults to `false`.
     ## Full example
@@ Expand Down Expand Up / @@ -82,9 +84,10 @@ jobs: @@
     To see examples of fully configured templates, see the `examples` directory:
-    - [`node with npm`](examples/npm.yml)
-    - [`node with pnpm`](examples/pnpm.yml)
-    - [`python`](examples/python.yml)
+    - [`node with npm`](examples/node/npm.yml)
+    - [`node with pnpm`](examples/node/pnpm.yml)
+    - [`python with pip`](examples/python/pip.yml)
+    - [`python with uv`](examples/python/uv.yml)
     ## How it works
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -23,6 +23,12 @@ inputs: @@
       runtime:
         description: "The runtime to use for evals. Valid values: node, python."
         required: true
+      package_manager:
+        description:
+          "The package manager to use for evals. Valid values: npm or pnpm for the
+          node runtime, pip or uv for the python runtime."
+        required: false
+        default: ""
       use_proxy:
         description:
           "Whether to use the Braintrust proxy (to cache LLM calls). Set to 'true'
@@ Expand All / @@ -31,8 +37,8 @@ inputs: @@
         default: "true"
       terminate_on_failure:
         description:
-          "Whether to terminate the evaluation process when an error occurs. Set to 'true'
-          or 'false'."
+          "Whether to terminate the evaluation process when an error occurs. Set to
+          'true' or 'false'."
         required: false
         default: "false"
       github_token:
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -17,6 +17,7 @@ function snakeToCamelCase(str: string) { @@
     }
     async function runCommand(command: string, onSummary: OnSummaryFn) {
+      core.info(`> $ ${command}`);
       return new Promise((resolve, reject) => {
         const process = execSync(command);
@@ Expand Down Expand Up @@
       // Change working directory
       process.chdir(path.resolve(root));
-      let command: string;
       const terminateFlag = terminate_on_failure ? "--terminate-on-failure" : "";
-      switch (args.runtime) {
-        case "node":
-          command = `npx braintrust eval --jsonl ${terminateFlag} ${paths}`;
-          break;
-        case "python":
-          command = `braintrust eval --jsonl ${terminateFlag} ${paths}`;
-          break;
-        default:
-          throw new Error(`Unsupported runtime: ${args.runtime}`);
-      }
+      const baseCommand = (() => {
+        switch (args.runtime.toLowerCase().trim()) {
+          case "node":
+            switch (args.package_manager) {
+              case "":
+              case "npm":
+                return "npx braintrust";
+              case "pnpm":
+                return "pnpm dlx braintrust";
+              default:
+                throw new Error(
+                  `Unsupported package manager: ${args.package_manager}`,
+                );
+            }
+          case "python":
+            switch ((args.package_manager || "").toLowerCase().trim()) {
+              case "":
+              case "pip":
+                return `braintrust`;
+              case "uv":
+                return `uv run braintrust`;
+              default:
+                throw new Error(
+                  `Unsupported package manager: ${args.package_manager}`,
+                );
+            }
+          default:
+            throw new Error(`Unsupported runtime: ${args.runtime}`);
+        }
+      })();
+      const command = `${baseCommand} eval --jsonl ${terminateFlag} ${paths}`;
       await runCommand(command, onSummary);
     }

-Original file line number
+Diff line change
@@ Expand Up / @@ -6,23 +6,49 @@ import { ExperimentSummary } from "braintrust"; @@
     import { capitalize } from "@braintrust/core";
     import { z } from "zod";
-    const paramsSchema = z.strictObject({
-      api_key: z.string(),
-      root: z.string(),
-      paths: z.string(),
-      runtime: z.enum(["node", "python"]),
-      use_proxy: z
-        .string()
-        .toLowerCase()
-        .transform(x => JSON.parse(x))
-        .pipe(z.boolean()),
-      terminate_on_failure: z
-        .string()
-        .toLowerCase()
-        .transform(x => JSON.parse(x))
-        .pipe(z.boolean())
-        .default("false"),
-    });
+    const nodeManagers = ["npm", "pnpm"];
+    const pythonManagers = ["pip", "uv"];
+    const paramsSchema = z
+      .strictObject({
+        api_key: z.string(),
+        root: z.string(),
+        paths: z.string(),
+        runtime: z.enum(["node", "python"]),
+        package_manager: z
+          .enum(["", ...nodeManagers, ...pythonManagers])
+          .describe("The preferred package manager for the runtime selected")
+          .default(""),
+        use_proxy: z
+          .string()
+          .toLowerCase()
+          .transform(x => JSON.parse(x))
+          .pipe(z.boolean()),
+        terminate_on_failure: z
+          .string()
+          .toLowerCase()
+          .transform(x => JSON.parse(x))
+          .pipe(z.boolean())
+          .default("false"),
+      })
+      .refine(
+        data => {
+          if (data.package_manager === "") {
+            return true;
+          }
+          if (data.runtime === "node") {
+            return nodeManagers.includes(data.package_manager as any);
+          }
+          if (data.runtime === "python") {
+            return pythonManagers.includes(data.package_manager as any);
+          }
+          return false;
+        },
+        {
+          message: "Package manager must match the selected runtime",
+          path: ["package_manager"], // This will show the error on the package_manager field
+        },
+      );
     export type Params = z.infer<typeof paramsSchema>;
     const TITLE = "## Braintrust eval report\n";
@@ Expand All / @@ -37,6 +63,7 @@ async function main(): Promise<void> { @@
         root: core.getInput("root"),
         paths: core.getInput("paths"),
         runtime: core.getInput("runtime"),
+        package_manager: core.getInput("package_manager"),
         use_proxy: core.getInput("use_proxy"),
         terminate_on_failure: core.getInput("terminate_on_failure"),
       });
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

add package manager option #75

Uh oh!

Diff view

Diff view

There are no files selected for viewing

daviddkkim Jul 7, 2025

Uh oh!

ibolmo Jul 7, 2025

Uh oh!

daviddkkim Jul 7, 2025

Uh oh!

Uh oh!

Uh oh!

daviddkkim Jul 7, 2025

Uh oh!

ibolmo Jul 7, 2025 •

edited

Loading

Uh oh!

daviddkkim Jul 7, 2025

Uh oh!

-Original file line number
+Diff line change
@@ Expand Up / @@ -41,4 +41,5 @@ jobs: @@
             with:
               api_key: ${{ secrets.BRAINTRUST_API_KEY }}
               runtime: node
+              package_manager: pnpm
               root: my_eval_dir

-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+    [tools]
+    node = "20.6.0"
+    pnpm = "8"
+    python = "latest"

-Original file line number
+Diff line change
@@ Expand Up / @@ -25,7 +25,7 @@ GREEN='\033[0;32m' @@
     BLUE='\033[0;34m'
     # Get the latest release tag
-    latest_tag=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+    latest_tag=$(git tag -l 'v*' --sort=-v:refname | head -n 1)
     if [[ -z "$latest_tag" ]]; then
     	# There are no existing release tags
@@ Expand Down Expand Up / @@ -59,6 +59,6 @@ git tag -a "$tag_first_part" -m "$tag_first_part Release" -f @@
     echo -e "${GREEN}Tagged: $tag_first_part${OFF}"
     # Push the new tag to the remote
-    git push --tags -f
+    git push --tags
     echo -e "${GREEN}Release tag pushed to remote${OFF}"
     echo -e "${GREEN}Done!${OFF}"

add package manager option #75

Uh oh!

add package manager option #75

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

daviddkkim Jul 7, 2025

Choose a reason for hiding this comment

Uh oh!

ibolmo Jul 7, 2025

Choose a reason for hiding this comment

Uh oh!

daviddkkim Jul 7, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

daviddkkim Jul 7, 2025

Choose a reason for hiding this comment

Uh oh!

ibolmo Jul 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

daviddkkim Jul 7, 2025

Choose a reason for hiding this comment

Uh oh!

ibolmo Jul 7, 2025 •

edited

Loading