-
Notifications
You must be signed in to change notification settings - Fork 6
add package manager option #75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| name: Run Python evals | ||
|
|
||
| on: | ||
| push: | ||
| # files: | ||
| # - 'test-eval/**' | ||
|
|
||
| permissions: | ||
| pull-requests: write | ||
| contents: read | ||
|
|
||
| jobs: | ||
| eval: | ||
| name: Run Python evals | ||
| runs-on: ubuntu-latest | ||
|
|
||
| steps: | ||
| - name: Checkout | ||
| id: checkout | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| fetch-depth: 0 | ||
| submodules: "recursive" | ||
|
|
||
| - name: Install uv | ||
| uses: astral-sh/setup-uv@v5 | ||
|
|
||
| - name: Set up Python | ||
| uses: actions/setup-python@v4 | ||
| with: | ||
| python-version: "3.12" # TODO: Matrix test different versions | ||
|
|
||
| - name: Install dependencies | ||
| run: | | ||
| cd test-eval-py | ||
| uv lock --check | ||
| uv sync --no-dev | ||
|
|
||
| - name: Run Evals | ||
| uses: ./ | ||
| with: | ||
| api_key: ${{ secrets.BRAINTRUST_API_KEY }} | ||
| root: test-eval-py | ||
| runtime: python | ||
| package_manager: uv | ||
|
|
||
| # - name: Start terminal session | ||
| # uses: mxschmitt/action-tmate@v3 | ||
| # with: | ||
| # limit-access-to-actor: true |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ function snakeToCamelCase(str: string) { | |
| } | ||
|
|
||
| async function runCommand(command: string, onSummary: OnSummaryFn) { | ||
| core.info(`> $ ${command}`); | ||
| return new Promise((resolve, reject) => { | ||
| const process = execSync(command); | ||
|
|
||
|
|
@@ -76,18 +77,40 @@ export async function runEval(args: Params, onSummary: OnSummaryFn) { | |
| // Change working directory | ||
| process.chdir(path.resolve(root)); | ||
|
|
||
| let command: string; | ||
| const terminateFlag = terminate_on_failure ? "--terminate-on-failure" : ""; | ||
|
|
||
| switch (args.runtime) { | ||
| case "node": | ||
| command = `npx braintrust eval --jsonl ${terminateFlag} ${paths}`; | ||
| break; | ||
| case "python": | ||
| command = `braintrust eval --jsonl ${terminateFlag} ${paths}`; | ||
| break; | ||
| default: | ||
| throw new Error(`Unsupported runtime: ${args.runtime}`); | ||
| } | ||
| const baseCommand = (() => { | ||
| switch (args.runtime.toLowerCase().trim()) { | ||
| case "node": | ||
| switch (args.package_manager) { | ||
| case "": | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wondering if default should be defined and this should be removed -- what happens in this empty case?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah for backward compatibility it'll be whatever we were doing before. so for node it'll be npx and for python it'll be pip. is that what you had in mind? right now it's just a
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeet i forgot about the case statement behavior on no returns -- this makes sense |
||
| case "npm": | ||
| return "npx braintrust"; | ||
| case "pnpm": | ||
| return "pnpm dlx braintrust"; | ||
| default: | ||
| throw new Error( | ||
| `Unsupported package manager: ${args.package_manager}`, | ||
| ); | ||
| } | ||
| case "python": | ||
| switch ((args.package_manager || "").toLowerCase().trim()) { | ||
| case "": | ||
| case "pip": | ||
| return `braintrust`; | ||
| case "uv": | ||
| return `uv run braintrust`; | ||
| default: | ||
| throw new Error( | ||
| `Unsupported package manager: ${args.package_manager}`, | ||
| ); | ||
| } | ||
| default: | ||
| throw new Error(`Unsupported runtime: ${args.runtime}`); | ||
| } | ||
| })(); | ||
|
|
||
| const command = `${baseCommand} eval --jsonl ${terminateFlag} ${paths}`; | ||
|
|
||
| await runCommand(command, onSummary); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,4 +41,5 @@ jobs: | |
| with: | ||
| api_key: ${{ secrets.BRAINTRUST_API_KEY }} | ||
| runtime: node | ||
| package_manager: pnpm | ||
| root: my_eval_dir | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| name: Run Python evals | ||
|
|
||
| on: | ||
| push: | ||
| # Uncomment to run only when files in the 'evals' directory change | ||
| # - paths: | ||
| # - "evals/**" | ||
|
|
||
| permissions: | ||
| pull-requests: write | ||
| contents: read | ||
|
|
||
| jobs: | ||
| eval: | ||
| name: Run evals | ||
| runs-on: ubuntu-latest | ||
|
|
||
| steps: | ||
| - name: Checkout | ||
| id: checkout | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| fetch-depth: 0 | ||
|
|
||
| - name: Set up Python | ||
| uses: actions/setup-python@v4 | ||
| with: | ||
| python-version: "3.12" # Replace with your Python version | ||
|
|
||
| # Tweak this to a dependency manager of your choice | ||
| - name: Install dependencies | ||
| run: | | ||
| python -m pip install --upgrade pip | ||
| pip install -r test-eval-py/requirements.txt | ||
|
|
||
| - name: Run Evals | ||
| uses: braintrustdata/eval-action@v1 | ||
| with: | ||
| api_key: ${{ secrets.BRAINTRUST_API_KEY }} | ||
| runtime: python | ||
| package_manager: uv | ||
| root: my_eval_dir |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| [tools] | ||
| node = "20.6.0" | ||
| pnpm = "8" | ||
| python = "latest" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we define a default? it looks like if package_manager is "", it goes into an empty case statement
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah it's a bit odd, but it's for backward compatibility. I could try without the default '', but ultimately in the code it will fall back to '' due to the zod parsing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ahh gotchu! makes sense