Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions js/src/cli/functions/infer-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ function locationToString(location: CodeBundle["location"]): string {
return `eval ${location.eval_name} -> ${location.position.type}`;
} else if (location.type === "function") {
return `task ${location.index}`;
} else if (location.type === "sandbox") {
return `sandbox eval ${location.eval_name}`;
} else {
throw new Error(`Unsupported location type: ${location.type}`);
throw new Error(`Unknown location type`);
}
}

Expand All @@ -66,7 +68,7 @@ export async function findCodeDefinition({
// eslint-disable-next-line @typescript-eslint/no-unsafe-function-type
let fn: Function | undefined = undefined;

if (location.type === "experiment") {
if (location.type === "experiment" || location.type === "sandbox") {
const evaluator = outFileModule.evaluators[location.eval_name]?.evaluator;
if (!evaluator) {
console.warn(
Expand All @@ -77,14 +79,18 @@ export async function findCodeDefinition({
return undefined;
}

fn =
location.position.type === "task"
? evaluator.task
: evaluator.scores[location.position.index];
if (location.type === "sandbox") {
fn = evaluator.task;
} else {
fn =
location.position.type === "task"
? evaluator.task
: evaluator.scores[location.position.index];
}
} else if (location.type === "function") {
fn = outFileModule.functions[location.index].handler;
} else {
throw new Error(`Unsupported location type: ${location.type}`);
throw new Error(`Unknown location type`);
}

if (!fn) {
Expand Down
67 changes: 59 additions & 8 deletions js/src/cli/functions/upload.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import {
CodeBundle as CodeBundleSchema,
type CodeBundleType as CodeBundle,
Function as FunctionObjectSchema,
type FunctionType as FunctionObject,
IfExists as IfExistsSchema,
type IfExistsType as IfExists,
} from "../../generated_types";
import type { BuildSuccess, EvaluatorState, FileHandle } from "../types";
Expand All @@ -24,7 +21,11 @@ import { findCodeDefinition, makeSourceMapContext } from "./infer-source";
import { slugify } from "../../../util/string_util";
import { zodToJsonSchema } from "../../zod/utils";
import pluralize from "pluralize";
import { FunctionEvent, ProjectNameIdMap } from "../../framework2";
import {
FunctionEvent,
ProjectNameIdMap,
serializeRemoteEvalParametersContainer,
} from "../../framework2";

export type EvaluatorMap = Record<
string,
Expand All @@ -47,6 +48,8 @@ interface BundledFunctionSpec {
metadata?: Record<string, unknown>;
}

const SANDBOX_GROUP_NAME_METADATA_KEY = "_bt_sandbox_group_name";

const pathInfoSchema = z
.strictObject({
url: z.string(),
Expand Down Expand Up @@ -155,6 +158,52 @@ export async function uploadHandleBundles({
}
: undefined;

if (setCurrent) {
const sourceStem = path
.basename(sourceFile, path.extname(sourceFile))
.replace(/\.eval$/, "");
const evalName = evaluator.evaluator.evalName;
const sandboxGroupName = sourceStem;

const resolvedParameters = evaluator.evaluator.parameters
? await Promise.resolve(evaluator.evaluator.parameters)
: undefined;

const evaluatorDefinition = {
...(resolvedParameters
? {
parameters:
serializeRemoteEvalParametersContainer(resolvedParameters),
}
: {}),
scores: evaluator.evaluator.scores.map((score, i) => ({
name: scorerName(score, i),
})),
};

bundleSpecs.push({
...baseInfo,
name: `Eval ${evalName} sandbox`,
slug: slugify(`${sourceStem}-${evalName}-sandbox`),
description: `Sandbox eval ${evalName}`,
location: {
type: "sandbox",
sandbox_spec: {
provider: "lambda",
},
entrypoints: [sourceFile],
eval_name: evalName,
evaluator_definition: evaluatorDefinition,
},
function_type: "sandbox",
metadata: {
[SANDBOX_GROUP_NAME_METADATA_KEY]: sandboxGroupName,
},
origin,
});
continue;
}

const fileSpecs: BundledFunctionSpec[] = [
{
...baseInfo,
Expand Down Expand Up @@ -358,10 +407,12 @@ async function uploadBundles({
runtime_context,
location: spec.location,
bundle_id: pathInfo!.bundleId,
preview: await findCodeDefinition({
location: spec.location,
ctx: sourceMapContext,
}),
preview: sourceMapContext
? await findCodeDefinition({
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i take it this is because sandbox functions don't have source maps? could we skip the attempt to resolve the source map if we know the type of function?

location: spec.location,
ctx: sourceMapContext,
})
: undefined,
},
},
origin: spec.origin,
Expand Down
8 changes: 8 additions & 0 deletions js/src/exports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ export type {
WithTransactionId,
} from "./logger";

export type {
SandboxConfig,
RegisterSandboxOptions,
RegisterSandboxResult,
} from "./sandbox";

export {
Attachment,
BaseAttachment,
Expand Down Expand Up @@ -113,6 +119,8 @@ export {
registerOtelFlush,
} from "./logger";

export { registerSandbox } from "./sandbox";

// Internal isomorph layer for platform-specific implementations
import _internalIso from "./isomorph";
export { _internalIso };
Expand Down
4 changes: 3 additions & 1 deletion js/src/logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ import {
VALID_SOURCES,
isArray,
isObject,
} from "./util";
} from "../util/index";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

didn't expect these changes 🤔

import { slugify } from "../util/string_util";
import {
type AnyModelParamsType as AnyModelParam,
AttachmentReference as attachmentReferenceSchema,
Expand All @@ -65,6 +66,7 @@ import {
type PromptSessionEventType as PromptSessionEvent,
type RepoInfoType as RepoInfo,
type PromptBlockDataType as PromptBlockData,
type IfExistsType,
} from "./generated_types";

const BRAINTRUST_ATTACHMENT =
Expand Down
195 changes: 195 additions & 0 deletions js/src/sandbox.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import { z } from "zod/v3";
import { slugify } from "../util/string_util";
import { type IfExistsType } from "./generated_types";
import { type BraintrustState, _internalGetGlobalState } from "./logger";

/**
 * Describes the sandbox runtime that backs a registered sandbox function.
 * @internal
 */
export interface SandboxConfig {
  /** Sandbox provider; "modal" is the only value this type accepts. */
  provider: "modal";
  /** Reference identifying the sandbox snapshot. */
  snapshotRef: string;
}

/**
 * Options accepted by `registerSandbox`.
 * @internal
 */
export interface RegisterSandboxOptions {
/**
 * Sandbox group name. Stored in every registered function's metadata under
 * the "_bt_sandbox_group_name" key. It is not used as the function name:
 * function names are derived from discovered eval names.
 */
name: string;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since this is net new, did we want to deprecate? you can just remove the props?

/** Name of the project to register the sandbox in. */
project: string;
/** Sandbox configuration (provider and snapshot reference). */
sandbox: SandboxConfig;
/**
 * Optional list of entrypoints available in the sandbox. Forwarded to the
 * sandbox list request and recorded in each registered function's location.
 */
entrypoints?: string[];
/** Deprecated. Ignored. Function slugs are derived from discovered eval names. */
slug?: string;
/** Optional description. When provided, it is set on every registered function. */
description?: string;
/** Optional metadata. Spread into every registered function's metadata. */
metadata?: Record<string, unknown>;
/** What to do if function already exists. Defaults to "replace". */
ifExists?: IfExistsType;
/** Braintrust API key. Uses BRAINTRUST_API_KEY env var if not provided. */
apiKey?: string;
/** Braintrust app URL. Uses default if not provided. */
appUrl?: string;
/** Organization name. */
orgName?: string;
/** Optional BraintrustState instance. Defaults to the global state. */
state?: BraintrustState;
}

/**
 * Outcome of a `registerSandbox` call.
 * @internal
 */
export interface RegisterSandboxResult {
  /** ID of the project the sandbox functions were registered in. */
  projectId: string;
  /** One entry per eval function registered from this sandbox. */
  functions: Array<{
    /** Eval name as reported by the sandbox list endpoint. */
    evalName: string;
    /** Unique identifier of the registered function. */
    id: string;
    /** Name of the registered function. */
    name: string;
    /** URL-friendly identifier of the registered function. */
    slug: string;
  }>;
}

// Metadata key under which each registered function records its sandbox group name.
const SANDBOX_GROUP_NAME_METADATA_KEY = "_bt_sandbox_group_name";

/**
 * Register the eval functions of a sandbox with Braintrust.
 *
 * Steps, in order: log in on the chosen state, resolve the project ID via
 * `api/project/register`, post to `function/sandbox-list` to obtain the
 * evaluator definitions keyed by eval name, then post one `sandbox`-typed
 * function per eval to `v1/function`.
 *
 * @param options Configuration for the sandbox to register.
 * @returns The project ID and, for each registered function, its eval name
 * together with the `id`, `name` and `slug` taken from the `v1/function` response.
 * @throws Error if the state has no organization name after login.
 * @internal
 *
 * @example
 * ```typescript
 * const result = await registerSandbox({
 *   name: "My Sandbox",
 *   project: "My Project",
 *   entrypoints: ["./my-eval.eval.ts"],
 *   sandbox: {
 *     provider: "modal",
 *     snapshotRef: "sb-xxx",
 *   },
 * });
 * console.log(result.functions.map((f) => f.id));
 * ```
 */
export async function registerSandbox(
options: RegisterSandboxOptions,
): Promise<RegisterSandboxResult> {
// Prefer the caller-supplied state; otherwise fall back to the global state.
const state = options.state ?? _internalGetGlobalState();
await state.login({
apiKey: options.apiKey,
appUrl: options.appUrl,
orgName: options.orgName,
});

// Get project ID via project registration
const projectResponse = await state
.appConn()
.post_json("api/project/register", {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#nit we should likely have a more formal interface like we've done in framework2.ts for projects and now sandboxes, but don't think you have to sort this out.

i've added this to a weekly planning discussion

project_name: options.project,
org_id: state.orgId,
});
const projectId = projectResponse.project.id;
// NOTE(review): this check runs after the project register request above;
// consider validating the organization name before issuing any request.
if (!state.orgName) {
throw new Error("Organization name is required to register sandbox evals");
}

// process.version starts with "v"; slice(1) drops that prefix.
const runtimeContext = {
runtime: "node",
version: process.version.slice(1),
} as const;

// Request the sandbox listing; the JSON body is parsed below as a record
// whose keys are eval names and whose values are evaluator definitions.
const listResponse = await state.proxyConn().post(
"function/sandbox-list",
{
sandbox_spec: {
provider: options.sandbox.provider,
snapshot_ref: options.sandbox.snapshotRef,
},
entrypoints: options.entrypoints,
project_id: projectId,
},
{
headers: {
"x-bt-org-name": state.orgName,
},
},
);
const evaluatorDefinitions = z
.record(z.unknown())
.parse(await listResponse.json());

// Register one function per listed eval; requests are awaited one at a time.
const functions: RegisterSandboxResult["functions"] = [];
for (const [evalName, evaluatorDefinition] of Object.entries(
evaluatorDefinitions,
)) {
// Name and slug both come from the eval name; options.slug is not read.
const functionName = evalName;
const functionSlug = slugify(evalName, { lower: true, strict: true });

const functionDef: Record<string, unknown> = {
project_id: projectId,
org_name: state.orgName,
name: functionName,
slug: functionSlug,
function_type: "sandbox",
function_data: {
type: "code",
data: {
type: "bundle",
runtime_context: runtimeContext,
location: {
type: "sandbox",
sandbox_spec: {
provider: options.sandbox.provider,
snapshot_ref: options.sandbox.snapshotRef,
},
entrypoints: options.entrypoints,
eval_name: evalName,
evaluator_definition: evaluatorDefinition,
},
bundle_id: null,
preview: null,
},
},
// The group-name key is written after the spread, so it overrides a
// same-named key in options.metadata.
metadata: {
...(options.metadata ?? {}),
[SANDBOX_GROUP_NAME_METADATA_KEY]: options.name,
},
if_exists: options.ifExists ?? "replace",
};
// Only send a description when the caller supplied one.
if (options.description !== undefined) {
functionDef.description = options.description;
}

const response = await state
.apiConn()
.post_json("v1/function", functionDef);
functions.push({
evalName,
id: response.id,
name: response.name,
slug: response.slug,
});
}

return {
projectId,
functions,
};
}
1 change: 1 addition & 0 deletions py/src/braintrust/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def is_equal(expected, output):
from .oai import (
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .sandbox import *
from .util import (
BT_IS_ASYNC_ATTRIBUTE, # noqa: F401 # type: ignore[reportUnusedImport]
MarkAsyncWrapper, # noqa: F401 # type: ignore[reportUnusedImport]
Expand Down
Loading
Loading