diff --git a/js/src/cli/functions/infer-source.ts b/js/src/cli/functions/infer-source.ts index 563e57087..179b5c50d 100644 --- a/js/src/cli/functions/infer-source.ts +++ b/js/src/cli/functions/infer-source.ts @@ -51,8 +51,10 @@ function locationToString(location: CodeBundle["location"]): string { return `eval ${location.eval_name} -> ${location.position.type}`; } else if (location.type === "function") { return `task ${location.index}`; + } else if (location.type === "sandbox") { + return `sandbox eval ${location.eval_name}`; } else { - throw new Error(`Unsupported location type: ${location.type}`); + throw new Error(`Unknown location type`); } } @@ -66,7 +68,7 @@ export async function findCodeDefinition({ // eslint-disable-next-line @typescript-eslint/no-unsafe-function-type let fn: Function | undefined = undefined; - if (location.type === "experiment") { + if (location.type === "experiment" || location.type === "sandbox") { const evaluator = outFileModule.evaluators[location.eval_name]?.evaluator; if (!evaluator) { console.warn( @@ -77,14 +79,18 @@ export async function findCodeDefinition({ return undefined; } - fn = - location.position.type === "task" - ? evaluator.task - : evaluator.scores[location.position.index]; + if (location.type === "sandbox") { + fn = evaluator.task; + } else { + fn = + location.position.type === "task" + ? 
evaluator.task + : evaluator.scores[location.position.index]; + } } else if (location.type === "function") { fn = outFileModule.functions[location.index].handler; } else { - throw new Error(`Unsupported location type: ${location.type}`); + throw new Error(`Unknown location type`); } if (!fn) { diff --git a/js/src/cli/functions/upload.ts b/js/src/cli/functions/upload.ts index 841f24f5e..f9c59f075 100644 --- a/js/src/cli/functions/upload.ts +++ b/js/src/cli/functions/upload.ts @@ -21,7 +21,11 @@ import { findCodeDefinition, makeSourceMapContext } from "./infer-source"; import { slugify } from "../../../util/string_util"; import { zodToJsonSchema } from "../../zod/utils"; import pluralize from "pluralize"; -import { FunctionEvent, ProjectNameIdMap } from "../../framework2"; +import { + FunctionEvent, + ProjectNameIdMap, + serializeRemoteEvalParametersContainer, +} from "../../framework2"; export type EvaluatorMap = Record< string, @@ -44,6 +48,8 @@ interface BundledFunctionSpec { metadata?: Record; } +const SANDBOX_GROUP_NAME_METADATA_KEY = "_bt_sandbox_group_name"; + const pathInfoSchema = z .strictObject({ url: z.string(), @@ -152,6 +158,52 @@ export async function uploadHandleBundles({ } : undefined; + if (setCurrent) { + const sourceStem = path + .basename(sourceFile, path.extname(sourceFile)) + .replace(/\.eval$/, ""); + const evalName = evaluator.evaluator.evalName; + const sandboxGroupName = sourceStem; + + const resolvedParameters = evaluator.evaluator.parameters + ? await Promise.resolve(evaluator.evaluator.parameters) + : undefined; + + const evaluatorDefinition = { + ...(resolvedParameters + ? 
{ + parameters: + serializeRemoteEvalParametersContainer(resolvedParameters), + } + : {}), + scores: evaluator.evaluator.scores.map((score, i) => ({ + name: scorerName(score, i), + })), + }; + + bundleSpecs.push({ + ...baseInfo, + name: `Eval ${evalName} sandbox`, + slug: slugify(`${sourceStem}-${evalName}-sandbox`), + description: `Sandbox eval ${evalName}`, + location: { + type: "sandbox", + sandbox_spec: { + provider: "lambda", + }, + entrypoints: [sourceFile], + eval_name: evalName, + evaluator_definition: evaluatorDefinition, + }, + function_type: "sandbox", + metadata: { + [SANDBOX_GROUP_NAME_METADATA_KEY]: sandboxGroupName, + }, + origin, + }); + continue; + } + const fileSpecs: BundledFunctionSpec[] = [ { ...baseInfo, @@ -355,10 +407,12 @@ async function uploadBundles({ runtime_context, location: spec.location, bundle_id: pathInfo!.bundleId, - preview: await findCodeDefinition({ - location: spec.location, - ctx: sourceMapContext, - }), + preview: sourceMapContext + ? await findCodeDefinition({ + location: spec.location, + ctx: sourceMapContext, + }) + : undefined, }, }, origin: spec.origin, diff --git a/js/src/exports.ts b/js/src/exports.ts index 0f579c5b4..2e7e3ee04 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -40,6 +40,12 @@ export type { WithTransactionId, } from "./logger"; +export type { + SandboxConfig, + RegisterSandboxOptions, + RegisterSandboxResult, +} from "./sandbox"; + export { Attachment, BaseAttachment, @@ -113,6 +119,8 @@ export { registerOtelFlush, } from "./logger"; +export { registerSandbox } from "./sandbox"; + // Internal isomorph layer for platform-specific implementations import _internalIso from "./isomorph"; export { _internalIso }; diff --git a/js/src/sandbox.ts b/js/src/sandbox.ts new file mode 100644 index 000000000..53b96f98e --- /dev/null +++ b/js/src/sandbox.ts @@ -0,0 +1,193 @@ +import { z } from "zod/v3"; +import { slugify } from "../util/string_util"; +import { type IfExistsType } from "./generated_types"; 
+import { type BraintrustState, _internalGetGlobalState } from "./logger";
+
+/**
+ * Configuration for a sandbox runtime.
+ * @internal
+ */
+export interface SandboxConfig {
+  /** The sandbox provider. Currently only "modal" is supported. */
+  provider: "modal";
+  /** Reference to the sandbox snapshot. */
+  snapshotRef: string;
+}
+
+/**
+ * Options for registering a sandbox function.
+ * @internal
+ */
+export interface RegisterSandboxOptions {
+  /** Group name for the sandbox functions. */
+  name: string;
+  /** Name of the project to register the sandbox in. */
+  project: string;
+  /** Sandbox configuration (provider and snapshot reference). */
+  sandbox: SandboxConfig;
+  /** Optional list of entrypoints available in the sandbox. */
+  entrypoints?: string[];
+  /** Optional description, applied to every registered function. */
+  description?: string;
+  /** Optional metadata; the sandbox group-name key is always set to `name`. */
+  metadata?: Record<string, unknown>;
+  /** What to do if function already exists. Defaults to "replace". */
+  ifExists?: IfExistsType;
+  /** Braintrust API key. Uses BRAINTRUST_API_KEY env var if not provided. */
+  apiKey?: string;
+  /** Braintrust app URL. Uses default if not provided. */
+  appUrl?: string;
+  /** Organization name. */
+  orgName?: string;
+  /** Optional BraintrustState instance. Defaults to the global state. */
+  state?: BraintrustState;
+}
+
+/**
+ * Result of registering a sandbox.
+ * @internal
+ */
+export interface RegisterSandboxResult {
+  /** Project ID the sandbox is registered in. */
+  projectId: string;
+  /** Registered eval functions discovered from this sandbox. */
+  functions: {
+    /** Eval name discovered from sandbox list endpoint. */
+    evalName: string;
+    /** Unique identifier for the function. */
+    id: string;
+    /** Function name. */
+    name: string;
+    /** URL-friendly identifier. */
+    slug: string;
+  }[];
+}
+
+const SANDBOX_GROUP_NAME_METADATA_KEY = "_bt_sandbox_group_name";
+
+/**
+ * Register the evals exposed by a sandbox as Braintrust functions.
+ *
+ * Logs in, registers the project, queries the proxy `function/sandbox-list`
+ * endpoint, then posts one `v1/function` entry per eval name it returns.
+ *
+ * @param options Configuration for the sandbox to register.
+ * @returns The project ID and the functions registered for each eval.
+ * @throws Error if no organization name is available after login.
+ * @internal
+ *
+ * @example
+ * ```typescript
+ * const result = await registerSandbox({
+ *   name: "My Sandbox",
+ *   project: "My Project",
+ *   entrypoints: ["./my-eval.eval.ts"],
+ *   sandbox: {
+ *     provider: "modal",
+ *     snapshotRef: "sb-xxx",
+ *   },
+ * });
+ * console.log(result.functions.map((f) => f.id));
+ * ```
+ */
+export async function registerSandbox(
+  options: RegisterSandboxOptions,
+): Promise<RegisterSandboxResult> {
+  const state = options.state ?? _internalGetGlobalState();
+  await state.login({
+    apiKey: options.apiKey,
+    appUrl: options.appUrl,
+    orgName: options.orgName,
+  });
+
+  // Fail fast: validate the org name before issuing any requests.
+  const orgName = state.orgName;
+  if (!orgName) {
+    throw new Error("Organization name is required to register sandbox evals");
+  }
+
+  // Get project ID via project registration
+  const projectResponse = await state
+    .appConn()
+    .post_json("api/project/register", {
+      project_name: options.project,
+      org_id: state.orgId,
+    });
+  const projectId = projectResponse.project.id;
+
+  const runtimeContext = {
+    runtime: "node",
+    version: process.version.slice(1),
+  } as const;
+  const sandboxSpec = {
+    provider: options.sandbox.provider,
+    snapshot_ref: options.sandbox.snapshotRef,
+  };
+
+  const listResponse = await state.proxyConn().post(
+    "function/sandbox-list",
+    {
+      sandbox_spec: sandboxSpec,
+      entrypoints: options.entrypoints,
+      project_id: projectId,
+    },
+    {
+      headers: {
+        "x-bt-org-name": orgName,
+      },
+    },
+  );
+  const evaluatorDefinitions = z
+    .record(z.unknown())
+    .parse(await listResponse.json());
+
+  const functions: RegisterSandboxResult["functions"] = [];
+  for (const [evalName, evaluatorDefinition] of Object.entries(
+    evaluatorDefinitions,
+  )) {
+    const functionDef: Record<string, unknown> = {
+      project_id: projectId,
+      org_name: orgName,
+      name: evalName,
+      slug: slugify(evalName, { lower: true, strict: true }),
+      function_type: "sandbox",
+      function_data: {
+        type: "code",
+        data: {
+          type: "bundle",
+          runtime_context: runtimeContext,
+          location: {
+            type: "sandbox",
+            sandbox_spec: sandboxSpec,
+            entrypoints: options.entrypoints,
+            eval_name: evalName,
+            evaluator_definition: evaluatorDefinition,
+          },
+          bundle_id: null,
+          preview: null,
+        },
+      },
+      metadata: {
+        // Caller metadata is spread first so the reserved group key wins.
+        ...(options.metadata ?? {}),
+        [SANDBOX_GROUP_NAME_METADATA_KEY]: options.name,
+      },
+      if_exists: options.ifExists ?? "replace",
+    };
+    if (options.description !== undefined) {
+      functionDef.description = options.description;
+    }
+
+    const response = await state
+      .apiConn()
+      .post_json("v1/function", functionDef);
+    functions.push({
+      evalName,
+      id: response.id,
+      name: response.name,
+      slug: response.slug,
+    });
+  }
+
+  return { projectId, functions };
+}