diff --git a/generated_types.json b/generated_types.json index a0e94f600..8713ad7d9 100644 --- a/generated_types.json +++ b/generated_types.json @@ -558,6 +558,26 @@ "function_ids", "triggered_xact_id" ] + }, + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "mark_attempt_failed" + ] + }, + "function_ids": { + "type": "array", + "items": {}, + "minItems": 1 + } + }, + "required": [ + "kind", + "function_ids" + ] } ] }, @@ -1853,12 +1873,17 @@ ], "additionalProperties": {}, "description": "User-controlled metadata about the dataset" + }, + "url_slug": { + "type": "string", + "description": "URL slug for the dataset. used to construct dataset URLs" } }, "required": [ "id", "project_id", - "name" + "name", + "url_slug" ] }, "DatasetEvent": { @@ -2046,6 +2071,150 @@ "name" ] }, + "EvalStatusPage": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid", + "description": "Unique identifier for the eval status page" + }, + "project_id": { + "type": "string", + "format": "uuid", + "description": "Unique identifier for the project that the eval status page belongs under" + }, + "user_id": { + "type": [ + "string", + "null" + ], + "format": "uuid", + "description": "Identifies the user who created the eval status page" + }, + "created": { + "type": [ + "string", + "null" + ], + "format": "date-time", + "description": "Date of eval status page creation" + }, + "deleted_at": { + "type": [ + "string", + "null" + ], + "format": "date-time", + "description": "Date of eval status page deletion, or null if the eval status page is still active" + }, + "name": { + "type": "string", + "description": "Name of the eval status page" + }, + "description": { + "type": [ + "string", + "null" + ], + "description": "Textual description of the eval status page" + }, + "logo_url": { + "type": [ + "string", + "null" + ], + "description": "URL of the logo to display on the page" + }, + "theme": { + "$ref": "#/components/schemas/EvalStatusPageTheme" + }, + "config": { + "$ref": "#/components/schemas/EvalStatusPageConfig" + } + }, + "required": [ + "id", + "project_id", + "name", + "theme", + "config" + ], + "description": "A public eval status page that displays aggregate experiment results" + }, + "EvalStatusPageConfig": { + "type": "object", + "properties": { + "score_columns": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "description": "The score columns to display on the page" + }, + "metric_columns": { + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + }, + "description": "The metric columns to display on the page" + }, + "grouping_field": { + "type": [ + "string", + "null" + ], + "description": "The metadata field to use for grouping experiments (model)" + }, + "filter": { + "type": [ + "string", + "null" + ], + "description": "BTQL filter to apply to experiment data" + }, + "sort_by": { + "type": [ + "string", + "null" + ], + "description": "Field to sort results by (format: 'score:' or 'metric:')" + }, + "sort_order": { + "type": [ + "string", + "null" + ], + "enum": [ + "asc", + "desc" + ], + "description": "Sort order (ascending or descending)" + }, + "api_key": { + "type": [ + "string", + "null" + ], + "description": "The API key used for fetching experiment data" + } + }, + "description": "Configuration for what data to display" + }, + "EvalStatusPageTheme": { + "type": "string", + "enum": [ + "light", + "dark" + ], + "description": "The theme for the page" + }, "Experiment": { "type": "object", "properties": { @@ -3038,9 +3207,11 @@ "tool", "scorer", "task", + "workflow", "custom_view", "preprocessor", - "facet" + "facet", + "classifier" ] }, "FunctionOutputType": { @@ -3048,6 +3219,8 @@ "enum": [ "completion", "score", + "facet", + "tag", "any" ] }, @@ -3060,7 +3233,9 @@ "tool", "custom_view", "preprocessor", - "facet" + "facet", + "classifier", + "tag" ], "default": "scorer", "description": "The type of global function. Defaults to 'scorer'." @@ -3077,7 +3252,9 @@ "tool", "custom_view", "preprocessor", - "facet" + "facet", + "classifier", + "tag" ] }, "GitMetadataSettings": { @@ -5904,13 +6081,24 @@ "type": "number", "minimum": 0, "maximum": 1 - } + }, + "description": "Map of choices to scores (0-1). Used by scorers." + }, + "choice": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of valid choices without score mapping. Used by classifiers that deposit output to tags." + }, + "allow_no_match": { + "type": "boolean", + "description": "If true, adds a 'No match' option. When selected, no tag is deposited." } }, "required": [ "type", - "use_cot", - "choice_scores" + "use_cot" ] }, "PromptSessionEvent": { @@ -6739,7 +6927,8 @@ "tool", "automation", "facet", - "preprocessor" + "preprocessor", + "classifier" ], "description": "Type of the span, for display purposes only" }, @@ -7063,6 +7252,7 @@ "prompts", "tools", "scorers", + "classifiers", "logs", "monitor", "for_review" @@ -7503,7 +7693,7 @@ "license": { "name": "Apache 2.0" }, - "x-internal-git-sha": "87ac73f4945a47eff2d4e42775ba4dbc58854c73" + "x-internal-git-sha": "c99d1e6fbf062688da7f1f22394c72ea480fe81a" }, "paths": {}, "webhooks": {} diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts index 4389ddd66..47fc447bd 100644 --- a/js/src/generated_types.ts +++ b/js/src/generated_types.ts @@ -1,4 +1,4 @@ -// Auto-generated file (internal git SHA 87ac73f4945a47eff2d4e42775ba4dbc58854c73) -- do not modify +// Auto-generated file (internal git SHA c99d1e6fbf062688da7f1f22394c72ea480fe81a) -- do not modify import { z } from "zod/v3"; @@ -182,6 +182,10 @@ export const AsyncScoringControl = z.union([ function_ids: z.array(z.unknown()).min(1), triggered_xact_id: z.string(), }), + z.object({ + kind: z.literal("mark_attempt_failed"), + function_ids: z.array(z.unknown()).min(1), + }), ]); export type AsyncScoringControlType = z.infer; export const BraintrustAttachmentReference = z.object({ @@ -222,6 +226,8 @@ export const FunctionTypeEnum = z.enum([ "custom_view", "preprocessor", "facet", + "classifier", + "tag", ]); export type FunctionTypeEnumType = z.infer; export const NullableSavedFunctionId = z.union([ @@ -491,6 +497,7 @@ export const Dataset = z.object({ metadata: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), + url_slug: z.string(), }); export type DatasetType = z.infer; export const ObjectReferenceNullish = z.union([ @@ -555,6 +562,33 @@ export const EnvVar = z.object({ .default("env_var"), }); export type EnvVarType = z.infer; +export const EvalStatusPageTheme = z.enum(["light", "dark"]); +export type EvalStatusPageThemeType = z.infer; +export const EvalStatusPageConfig = z + .object({ + score_columns: z.union([z.array(z.string()), z.null()]), + metric_columns: z.union([z.array(z.string()), z.null()]), + grouping_field: z.union([z.string(), z.null()]), + filter: z.union([z.string(), z.null()]), + sort_by: z.union([z.string(), z.null()]), + sort_order: z.union([z.enum(["asc", "desc"]), z.null()]), + api_key: z.union([z.string(), z.null()]), + }) + .partial(); +export type EvalStatusPageConfigType = z.infer; +export const EvalStatusPage = z.object({ + id: z.string().uuid(), + project_id: z.string().uuid(), + user_id: z.union([z.string(), z.null()]).optional(), + created: z.union([z.string(), z.null()]).optional(), + deleted_at: z.union([z.string(), z.null()]).optional(), + name: z.string(), + description: z.union([z.string(), z.null()]).optional(), + logo_url: z.union([z.string(), z.null()]).optional(), + theme: EvalStatusPageTheme, + config: EvalStatusPageConfig, +}); +export type EvalStatusPageType = z.infer; export const RepoInfo = z.union([ z .object({ @@ -603,6 +637,7 @@ export const SpanType = z.union([ "automation", "facet", "preprocessor", + "classifier", ]), z.null(), ]); @@ -787,7 +822,9 @@ export const PromptParserNullish = z.union([ z.object({ type: z.literal("llm_classifier"), use_cot: z.boolean(), - choice_scores: z.record(z.number().gte(0).lte(1)), + choice_scores: z.record(z.number().gte(0).lte(1)).optional(), + choice: z.array(z.string()).optional(), + allow_no_match: z.boolean().optional(), }), z.null(), ]); @@ -859,6 +896,8 @@ export const FunctionTypeEnumNullish = z.union([ "custom_view", "preprocessor", "facet", + "classifier", + "tag", ]), z.null(), ]); @@ -1109,12 +1148,20 @@ export const FunctionObjectType = z.enum([ "tool", "scorer", "task", + "workflow", "custom_view", "preprocessor", "facet", + "classifier", ]); export type FunctionObjectTypeType = z.infer; -export const FunctionOutputType = z.enum(["completion", "score", "any"]); +export const FunctionOutputType = z.enum([ + "completion", + "score", + "facet", + "tag", + "any", +]); export type FunctionOutputTypeType = z.infer; export const GitMetadataSettings = z.object({ collect: z.enum(["all", "none", "some"]), @@ -1770,6 +1817,7 @@ export const View = z.object({ "prompts", "tools", "scorers", + "classifiers", "logs", "monitor", "for_review", diff --git a/js/util/span_types.ts b/js/util/span_types.ts index 0216c4e5b..214b3a741 100644 --- a/js/util/span_types.ts +++ b/js/util/span_types.ts @@ -8,6 +8,7 @@ export const spanTypeAttributeValues = [ "automation", "facet", "preprocessor", + "classifier", ] as const; // DEPRECATED: Use `spanTypeAttributeValues` instead @@ -21,6 +22,7 @@ export enum SpanTypeAttribute { AUTOMATION = "automation", FACET = "facet", PREPROCESSOR = "preprocessor", + CLASSIFIER = "classifier", } export type SpanType = (typeof spanTypeAttributeValues)[number]; diff --git a/py/src/braintrust/_generated_types.py b/py/src/braintrust/_generated_types.py index 85874b093..c503dac88 100644 --- a/py/src/braintrust/_generated_types.py +++ b/py/src/braintrust/_generated_types.py @@ -144,6 +144,11 @@ class AsyncScoringControlAsyncScoringControl5(TypedDict): triggered_xact_id: str +class AsyncScoringControlAsyncScoringControl6(TypedDict): + kind: Literal['mark_attempt_failed'] + function_ids: Sequence[Any] + + class AsyncScoringStateAsyncScoringState(TypedDict): status: Literal['enabled'] token: str @@ -484,6 +489,10 @@ class Dataset(TypedDict): """ User-controlled metadata about the dataset """ + url_slug: str + """ + URL slug for the dataset. used to construct dataset URLs + """ class DatasetEventMetadata(TypedDict): @@ -532,6 +541,43 @@ class EnvVar(TypedDict): """ +class EvalStatusPageConfig(TypedDict): + score_columns: NotRequired[Sequence[str] | None] + """ + The score columns to display on the page + """ + metric_columns: NotRequired[Sequence[str] | None] + """ + The metric columns to display on the page + """ + grouping_field: NotRequired[str | None] + """ + The metadata field to use for grouping experiments (model) + """ + filter: NotRequired[str | None] + """ + BTQL filter to apply to experiment data + """ + sort_by: NotRequired[str | None] + """ + Field to sort results by (format: 'score:' or 'metric:') + """ + sort_order: NotRequired[Literal['asc', 'desc'] | None] + """ + Sort order (ascending or descending) + """ + api_key: NotRequired[str | None] + """ + The API key used for fetching experiment data + """ + + +EvalStatusPageTheme: TypeAlias = Literal['light', 'dark'] +""" +The theme for the page +""" + + class ExperimentEventMetadata(TypedDict): model: NotRequired[str | None] """ @@ -749,20 +795,24 @@ class FunctionIdFunctionId4(TypedDict): FunctionObjectType: TypeAlias = Literal[ - 'prompt', 'tool', 'scorer', 'task', 'custom_view', 'preprocessor', 'facet' + 'prompt', 'tool', 'scorer', 'task', 'workflow', 'custom_view', 'preprocessor', 'facet', 'classifier' ] -FunctionOutputType: TypeAlias = Literal['completion', 'score', 'any'] +FunctionOutputType: TypeAlias = Literal['completion', 'score', 'facet', 'classification', 'any'] -FunctionTypeEnum: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet'] +FunctionTypeEnum: TypeAlias = Literal[ + 'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier' +] """ The type of global function. Defaults to 'scorer'. """ -FunctionTypeEnumNullish: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet'] +FunctionTypeEnumNullish: TypeAlias = Literal[ + 'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier' +] class GitMetadataSettings(TypedDict): @@ -1674,7 +1724,18 @@ class PromptDataNullishOrigin(TypedDict): class PromptParserNullish(TypedDict): type: Literal['llm_classifier'] use_cot: bool - choice_scores: Mapping[str, float] + choice_scores: NotRequired[Mapping[str, float] | None] + """ + Map of choices to scores (0-1). Used by scorers. + """ + choice: NotRequired[Sequence[str] | None] + """ + List of valid choices without score mapping. Used by classifiers that deposit output to tags. + """ + allow_no_match: NotRequired[bool | None] + """ + If true, adds a 'No match' option. When selected, no tag is deposited. + """ class PromptSessionEvent(TypedDict): @@ -2104,7 +2165,7 @@ class SpanScope(TypedDict): SpanType: TypeAlias = Literal[ - 'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor' + 'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor', 'classifier' ] """ Type of the span, for display purposes only @@ -2384,6 +2445,7 @@ class AsyncScoringControlAsyncScoringControl1(TypedDict): | AsyncScoringControlAsyncScoringControl3 | AsyncScoringControlAsyncScoringControl4 | AsyncScoringControlAsyncScoringControl5 + | AsyncScoringControlAsyncScoringControl6 ) @@ -2530,6 +2592,43 @@ class DatasetEvent(TypedDict): """ +class EvalStatusPage(TypedDict): + id: str + """ + Unique identifier for the eval status page + """ + project_id: str + """ + Unique identifier for the project that the eval status page belongs under + """ + user_id: NotRequired[str | None] + """ + Identifies the user who created the eval status page + """ + created: NotRequired[str | None] + """ + Date of eval status page creation + """ + deleted_at: NotRequired[str | None] + """ + Date of eval status page deletion, or null if the eval status page is still active + """ + name: str + """ + Name of the eval status page + """ + description: NotRequired[str | None] + """ + Textual description of the eval status page + """ + logo_url: NotRequired[str | None] + """ + URL of the logo to display on the page + """ + theme: EvalStatusPageTheme + config: EvalStatusPageConfig + + class Experiment(TypedDict): id: str """ @@ -3228,6 +3327,7 @@ class View(TypedDict): 'prompts', 'tools', 'scorers', + 'classifiers', 'logs', 'monitor', 'for_review', diff --git a/py/src/braintrust/generated_types.py b/py/src/braintrust/generated_types.py index 65be4dade..85faa9a3e 100644 --- a/py/src/braintrust/generated_types.py +++ b/py/src/braintrust/generated_types.py @@ -1,4 +1,4 @@ -"""Auto-generated file (internal git SHA 87ac73f4945a47eff2d4e42775ba4dbc58854c73) -- do not modify""" +"""Auto-generated file (internal git SHA 21146f64bf5ad1eadd3a99d186274728e25e5399) -- do not modify""" from ._generated_types import ( Acl, @@ -29,6 +29,9 @@ Dataset, DatasetEvent, EnvVar, + EvalStatusPage, + EvalStatusPageConfig, + EvalStatusPageTheme, Experiment, ExperimentEvent, ExtendedSavedFunctionId, @@ -136,6 +139,9 @@ "Dataset", "DatasetEvent", "EnvVar", + "EvalStatusPage", + "EvalStatusPageConfig", + "EvalStatusPageTheme", "Experiment", "ExperimentEvent", "ExtendedSavedFunctionId",