From dc73564206cd554fe702bc28bf1020f252700acf Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Fri, 8 May 2026 16:59:55 +0530 Subject: [PATCH 01/81] feat: add granular RBAC checks for API keys, inference, metrics, and filter inaccessible sidebar items (#3295) This PR improves RBAC granularity in the sidebar by introducing dedicated resource types for `APIKeys`, `Inference`, and `Metrics`, and fixes sidebar visibility logic so that items and groups are hidden when the user lacks access rather than relying on broader, less specific permissions. - Added three new `RbacResource` enum values: `APIKeys`, `Inference`, and `Metrics` to the fallback RBAC context. - The API Keys sidebar item now gates access via the new `hasAPIKeyAccess` (`RbacResource.APIKeys`) check instead of the generic `hasSettingsAccess`. - The MCP Logs sidebar item now correctly gates access via `hasMCPGatewayAccess` instead of the unrelated `hasLogsAccess`. - Introduced an `accessibleItems` memoized computation that filters out sidebar items and entire groups whose sub-items are all inaccessible, ensuring users never see empty navigation sections. Previously, access filtering only happened during search. - Removed unused imports (`PanelLeft`, `PanelRight`, `cn`). - [ ] Bug fix - [x] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs 1. Log in as a user with restricted RBAC permissions that exclude `APIKeys` and/or `Settings`. 2. Verify the API Keys entry under the Config section is hidden for users without `APIKeys` view permission. 3. Verify the MCP Logs entry is hidden for users without `MCPGateway` view permission. 4. Verify that sidebar groups with no accessible sub-items are hidden entirely rather than showing an empty group. 5. Verify that users with full access see no change in sidebar behavior. 
```sh cd ui pnpm i || npm i pnpm build || npm run build ``` _Add before/after screenshots showing sidebar items hidden for restricted users._ - [ ] Yes - [x] No _Link related issues here._ Access control checks for API Keys management are now scoped to a dedicated `APIKeys` RBAC resource rather than the broader `Settings` resource, reducing the risk of unintended access to key management for users who have settings visibility but should not manage API keys. - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- ui/components/sidebar.tsx | 342 +++++++++++++++++++++++++++----------- 1 file changed, 248 insertions(+), 94 deletions(-) diff --git a/ui/components/sidebar.tsx b/ui/components/sidebar.tsx index c6042e044f..ba2132a2a7 100644 --- a/ui/components/sidebar.tsx +++ b/ui/components/sidebar.tsx @@ -38,10 +38,14 @@ import { UserRoundCheck, Users, Wallet, - WalletCards + WalletCards, } from "lucide-react"; -import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; import { Separator } from "@/components/ui/separator"; import { Sidebar, @@ -68,7 +72,11 @@ import { import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; import type { UserInfo } from "@enterprise/lib/store/utils/tokenManager"; import { getUserInfo } from "@enterprise/lib/store/utils/tokenManager"; -import { BooksIcon, DiscordLogoIcon, GithubLogoIcon } from "@phosphor-icons/react"; +import { + BooksIcon, + DiscordLogoIcon, + GithubLogoIcon, +} from "@phosphor-icons/react"; import { Link, useLocation, useNavigate } from "@tanstack/react-router"; import { ChevronRight } from "lucide-react"; import { useTheme } from "next-themes"; @@ -134,7 +142,8 @@ const 
productionSetupHelpCard = { title: "Need help with production setup?", description: ( <> - We offer help with production setup including custom integrations and dedicated support. + We offer help with production setup including custom integrations and + dedicated support.

Book a demo with our team{" "} @@ -221,7 +230,8 @@ const SidebarItemView = ({ if (flyoutCloseTimer.current) clearTimeout(flyoutCloseTimer.current); }; }, []); - const hasSubItems = "subItems" in item && item.subItems && item.subItems.length > 0; + const hasSubItems = + "subItems" in item && item.subItems && item.subItems.length > 0; const isRouteMatch = (url: string) => { if (url === "/workspace/custom-pricing") return pathname === url; return pathname.startsWith(url); @@ -250,14 +260,15 @@ const SidebarItemView = ({ const isHighlighted = !hasSubItems && highlightedUrl === item.url; - const buttonClassName = `relative h-7.5 cursor-pointer rounded-sm border px-3 transition-all duration-200 ${isHighlighted - ? "bg-sidebar-accent text-accent-foreground border-primary/20" - : isActive || isAnySubItemActive - ? "bg-sidebar-accent text-primary border-primary/20" - : item.hasAccess - ? "hover:bg-sidebar-accent hover:text-accent-foreground border-transparent text-slate-500 dark:text-zinc-400" - : "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" - } `; + const buttonClassName = `relative h-7.5 cursor-pointer rounded-sm border px-3 transition-all duration-200 ${ + isHighlighted + ? "bg-sidebar-accent text-accent-foreground border-primary/20" + : isActive || isAnySubItemActive + ? "bg-sidebar-accent text-primary border-primary/20" + : item.hasAccess + ? "hover:bg-sidebar-accent hover:text-accent-foreground border-transparent text-slate-500 dark:text-zinc-400" + : "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" + } `; const innerContent = (
@@ -314,19 +325,31 @@ const SidebarItemView = ({ ); } else if (!item.hasAccess) { menuButton = ( - + {innerContent} ); } else if (isExternal) { menuButton = ( - + e.stopPropagation() : undefined} + onClick={ + isSidebarCollapsed + ? (e: React.MouseEvent) => e.stopPropagation() + : undefined + } > {innerContent} @@ -334,12 +357,20 @@ const SidebarItemView = ({ ); } else { menuButton = ( - + e.stopPropagation() : undefined} + onClick={ + isSidebarCollapsed + ? (e: React.MouseEvent) => e.stopPropagation() + : undefined + } > {innerContent} @@ -351,8 +382,14 @@ const SidebarItemView = ({ {isSidebarCollapsed && hasSubItems ? ( - -
{menuButton}
+ +
+ {menuButton} +
{subItem.tag && ( - + {subItem.tag} )} @@ -428,7 +468,10 @@ const SidebarItemView = ({ {item.subItems?.map((subItem: SidebarItem) => { const baseHref = getSidebarItemHref(subItem); const subItemHref = (() => { - if (TIME_FILTER_PAGES.has(subItem.url) && TIME_FILTER_PAGES.has(pathname)) { + if ( + TIME_FILTER_PAGES.has(subItem.url) && + TIME_FILTER_PAGES.has(pathname) + ) { const currentParams = new URLSearchParams(search); const startTime = currentParams.get("start_time"); const endTime = currentParams.get("end_time"); @@ -452,14 +495,15 @@ const SidebarItemView = ({ ? subItemHref.startsWith(highlightedUrl) : false; const SubItemIcon = subItem.icon; - const subItemClassName = `h-7 cursor-pointer rounded-sm px-2 transition-all duration-200 ${isSubItemHighlighted - ? "bg-sidebar-accent text-accent-foreground" - : isSubItemActive - ? "bg-sidebar-accent text-primary font-medium" - : subItem.hasAccess === false - ? "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" - : "hover:bg-sidebar-accent hover:text-accent-foreground text-slate-500 dark:text-zinc-400" - }`; + const subItemClassName = `h-7 cursor-pointer rounded-sm px-2 transition-all duration-200 ${ + isSubItemHighlighted + ? "bg-sidebar-accent text-accent-foreground" + : isSubItemActive + ? "bg-sidebar-accent text-primary font-medium" + : subItem.hasAccess === false + ? "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" + : "hover:bg-sidebar-accent hover:text-accent-foreground text-slate-500 dark:text-zinc-400" + }`; const subInner = (
{SubItemIcon && ( @@ -467,11 +511,16 @@ const SidebarItemView = ({ className={`h-3.5 w-3.5 ${isSubItemActive ? "text-primary" : "text-muted-foreground"}`} /> )} - + {subItem.title} {subItem.tag && ( - + {subItem.tag} )} @@ -480,12 +529,19 @@ const SidebarItemView = ({ return ( {subItem.hasAccess === false ? ( - + {subInner} ) : ( - + {subInner} @@ -544,7 +600,10 @@ export default function AppSidebar() { const tsNavigate = useNavigate(); // Wrapper that accepts arbitrary string URLs (TanStack Router's `to` is // strictly typed, but our sidebar items come from a runtime config). - const navigate = useCallback((url: string) => tsNavigate({ to: url as string }), [tsNavigate]); + const navigate = useCallback( + (url: string) => tsNavigate({ to: url as string }), + [tsNavigate], + ); const [mounted, setMounted] = useState(false); const [expandedItems, setExpandedItems] = useState>(new Set()); const [areCardsEmpty, setAreCardsEmpty] = useState(false); @@ -553,38 +612,87 @@ export default function AppSidebar() { const [focusedIndex, setFocusedIndex] = useState(-1); const searchInputRef = useRef(null); const [cookies, setCookie] = useCookies([PRODUCTION_SETUP_DISMISSED_COOKIE]); - const isProductionSetupDismissed = !!cookies[PRODUCTION_SETUP_DISMISSED_COOKIE]; + const isProductionSetupDismissed = + !!cookies[PRODUCTION_SETUP_DISMISSED_COOKIE]; const { data: latestRelease } = useGetLatestReleaseQuery(undefined, { skip: !mounted, // Only fetch after component is mounted }); const hasLogsAccess = useRbac(RbacResource.Logs, RbacOperation.View); - const hasObservabilityAccess = useRbac(RbacResource.Observability, RbacOperation.View); - const hasModelProvidersAccess = useRbac(RbacResource.ModelProvider, RbacOperation.View); - const hasMCPGatewayAccess = useRbac(RbacResource.MCPGateway, RbacOperation.View); - const hasMCPToolGroupsAccess = useRbac(RbacResource.MCPToolGroups, RbacOperation.View); + const hasObservabilityAccess = useRbac( + RbacResource.Observability, + 
RbacOperation.View, + ); + const hasModelProvidersAccess = useRbac( + RbacResource.ModelProvider, + RbacOperation.View, + ); + const hasMCPGatewayAccess = useRbac( + RbacResource.MCPGateway, + RbacOperation.View, + ); + const hasMCPToolGroupsAccess = useRbac( + RbacResource.MCPToolGroups, + RbacOperation.View, + ); const hasMCPLogsAccess = useRbac(RbacResource.MCPLogs, RbacOperation.View); const hasPluginsAccess = useRbac(RbacResource.Plugins, RbacOperation.View); const hasUsersAccess = useRbac(RbacResource.Users, RbacOperation.View); - const hasUserProvisioningAccess = useRbac(RbacResource.UserProvisioning, RbacOperation.View); - const hasAuditLogsAccess = useRbac(RbacResource.AuditLogs, RbacOperation.View); - const hasCustomersAccess = useRbac(RbacResource.Customers, RbacOperation.View); + const hasUserProvisioningAccess = useRbac( + RbacResource.UserProvisioning, + RbacOperation.View, + ); + const hasAuditLogsAccess = useRbac( + RbacResource.AuditLogs, + RbacOperation.View, + ); + const hasCustomersAccess = useRbac( + RbacResource.Customers, + RbacOperation.View, + ); const hasTeamsAccess = useRbac(RbacResource.Teams, RbacOperation.View); - const hasBusinessUnitsAccess = useRbac(RbacResource.UserProvisioning, RbacOperation.View); + const hasBusinessUnitsAccess = useRbac( + RbacResource.UserProvisioning, + RbacOperation.View, + ); const hasRbacAccess = useRbac(RbacResource.RBAC, RbacOperation.View); - const hasVirtualKeysAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.View); - const hasGovernanceLegacyAccess = useRbac(RbacResource.Governance, RbacOperation.View); - const hasRoutingRulesAccess = useRbac(RbacResource.RoutingRules, RbacOperation.View); + const hasVirtualKeysAccess = useRbac( + RbacResource.VirtualKeys, + RbacOperation.View, + ); + const hasGovernanceLegacyAccess = useRbac( + RbacResource.Governance, + RbacOperation.View, + ); + const hasRoutingRulesAccess = useRbac( + RbacResource.RoutingRules, + RbacOperation.View, + ); const 
hasGuardrailsProvidersAccess = useRbac( RbacResource.GuardrailsProviders, RbacOperation.View, ); - const hasGuardrailsConfigAccess = useRbac(RbacResource.GuardrailsConfig, RbacOperation.View); - const hasClusterConfigAccess = useRbac(RbacResource.Cluster, RbacOperation.View); - const isAdaptiveRoutingAllowed = useRbac(RbacResource.AdaptiveRouter, RbacOperation.View); + const hasGuardrailsConfigAccess = useRbac( + RbacResource.GuardrailsConfig, + RbacOperation.View, + ); + const hasClusterConfigAccess = useRbac( + RbacResource.Cluster, + RbacOperation.View, + ); + const isAdaptiveRoutingAllowed = useRbac( + RbacResource.AdaptiveRouter, + RbacOperation.View, + ); const hasSettingsAccess = useRbac(RbacResource.Settings, RbacOperation.View); const hasAPIKeyAccess = useRbac(RbacResource.APIKeys, RbacOperation.View); - const hasPromptRepositoryAccess = useRbac(RbacResource.PromptRepository, RbacOperation.View); - const hasAccessProfilesAccess = useRbac(RbacResource.AccessProfiles, RbacOperation.View); + const hasPromptRepositoryAccess = useRbac( + RbacResource.PromptRepository, + RbacOperation.View, + ); + const hasAccessProfilesAccess = useRbac( + RbacResource.AccessProfiles, + RbacOperation.View, + ); const hasAnyGovernanceAccess = hasVirtualKeysAccess || hasTeamsAccess || @@ -842,14 +950,14 @@ export default function AppSidebar() { }, ...(isDbConnected ? 
[ - { - title: "Prompt Repository", - url: "/workspace/prompt-repo", - icon: FolderGit, - description: "Prompt repository", - hasAccess: hasPromptRepositoryAccess, - }, - ] + { + title: "Prompt Repository", + url: "/workspace/prompt-repo", + icon: FolderGit, + description: "Prompt repository", + hasAccess: hasPromptRepositoryAccess, + }, + ] : []), { title: "Evals", @@ -864,7 +972,8 @@ export default function AppSidebar() { url: "/workspace/config", icon: Settings2Icon, description: "Bifrost settings", - hasAccess: hasSettingsAccess || hasAuditLogsAccess || hasUserProvisioningAccess, + hasAccess: + hasSettingsAccess || hasAuditLogsAccess || hasUserProvisioningAccess, subItems: [ { title: "Client Settings", @@ -896,14 +1005,14 @@ export default function AppSidebar() { }, ...(IS_ENTERPRISE ? [ - { - title: "Proxy", - url: "/workspace/config/proxy", - icon: Globe, - description: "Proxy configuration", - hasAccess: hasSettingsAccess, - }, - ] + { + title: "Proxy", + url: "/workspace/config/proxy", + icon: Globe, + description: "Proxy configuration", + hasAccess: hasSettingsAccess, + }, + ] : []), { title: "API Keys", @@ -923,13 +1032,13 @@ export default function AppSidebar() { }, ], [ - hasLogsAccess, - hasObservabilityAccess, - hasModelProvidersAccess, - hasMCPGatewayAccess, - hasMCPToolGroupsAccess, - hasMCPLogsAccess, - hasPluginsAccess, + hasLogsAccess, + hasObservabilityAccess, + hasModelProvidersAccess, + hasMCPGatewayAccess, + hasMCPToolGroupsAccess, + hasMCPLogsAccess, + hasPluginsAccess, hasUsersAccess, hasUserProvisioningAccess, hasAuditLogsAccess, @@ -957,7 +1066,9 @@ export default function AppSidebar() { .map((item) => { const hadSubItems = !!item.subItems?.length; if (hadSubItems) { - const visibleSubItems = item.subItems!.filter((sub) => sub.hasAccess !== false); + const visibleSubItems = item.subItems!.filter( + (sub) => sub.hasAccess !== false, + ); if (visibleSubItems.length === 0) return null; return { ...item, subItems: visibleSubItems, hasAccess: 
true }; } @@ -1042,7 +1153,9 @@ export default function AppSidebar() { if (!item.subItems?.length) return; const parentMatches = item.title.toLowerCase().includes(query); if (parentMatches) return; - const hasMatchingChild = item.subItems.some((sub) => sub.title.toLowerCase().includes(query)); + const hasMatchingChild = item.subItems.some((sub) => + sub.title.toLowerCase().includes(query), + ); if (hasMatchingChild) { toExpand.add(item.title); } @@ -1078,7 +1191,8 @@ export default function AppSidebar() { }[] = []; for (const item of filteredItems) { if (item.isExternal) { - if (item.hasAccess) result.push({ title: item.title, url: item.url, isExternal: true }); + if (item.hasAccess) + result.push({ title: item.title, url: item.url, isExternal: true }); continue; } const hasSubItems = item.subItems && item.subItems.length > 0; @@ -1108,7 +1222,9 @@ export default function AppSidebar() { (e: React.KeyboardEvent) => { if (e.key === "ArrowDown") { e.preventDefault(); - setFocusedIndex((prev) => Math.min(prev + 1, navigableItems.length - 1)); + setFocusedIndex((prev) => + Math.min(prev + 1, navigableItems.length - 1), + ); } else if (e.key === "ArrowUp") { e.preventDefault(); setFocusedIndex((prev) => Math.max(prev - 1, 0)); @@ -1163,7 +1279,10 @@ export default function AppSidebar() { // Avoid double-highlighting with "/workspace/custom-pricing/overrides" if (url === "/workspace/custom-pricing") return pathname === url; if (url !== "/" && pathname.startsWith(url)) { - if (url === "/workspace/config" && configExceptions.some((e) => pathname.startsWith(e))) { + if ( + url === "/workspace/config" && + configExceptions.some((e) => pathname.startsWith(e)) + ) { return false; } return true; @@ -1173,9 +1292,13 @@ export default function AppSidebar() { // Always render the light theme version for SSR to avoid hydration mismatch const logoSrc = - mounted && resolvedTheme === "dark" ? "/bifrost-logo-dark.webp" : "/bifrost-logo.webp"; + mounted && resolvedTheme === "dark" + ? 
"/bifrost-logo-dark.webp" + : "/bifrost-logo.webp"; const iconSrc = - mounted && resolvedTheme === "dark" ? "/bifrost-icon-dark.webp" : "/bifrost-icon.webp"; + mounted && resolvedTheme === "dark" + ? "/bifrost-icon-dark.webp" + : "/bifrost-icon.webp"; const { isConnected: isWebSocketConnected } = useWebSocket(); @@ -1209,7 +1332,11 @@ export default function AppSidebar() { title: `${latestRelease.name} is now available.`, description: ( - + {filteredItems.map((item) => { const isActive = isActiveRoute(item.url); const highlightedUrl = - focusedIndex >= 0 ? navigableItems[focusedIndex]?.url : undefined; + focusedIndex >= 0 + ? navigableItems[focusedIndex]?.url + : undefined; return ( ))} - {IS_ENTERPRISE && userInfo && (userInfo.name || userInfo.email) ? ( - + {IS_ENTERPRISE && + userInfo && + (userInfo.name || userInfo.email) ? ( +
+
+ +
+ {displayError && ( diff --git a/ui/lib/store/apis/mcpLogsApi.ts b/ui/lib/store/apis/mcpLogsApi.ts index 6f2e52a1cb..b92cc7cd4d 100644 --- a/ui/lib/store/apis/mcpLogsApi.ts +++ b/ui/lib/store/apis/mcpLogsApi.ts @@ -208,6 +208,7 @@ export const { useLazyGetMCPLogByIdQuery, useLazyGetMCPLogsStatsQuery, useLazyGetMCPAvailableFilterDataQuery, + useGetMCPHistogramQuery, useLazyGetMCPHistogramQuery, useLazyGetMCPCostHistogramQuery, useLazyGetMCPTopToolsQuery, From 5b548320661c4be090540469af0f545d27e956b7 Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:01:11 +0530 Subject: [PATCH 09/81] refactor: semantic cache plugin (#3210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This PR refactors the semantic cache plugin to simplify its internal state management, improves cache lookup correctness, and adds a new `cache_hit_types` filter to the logs API and UI. The direct cache lookup path is now a single deterministic point-fetch by a UUIDv5 `directCacheID` (replacing the previous dual-path of chunk lookup + legacy metadata scan), and several context keys are consolidated. The UI gains a "Local Caching" filter sidebar section and cache hit type badges in the log detail view. ## Changes - **Semantic cache plugin refactor:** - Replaced the dual direct-search path (`performDirectChunkLookup` + `performLegacyDirectSearch`) with a single `performDirectSearch` that does an O(1) `GetChunk` by deterministic `directCacheID` (UUIDv5 derived from provider, model, cacheKey, requestHash, paramsHash). - `generateDirectCacheID` now returns an error instead of silently falling back to a string concatenation, making failures explicit. - `request_hash` is no longer stored as a top-level metadata field; it is encoded into the `directCacheID` instead. 
- Reduced context keys from ~10 to 4 (`directCacheIDKey`, `paramsHashKey`, `embeddingsKey`, `embeddingsInputTokensKey`), removing stale keys like `requestIDKey`, `requestHashKey`, `isCacheHitKey`, and `cacheHitTypeKey`. - `shouldSkipCaching` is extracted into its own method; cache-hit detection now reads `CacheDebug.CacheHit` from the response rather than a context flag. - `buildUnifiedMetadata` no longer accepts `requestHash` as a parameter. - `addSingleResponse` renamed to `addNonStreamingResponse`. - `StreamAccumulator` fields `HasError`, `FinalTimestamp`, and `FinishReason` on `StreamChunk` are removed; error streams are handled by early return in `PostLLMHook`. - Streaming replay goroutine now guards every send with `ctx.Done()` to prevent goroutine leaks on dropped consumers. - A background `runStreamCleanupLoop` goroutine (started by `Init`, stopped by `Cleanup` via `stopCh`) replaces the one-shot cleanup call, periodically reaping stale stream accumulators. - `buildResponseFromResult` now accepts `threshold`, `similarity`, and `inputTokens` as pointers, and `attachCacheDebug` is extracted as a shared helper for both streaming and non-streaming paths. - `isExpiredEntry` is extracted as a standalone function. - `chunkSortKey` replaces the large inline sort comparator in `processAccumulatedStream`. - Tools, stop sequences, modalities, include lists, and other order-insensitive set fields are now hashed with `hashSortedSet` / `sortedStringSet` to prevent MCP's randomized map iteration from perturbing the request hash. - `extractAttachmentsForCaching` is extracted so attachment URLs are included in the cache key metadata rather than the embedding text. - `extractTextForEmbedding` no longer returns a `paramsHash`; callers compute it once via `buildRequestMetadataForCaching` + `hashMap`. - `generateEmbedding` moved from `utils.go` to `search.go`. - `generateRequestHash` now accepts prebuilt metadata to avoid recomputing it. 
- `removeField` no longer mutates the input slice's backing array. - Added `PronunciationDictionaryLocators`, `TimestampGranularities`, `Include`, `AdditionalFormats`, and `InputImages` to their respective parameter metadata extractors. - Public context key names changed from `semantic_cache_*` to `semantic_cache-*` (underscore → hyphen separator after the plugin prefix). - `SelectFields` no longer includes `request_hash`. - `VectorStoreProperties` no longer includes a `request_hash` entry. - `CacheByModel` and `CacheByProvider` default-value log messages added. - **Log filtering — `cache_hit_types`:** - Added `CacheHitTypes []string` to `SearchFilters` in `framework/logstore/tables.go`. - `applyFilters` in `rdb.go` applies a JSON path filter on `cache_debug` for both SQLite (`json_extract`) and PostgreSQL (`substring` regex) dialects, restricted to the allowlist `["direct", "semantic"]`. - `canUseMatViewFilters` excludes queries with `CacheHitTypes` set from the materialized-view fast path. - HTTP handlers (`getLogs`, `getLogsStats`, `parseHistogramFilters`) parse a `cache_hit_types` comma-separated query parameter. - **UI:** - Added a "Local Caching" filter section to `LogsFilterSidebar` with checkboxes for "Direct cache" and "Semantic cache". - `cache_hit_types` is added to URL state, filter state, and the `buildFilterParams` API helper. - Log detail view shows "Direct Cache" (indigo) and "Semantic Cache" (rose) badges based on `cache_debug.hit_type`. - Plugins form now filters the provider dropdown to embedding-capable providers only (`EmbeddingSupportedProviders` for built-ins; `custom_provider_config.allowed_requests.embedding` for custom providers), shows an error message when no embedding provider is configured, and disables the toggle accordingly. - Embedding model input replaced with `ModelMultiselect` (single-select mode) scoped to the selected provider. - Provider dropdown clears the embedding model when the provider changes. 
- Provider icons rendered in the provider dropdown. - `EmbeddingSupportedProviders` constant added to `ui/lib/constants/logs.ts`. - **Misc:** - HTTP request logging in `CorsMiddleware` and an auth debug log are commented out. - `transports/bifrost-http/v1.5.x` added to `.gitignore`. - Minor formatting fixes in `core/schemas/bifrost.go` and `framework/modelcatalog/sync.go`. - Missing newline at end of `sync.go` added. ## Type of change - [ ] Bug fix - [x] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [x] Core (Go) - [x] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [x] UI (React) - [ ] Docs ## How to test ```sh # Core/Transports go test ./plugins/semanticcache/... go test ./framework/logstore/... go test ./transports/bifrost-http/... # UI cd ui pnpm i pnpm build ``` - Configure the semantic cache plugin with a direct and/or semantic cache type and verify that cache hits are recorded with the correct `hit_type` in `cache_debug`. - Query `/logs?cache_hit_types=direct` and `/logs?cache_hit_types=semantic` and confirm only matching entries are returned. - In the UI, open the logs filter sidebar and verify the "Local Caching" section appears with "Direct cache" and "Semantic cache" checkboxes that correctly filter the log list. - Open a log detail for a cache hit and confirm the appropriate badge ("Direct Cache" or "Semantic Cache") is displayed. - In the plugins form, verify that only embedding-capable providers appear in the provider dropdown and that the embedding model field uses the model multiselect. ## Breaking changes - [x] Yes The public semantic cache context key names have changed from `semantic_cache_*` to `semantic_cache-*`. Any caller setting `CacheKey`, `CacheTTLKey`, `CacheThresholdKey`, `CacheTypeKey`, or `CacheNoStoreKey` via the old string values will no longer be recognized by the plugin. Update all call sites to use the exported constants from the plugin package rather than raw string literals. 
`request_hash` is no longer stored as a top-level metadata field in the vector store. Existing cache entries written by prior versions will not be found by the new direct-search path (they will be treated as misses and re-populated). `ClearCacheForRequestID` is documented as currently broken for entries written by the new direct-search path; callers should not rely on it until the TODO is resolved. ## Related issues N/A ## Security considerations The `CacheHitTypes` filter allowlists values to `"direct"` and `"semantic"` before interpolating them into SQL, preventing arbitrary input from reaching the JSON path expression. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [ ] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .claude/skills/docs-writer/SKILL.md | 2 +- .gitignore | 1 + core/schemas/bifrost.go | 4 + core/schemas/context.go | 35 + core/schemas/context_test.go | 34 + docs/features/semantic-caching.mdx | 28 +- docs/migration-guides/v1.5.0.mdx | 65 + docs/openapi/openapi.json | 12 +- docs/openapi/openapi.yaml | 4 +- docs/openapi/paths/management/cache.yaml | 15 +- framework/logstore/matviews.go | 3 +- framework/logstore/rdb.go | 25 +- framework/logstore/tables.go | 1 + framework/modelcatalog/sync.go | 4 +- framework/vectorstore/weaviate.go | 23 + plugins/logging/main.go | 10 +- plugins/logging/operations.go | 10 - plugins/semanticcache/main.go | 940 +++++------ plugins/semanticcache/main_test.go | 39 + plugins/semanticcache/plugin_api_test.go | 378 +++++ .../semanticcache/plugin_cache_type_test.go | 313 ++-- .../plugin_conversation_config_test.go | 50 +- plugins/semanticcache/plugin_core_test.go | 95 +- .../semanticcache/plugin_cross_cache_test.go | 78 +- .../plugin_default_cache_key_test.go | 28 +- .../semanticcache/plugin_edge_cases_test.go | 84 +- 
.../semanticcache/plugin_embedding_test.go | 52 +- .../plugin_image_generation_test.go | 24 +- .../semanticcache/plugin_integration_test.go | 224 ++- .../semanticcache/plugin_nil_content_test.go | 153 +- .../semanticcache/plugin_no_mutation_test.go | 198 +++ plugins/semanticcache/plugin_no_store_test.go | 64 +- .../plugin_normalization_test.go | 31 +- plugins/semanticcache/plugin_paths_test.go | 572 +++++++ .../semanticcache/plugin_responses_test.go | 145 +- .../semanticcache/plugin_streaming_test.go | 23 +- .../semanticcache/plugin_vectorstore_test.go | 51 +- plugins/semanticcache/search.go | 636 ++++--- plugins/semanticcache/state.go | 110 ++ plugins/semanticcache/stream.go | 216 ++- plugins/semanticcache/test_utils.go | 135 +- plugins/semanticcache/utils.go | 1455 ++++++++--------- transports/bifrost-http/handlers/cache.go | 20 +- .../bifrost-http/handlers/cache_test.go | 139 ++ transports/bifrost-http/handlers/logging.go | 9 + .../bifrost-http/handlers/middlewares.go | 44 +- ui/app/workspace/config/views/pluginsForm.tsx | 188 ++- ui/app/workspace/logs/page.tsx | 4 + .../workspace/logs/sheets/logDetailView.tsx | 71 +- ui/components/filters/logsFilterSidebar.tsx | 33 + ui/lib/constants/logs.ts | 19 + ui/lib/store/apis/logsApi.ts | 3 + ui/lib/types/config.ts | 4 +- ui/lib/types/logs.ts | 1 + ui/lib/types/schemas.ts | 4 +- 55 files changed, 4467 insertions(+), 2442 deletions(-) create mode 100644 plugins/semanticcache/main_test.go create mode 100644 plugins/semanticcache/plugin_api_test.go create mode 100644 plugins/semanticcache/plugin_no_mutation_test.go create mode 100644 plugins/semanticcache/plugin_paths_test.go create mode 100644 plugins/semanticcache/state.go create mode 100644 transports/bifrost-http/handlers/cache_test.go diff --git a/.claude/skills/docs-writer/SKILL.md b/.claude/skills/docs-writer/SKILL.md index 02da9b6524..61929a4f67 100644 --- a/.claude/skills/docs-writer/SKILL.md +++ b/.claude/skills/docs-writer/SKILL.md @@ -176,7 +176,7 @@ grep -n 
'func.*create\|func.*update\|func.*delete\|func.*get' transports/bifrost | `plugins.go` | `/api/plugins` | CRUD plugins | | `config.go` | `/api/config` | GET/PUT config | | `config.go` | `/api/proxy-config` | GET/PUT proxy config | -| `cache.go` | `/api/cache/clear/{requestId}` | DELETE cache | +| `cache.go` | `/api/cache/clear/{cacheId}` | DELETE cache | | `session.go` | `/api/session/*` | Login/logout/auth check | | `oauth2.go` | `/api/oauth/*` | OAuth callback/status | diff --git a/.gitignore b/.gitignore index a7c2e26109..d3c42cfd4c 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ transports/schema/config.schema.json *.db *.db-shm *.db-wal +transports/bifrost-http/v1.5.x # Test reports test-reports diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go index 0c2352e53e..8abd0390f0 100644 --- a/core/schemas/bifrost.go +++ b/core/schemas/bifrost.go @@ -1412,6 +1412,10 @@ type BifrostCacheDebug struct { // Semantic cache only (only when cache is hit) Threshold *float64 `json:"threshold,omitempty"` Similarity *float64 `json:"similarity,omitempty"` + + // CacheHitLatency is the time in milliseconds spent serving the cache hit + // (lookup + response build). Only set when CacheHit is true. + CacheHitLatency *int64 `json:"cache_hit_latency,omitempty"` } const ( diff --git a/core/schemas/context.go b/core/schemas/context.go index e7cace6def..14bcd18643 100644 --- a/core/schemas/context.go +++ b/core/schemas/context.go @@ -132,6 +132,41 @@ func (bc *BifrostContext) WithValue(key any, value any) *BifrostContext { return bc } +// Root returns the underlying root BifrostContext. For root contexts this is +// the receiver itself; for plugin-scoped contexts it is the underlying root +// that scoped Value/SetValue calls delegate to. +// +// PLUGIN AUTHORS: capture Root() synchronously inside Pre/PostLLMHook (or +// any other hook) when you need to write to the context from a goroutine +// that outlives the hook. 
The plugin-scoped *BifrostContext passed into your +// hook is reclaimed by an internal sync.Pool the moment the hook returns — +// any later SetValue/Value call on it lands in detached storage that nobody +// downstream can read (and can leak into a future pool reuse). The root, +// in contrast, lives for the entire request, so a pointer captured here is +// safe to use for the lifetime of the request even after your hook returns. +// +// Example: +// +// func (p *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req ...) (...) { +// rootCtx := ctx.Root() // capture before the scope is released +// go func() { +// // ... long-running work that produces stream chunks ... +// rootCtx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true) +// }() +// return req, &schemas.LLMPluginShortCircuit{Stream: ch}, nil +// } +func (bc *BifrostContext) Root() *BifrostContext { + // Unwrap the full delegation chain. A scoped context can in principle be + // derived from another scoped context (e.g. nested plugin scopes), and + // stopping at the first valueDelegate would return an intermediate pooled + // scope — which loses the async-safety guarantee as soon as that + // intermediate scope is released. + for bc != nil && bc.valueDelegate != nil { + bc = bc.valueDelegate + } + return bc +} + // BlockRestrictedWrites returns true if restricted writes are blocked. func (bc *BifrostContext) BlockRestrictedWrites() { bc.blockRestrictedWrites.Store(true) diff --git a/core/schemas/context_test.go b/core/schemas/context_test.go index 4d75f50528..da6f368188 100644 --- a/core/schemas/context_test.go +++ b/core/schemas/context_test.go @@ -376,6 +376,40 @@ func TestPluginLog_PoolReuse(t *testing.T) { } } +// TestRoot_UnwrapsChainedValueDelegates verifies Root() walks the entire +// delegate chain. A naive single-step unwrap would return an intermediate +// pooled scope, which loses the async-safety guarantee as soon as that +// intermediate scope is recycled. 
+func TestRoot_UnwrapsChainedValueDelegates(t *testing.T) { + root := NewBifrostContext(context.Background(), NoDeadline) + + a := "outer" + b := "inner" + outer := root.WithPluginScope(&a) + // Manually build a second scoped context whose delegate is the first + // scoped context — simulates a plugin that derives its own scope from + // an already-scoped ctx. + inner := &BifrostContext{ + parent: outer.parent, + done: outer.done, + pluginScope: &b, + valueDelegate: outer, + } + + got := inner.Root() + if got != root { + t.Fatalf("Root() did not walk the chain to the request root: got %p, want %p", got, root) + } + if got.valueDelegate != nil { + t.Fatalf("Root() returned a context with a non-nil valueDelegate: %+v", got) + } + + // Sanity: Root() on a non-scoped context returns itself. + if root.Root() != root { + t.Fatal("Root() on a non-scoped context should return the receiver") + } +} + // TestNewBifrostContext_DerivedFromReleasedScope_NoPanic locks in the // deterministic half of the scoped-parent-release bug: a derived BifrostContext // must not deref a pool-released scoped ancestor when its accessors are called. diff --git a/docs/features/semantic-caching.mdx b/docs/features/semantic-caching.mdx index f25747c720..5413649ea2 100644 --- a/docs/features/semantic-caching.mdx +++ b/docs/features/semantic-caching.mdx @@ -169,7 +169,9 @@ bifrostConfig := schemas.BifrostConfig{ **Cache Settings**: - **TTL (seconds)**: How long cached responses are kept (default: 300 s). - **Similarity Threshold**: Cosine similarity cutoff for a cache hit (0–1, default: 0.8). -- **Dimension**: Vector dimension matching your embedding model (e.g. 1536 for `text-embedding-3-small`). +- **Dimension**: Vector size produced by the embedding model — must match the model exactly. Common values: `1536` for OpenAI `text-embedding-3-small`, `3072` for `text-embedding-3-large`, `768` for many Cohere/Voyage models. Use `1` only in direct-only mode (no provider). 
+ +> **Heads up**: a vector store namespace can only hold vectors of *one* dimension. Whenever you change the embedding **provider**, **model**, or **dimension**, make sure the new dimension still matches what the model produces — otherwise writes to the existing namespace will fail and reads will silently miss. The namespace is **not** recreated automatically; either point `vector_store_namespace` at a fresh name or drop the existing class/index in your vector store before saving. **Conversation Settings**: - **Conversation History Threshold**: Skip caching when the conversation has more than this many messages (default: 3). @@ -612,6 +614,7 @@ Example HTTP Response: "extra_fields": { "cache_debug": { "cache_hit": false, + "cache_id": "550e8500-e29b-41d4-a725-446655440001", "provider_used": "openai", "model_used": "gpt-4o-mini", "input_tokens": 20 @@ -620,22 +623,21 @@ Example HTTP Response: } ``` - -These variables allow you to detect cached responses and get the cache entry ID needed for clearing specific entries. +`cache_debug` is populated on both hits and misses. `cache_id` is the storage ID of the entry — use it to invalidate the entry later. The embedding-related fields (`provider_used`, `model_used`, `input_tokens`) are only present when semantic search actually ran. 
### Clear Specific Cache Entry -Use the request ID from cached responses to clear specific entries: +Use the `cache_id` from `cache_debug` to clear a specific entry: ```go -// Clear specific entry by request ID -err := plugin.ClearCacheForRequestID("550e8400-e29b-41d4-a716-446655440000") +// Clear specific entry by cache ID (read from response.ExtraFields.CacheDebug.CacheID) +err := plugin.ClearCacheForCacheID("550e8500-e29b-41d4-a725-446655440001") -// Clear all entries for a cache key +// Clear all entries for a cache key err := plugin.ClearCacheForKey("support-session-456") ``` @@ -644,8 +646,8 @@ err := plugin.ClearCacheForKey("support-session-456") ```bash -# Clear specific cached entry by request ID -curl -X DELETE http://localhost:8080/api/cache/clear/550e8400-e29b-41d4-a716-446655440000 +# Clear specific cached entry by cache ID +curl -X DELETE http://localhost:8080/api/cache/clear/550e8500-e29b-41d4-a725-446655440001 # Clear all entries for a cache key curl -X DELETE http://localhost:8080/api/cache/clear-by-key/support-session-456 @@ -665,7 +667,7 @@ The semantic cache automatically handles cleanup to prevent storage bloat: - **Namespace Isolation**: Each Bifrost instance uses isolated vector store namespaces to prevent conflicts **Manual Cleanup Options:** -- Clear specific entries by request ID (see examples above) +- Clear specific entries by cache ID (see examples above) - Clear all entries for a cache key - Restart Bifrost to clear all cache data @@ -674,7 +676,11 @@ The semantic cache namespace and all its cache entries are deleted when Bifrost -**Dimension Changes**: If you update the `dimension` config, the existing namespace will contain data with mixed dimensions, causing retrieval issues. To avoid this, either use a different `vector_store_namespace` or set `cleanup_on_shutdown: true` before restarting. +**Dimension / Provider / Model Changes**: A vector store namespace can only hold vectors of **one** dimension. 
If you change `dimension` (or switch to an embedding `provider`/`model` that produces a different vector size), the existing namespace is **not** recreated automatically — `CreateNamespace` is a no-op when the class/collection already exists. Subsequent writes will fail (vector-size mismatch) and reads will silently miss. Before saving the change, either: + +- point `vector_store_namespace` at a fresh name, or +- drop the existing class/index in your vector store, or +- set `cleanup_on_shutdown: true` and restart so the old namespace is removed first. --- diff --git a/docs/migration-guides/v1.5.0.mdx b/docs/migration-guides/v1.5.0.mdx index 0878720d67..95d4a49dd8 100644 --- a/docs/migration-guides/v1.5.0.mdx +++ b/docs/migration-guides/v1.5.0.mdx @@ -690,6 +690,67 @@ The supported `BifrostContextKeyAPIKeyID` / `BifrostContextKeyAPIKeyName` path c --- +## Breaking Change 15: Semantic Cache Clear API is Now Cache-ID Based + +The semantic cache "clear by request ID" API has been removed. Storage IDs in the cache are deterministic UUIDv5 hashes derived from the request payload (so the same prompt across many requests maps to a single cache entry), which made the previous request-ID-based delete unable to match anything written by the direct-search path. + +The replacement is keyed on the cache entry's storage ID, which is now stamped on every response in `extra_fields.cache_debug.cache_id` — on cache hits **and** cache misses. Hold onto that ID from the response if you ever need to invalidate the entry. + +### REST API + +| Before (v1.4.x) | After (v1.5.0) | +|---|---| +| `DELETE /api/cache/clear/{requestId}` | `DELETE /api/cache/clear/{cacheId}` | + +The path parameter name and meaning both changed. The cache key endpoint (`DELETE /api/cache/clear-by-key/{cacheKey}`) is unchanged. + +**Before:** +```bash +curl -X DELETE localhost:8080/api/cache/clear/req-aaa-bbb-ccc +``` + +**After:** +```bash +# Read the cache ID from a prior response +CACHE_ID=$(curl ... 
| jq -r '.extra_fields.cache_debug.cache_id') + +curl -X DELETE localhost:8080/api/cache/clear/$CACHE_ID +``` + +### Go SDK + +The `ClearCacheForRequestID` method on `*semanticcache.Plugin` has been removed and replaced by `ClearCacheForCacheID`. + +**Before:** +```go +err := plugin.ClearCacheForRequestID(requestID) +``` + +**After:** +```go +// On hit or miss, the storage ID is exposed via CacheDebug.CacheID +cacheID := response.ExtraFields.CacheDebug.CacheID +if cacheID != nil { + err := plugin.ClearCacheForCacheID(*cacheID) +} +``` + +### Why the rename + +A single cache entry is reused across many request IDs (that is the point of caching). A request-ID-based delete only ever made sense for the original writer of the entry, and even that broke once direct search switched to deterministic storage IDs. The cache ID is the only stable handle that works for both writers and readers, so the API now reflects that. + +### CacheDebug on misses + +`extra_fields.cache_debug` is now populated on cache misses too — previously it was only emitted when semantic search ran. The new fields on a miss: + +- `cache_hit: false` +- `cache_id`: the storage ID where the entry was written (use this with `ClearCacheForCacheID`) +- `provider_used` / `model_used` / `input_tokens`: only present when semantic search actually ran (i.e. embedding model was invoked) + +If you parse `cache_debug` and assumed it was either absent or had `cache_hit: true`, update your consumer to handle the `cache_hit: false` shape. + +--- + ## Opting Out: `version: 1` Compatibility Mode If you are not ready to adopt the new deny-by-default semantics, you can add a single field to `config.json` to restore v1.4.x behavior for all allow-list fields loaded from that file: @@ -788,6 +849,10 @@ If your code reads `selected_key_id` / `selected_key_name` from the request cont Remove `allow_direct_keys` from `config.json` and any `PUT /api/config` payloads. 
Audit HTTP callers that sent provider keys in `Authorization` / `x-api-key` / `x-goog-api-key` / `x-bf-bedrock-*` / `x-bf-azure-endpoint` headers — those keys are no longer forwarded. Audit Go SDK callers for any reference to `schemas.BifrostContextKeyDirectKey` — the constant is removed and code referencing it will not compile. Replace both flavours with a Bifrost-managed provider key, optionally pinned per request via `BifrostContextKeyAPIKeyID` / `BifrostContextKeyAPIKeyName` (Go SDK) or a virtual key (`sk-bf-*`, HTTP). + + +Replace `DELETE /api/cache/clear/{requestId}` with `DELETE /api/cache/clear/{cacheId}`, and replace `plugin.ClearCacheForRequestID(...)` with `plugin.ClearCacheForCacheID(...)`. Read the cache ID from `extra_fields.cache_debug.cache_id` on the response (now populated on misses too). + --- diff --git a/docs/openapi/openapi.json b/docs/openapi/openapi.json index afcd4003dc..9bb52d600c 100644 --- a/docs/openapi/openapi.json +++ b/docs/openapi/openapi.json @@ -41914,20 +41914,20 @@ } } }, - "/api/cache/clear/{requestId}": { + "/api/cache/clear/{cacheId}": { "delete": { - "operationId": "clearCacheByRequestId", - "summary": "Clear cache by request ID", - "description": "Clears cache entries associated with a specific request ID.", + "operationId": "clearCacheByCacheId", + "summary": "Clear cache entry by cache ID", + "description": "Deletes a single cache entry by its storage ID. 
Read the cache ID from\n`extra_fields.cache_debug.cache_id` on a prior response — it is populated\non both cache hits and cache misses.\n", "tags": [ "Cache" ], "parameters": [ { - "name": "requestId", + "name": "cacheId", "in": "path", "required": true, - "description": "Request ID to clear cache for", + "description": "Storage ID of the cache entry to delete", "schema": { "type": "string" } diff --git a/docs/openapi/openapi.yaml b/docs/openapi/openapi.yaml index ebfad00cce..bb33f921a1 100644 --- a/docs/openapi/openapi.yaml +++ b/docs/openapi/openapi.yaml @@ -788,8 +788,8 @@ paths: $ref: './paths/management/prompts.yaml#/sessions-commit' # Cache - /api/cache/clear/{requestId}: - $ref: './paths/management/cache.yaml#/clear-by-request-id' + /api/cache/clear/{cacheId}: + $ref: './paths/management/cache.yaml#/clear-by-cache-id' /api/cache/clear-by-key/{cacheKey}: $ref: './paths/management/cache.yaml#/clear-by-cache-key' diff --git a/docs/openapi/paths/management/cache.yaml b/docs/openapi/paths/management/cache.yaml index 7c570acebf..29c9d5609d 100644 --- a/docs/openapi/paths/management/cache.yaml +++ b/docs/openapi/paths/management/cache.yaml @@ -1,15 +1,18 @@ -clear-by-request-id: +clear-by-cache-id: delete: - operationId: clearCacheByRequestId - summary: Clear cache by request ID - description: Clears cache entries associated with a specific request ID. + operationId: clearCacheByCacheId + summary: Clear cache entry by cache ID + description: | + Deletes a single cache entry by its storage ID. Read the cache ID from + `extra_fields.cache_debug.cache_id` on a prior response — it is populated + on both cache hits and cache misses. 
tags: - Cache parameters: - - name: requestId + - name: cacheId in: path required: true - description: Request ID to clear cache for + description: Storage ID of the cache entry to delete schema: type: string responses: diff --git a/framework/logstore/matviews.go b/framework/logstore/matviews.go index efb220c633..25d2ad0922 100644 --- a/framework/logstore/matviews.go +++ b/framework/logstore/matviews.go @@ -634,7 +634,8 @@ func canUseMatViewFilters(f SearchFilters) bool { f.MinLatency == nil && f.MaxLatency == nil && f.MinTokens == nil && f.MaxTokens == nil && f.MinCost == nil && f.MaxCost == nil && - !f.MissingCostOnly + !f.MissingCostOnly && + len(f.CacheHitTypes) == 0 } // canUseMatView checks both that materialized views are ready (created and diff --git a/framework/logstore/rdb.go b/framework/logstore/rdb.go index f3527c9b9e..7776550565 100644 --- a/framework/logstore/rdb.go +++ b/framework/logstore/rdb.go @@ -192,6 +192,29 @@ func (s *RDBLogStore) applyFilters(baseQuery *gorm.DB, filters SearchFilters) *g // cost is null and status is not error baseQuery = baseQuery.Where("(cost IS NULL OR cost <= 0) AND status NOT IN ('error')") } + if len(filters.CacheHitTypes) > 0 { + // Only keep allowed values to avoid passing arbitrary input into the JSON path expression. + valid := make([]string, 0, len(filters.CacheHitTypes)) + for _, t := range filters.CacheHitTypes { + if t == "direct" || t == "semantic" { + valid = append(valid, t) + } + } + if len(valid) > 0 { + if s.db.Dialector.Name() == "postgres" { + // Match the same loose-JSON guard used by aggregateCacheHits so the regex extract is safe. 
+ baseQuery = baseQuery.Where( + "cache_debug IS NOT NULL AND cache_debug <> '' AND cache_debug ~ '^\\s*\\{.*\\}\\s*$' AND substring(cache_debug from '\"hit_type\"[[:space:]]*:[[:space:]]*\"([^\"]+)\"') IN ?", + valid, + ) + } else { + baseQuery = baseQuery.Where( + "cache_debug IS NOT NULL AND cache_debug != '' AND json_valid(cache_debug) AND json_extract(cache_debug, '$.hit_type') IN ?", + valid, + ) + } + } + } if filters.ContentSearch != "" { dialect := s.db.Dialector.Name() if dialect == "postgres" { @@ -642,7 +665,7 @@ func (s *RDBLogStore) listSelectColumns() string { "business_unit_id", "business_unit_name", "speech_input", "transcription_input", "image_generation_input", "video_generation_input", "latency", "token_usage", "cost", "status", "error_details", "stream", - "content_summary", "metadata", + "content_summary", "metadata", "cache_debug", "is_large_payload_request", "is_large_payload_response", "prompt_tokens", "completion_tokens", "total_tokens", "created_at", diff --git a/framework/logstore/tables.go b/framework/logstore/tables.go index ac0b352628..95f1aff4f1 100644 --- a/framework/logstore/tables.go +++ b/framework/logstore/tables.go @@ -68,6 +68,7 @@ type SearchFilters struct { MinCost *float64 `json:"min_cost,omitempty"` MaxCost *float64 `json:"max_cost,omitempty"` MissingCostOnly bool `json:"missing_cost_only,omitempty"` + CacheHitTypes []string `json:"cache_hit_types,omitempty"` // For filtering by local-cache hit type ("direct", "semantic") ContentSearch string `json:"content_search,omitempty"` MetadataFilters map[string]string `json:"metadata_filters,omitempty"` // key=metadataKey, value=metadataValue for filtering by metadata } diff --git a/framework/modelcatalog/sync.go b/framework/modelcatalog/sync.go index 3c10f929f8..3aad4f5925 100644 --- a/framework/modelcatalog/sync.go +++ b/framework/modelcatalog/sync.go @@ -395,7 +395,7 @@ func (mc *ModelCatalog) applyModelParameters(paramsData map[string]json.RawMessa } if err := 
json.Unmarshal(rawData, &p); err == nil && (p.MaxOutputTokens != nil || parsed.VertexMultiRegionOnly != nil) { modelParamsEntries[model] = providerUtils.ModelParams{ - MaxOutputTokens: p.MaxOutputTokens, + MaxOutputTokens: p.MaxOutputTokens, IsVertexMultiRegionOnly: parsed.VertexMultiRegionOnly, } } @@ -504,4 +504,4 @@ func (mc *ModelCatalog) loadModelParametersFromURL(ctx context.Context) (map[str mc.logger.Debug("successfully downloaded and parsed %d model parameters records", len(paramsData)) return paramsData, nil -} \ No newline at end of file +} diff --git a/framework/vectorstore/weaviate.go b/framework/vectorstore/weaviate.go index 9c34ab2c83..4db066e156 100644 --- a/framework/vectorstore/weaviate.go +++ b/framework/vectorstore/weaviate.go @@ -476,6 +476,12 @@ func newWeaviateStore(ctx context.Context, config *WeaviateConfig, logger schema } func (s *WeaviateStore) CreateNamespace(ctx context.Context, className string, dimension int, properties map[string]VectorStoreProperties) error { + // Reject names Weaviate would silently auto-capitalize: writes via REST + // route fine, but the GraphQL read path is case-strict and breaks. + if err := validateClassName(className); err != nil { + return err + } + // Check if class exists exists, err := s.client.Schema().ClassExistenceChecker(). WithClassName(className). @@ -637,3 +643,20 @@ func convertOperator(op QueryOperator) filters.WhereOperator { return filters.Equal } } + +// validateClassName enforces Weaviate's class-name rule that the first +// character must be an uppercase ASCII letter. Weaviate's REST endpoints +// silently auto-capitalize a lowercase first character on class creation, +// which means writes appear to succeed under the user-supplied name but +// GraphQL reads (which are case-strict) then fail with "Did you mean +// ?". Surface this at config-save time instead. 
+func validateClassName(name string) error { + if name == "" { + return nil + } + first := name[0] + if first < 'A' || first > 'Z' { + return fmt.Errorf("Weaviate requires class names to start with an uppercase letter (A-Z); got %q. Try %q", name, strings.ToUpper(name[:1])+name[1:]) + } + return nil +} diff --git a/plugins/logging/main.go b/plugins/logging/main.go index b843e290a8..0180ada0b4 100644 --- a/plugins/logging/main.go +++ b/plugins/logging/main.go @@ -830,10 +830,16 @@ func (p *LoggerPlugin) PostLLMHook(ctx *schemas.BifrostContext, result *schemas. // Build the complete log entry with input (from PreLLMHook) + output (from PostLLMHook) entry := buildCompleteLogEntryFromPending(pending) - // Apply common output fields + // Apply common output fields. For cache hits, prefer the cache-serve + // latency stamped by the semantic cache plugin over the original provider + // latency preserved in the cached response. var latency int64 if result != nil { - latency = result.GetExtraFields().Latency + ef := result.GetExtraFields() + latency = ef.Latency + if ef.CacheDebug != nil && ef.CacheDebug.CacheHit && ef.CacheDebug.CacheHitLatency != nil { + latency = *ef.CacheDebug.CacheHitLatency + } } applyOutputFieldsToEntry(entry, selectedKeyID, selectedKeyName, virtualKeyID, virtualKeyName, routingRuleID, routingRuleName, selectedPromptID, selectedPromptName, selectedPromptVersion, teamID, teamName, customerID, customerName, userID, userName, businessUnitID, businessUnitName, numberOfRetries, latency, attemptTrail) entry.MetadataParsed = pending.InitialData.Metadata diff --git a/plugins/logging/operations.go b/plugins/logging/operations.go index d11310505c..83f6f4bc14 100644 --- a/plugins/logging/operations.go +++ b/plugins/logging/operations.go @@ -378,16 +378,6 @@ func (p *LoggerPlugin) applyStreamingOutputToEntry(entry *logstore.Log, streamRe entry.StopReason = streamResponse.Data.FinishReason } - // Cache - if streamResponse.Data.CacheDebug != nil { - 
entry.CacheDebugParsed = streamResponse.Data.CacheDebug - } - - // Finish/stop reason - always persist regardless of content logging settings - if streamResponse.Data.FinishReason != nil { - entry.StopReason = streamResponse.Data.FinishReason - } - // Passthrough status code if streamResponse.Data.PassthroughOutput != nil { if params, ok := entry.ParamsParsed.(*schemas.PassthroughLogParams); ok { diff --git a/plugins/semanticcache/main.go b/plugins/semanticcache/main.go index 235560f6a9..798c98a52d 100644 --- a/plugins/semanticcache/main.go +++ b/plugins/semanticcache/main.go @@ -7,12 +7,9 @@ import ( "context" "encoding/json" "fmt" - "strconv" "sync" "time" - "github.com/google/uuid" - bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" "github.com/maximhq/bifrost/framework/vectorstore" @@ -21,6 +18,13 @@ import ( // Config contains configuration for the semantic cache plugin. // The VectorStore abstraction handles the underlying storage implementation and its defaults. // Only specify values you want to override from the semantic cache defaults. +// +// Modes: +// - Semantic mode: set Provider + EmbeddingModel + Dimension > 0. Both direct +// hash matching and embedding-based similarity search are enabled. +// - Direct-only mode: set Provider="" and Dimension=1. The plugin disables +// semantic search entirely; cache lookups go through the deterministic +// direct hash path. Dimension=1 keeps stores that require a vector happy. 
type Config struct { // Embedding Model settings - REQUIRED for semantic caching Provider schemas.ModelProvider `json:"provider"` @@ -29,9 +33,9 @@ type Config struct { // Plugin behavior settings CleanUpOnShutdown bool `json:"cleanup_on_shutdown,omitempty"` // Clean up cache on shutdown (default: false) TTL time.Duration `json:"ttl,omitempty"` // Time-to-live for cached responses (default: 5min) - Threshold float64 `json:"threshold,omitempty"` // Cosine similarity threshold for semantic matching (default: 0.8) + Threshold float64 `json:"threshold,omitempty"` // Cosine similarity threshold for semantic matching (0 = unset → default 0.8) VectorStoreNamespace string `json:"vector_store_namespace,omitempty"` // Namespace for vector store (optional) - Dimension int `json:"dimension"` // Dimension for vector store + Dimension int `json:"dimension"` // Dimension for vector store (must be > 0 when Provider is set; use 1 for direct-only mode) // Advanced caching behavior DefaultCacheKey string `json:"default_cache_key,omitempty"` // Default cache key used when no per-request key is provided (optional, caching is disabled when empty and no per-request key is set) @@ -41,117 +45,125 @@ type Config struct { ExcludeSystemPrompt *bool `json:"exclude_system_prompt,omitempty"` // Exclude system prompt in cache key (default: false) } -// UnmarshalJSON implements custom JSON unmarshaling for semantic cache Config. -// It supports TTL parsing from both string durations ("1m", "1hr") and numeric seconds for configurable cache behavior. +// UnmarshalJSON implements custom JSON unmarshaling for Config so TTL accepts +// either a duration string ("1m", "1h") or a JSON number (seconds). All other +// fields decode through the default path via a type alias, so adding a new +// field on Config does not require touching this method. 
func (c *Config) UnmarshalJSON(data []byte) error { - // Define a temporary struct to avoid infinite recursion - type TempConfig struct { - Provider string `json:"provider"` - EmbeddingModel string `json:"embedding_model,omitempty"` - CleanUpOnShutdown bool `json:"cleanup_on_shutdown,omitempty"` - Dimension int `json:"dimension"` - TTL interface{} `json:"ttl,omitempty"` - Threshold float64 `json:"threshold,omitempty"` - VectorStoreNamespace string `json:"vector_store_namespace,omitempty"` - DefaultCacheKey string `json:"default_cache_key,omitempty"` - ConversationHistoryThreshold int `json:"conversation_history_threshold,omitempty"` - CacheByModel *bool `json:"cache_by_model,omitempty"` - CacheByProvider *bool `json:"cache_by_provider,omitempty"` - ExcludeSystemPrompt *bool `json:"exclude_system_prompt,omitempty"` - } - - var temp TempConfig - if err := json.Unmarshal(data, &temp); err != nil { + // alias suppresses Config's UnmarshalJSON to avoid infinite recursion. + // The outer TTL (json.RawMessage) shadows alias.TTL because the json + // package picks the shallower field on a name conflict. 
+ type alias Config + aux := &struct { + TTL json.RawMessage `json:"ttl,omitempty"` + *alias + }{alias: (*alias)(c)} + if err := json.Unmarshal(data, aux); err != nil { return fmt.Errorf("failed to unmarshal config: %w", err) } - // Set simple fields - c.Provider = schemas.ModelProvider(temp.Provider) - c.EmbeddingModel = temp.EmbeddingModel - c.CleanUpOnShutdown = temp.CleanUpOnShutdown - c.Dimension = temp.Dimension - c.CacheByModel = temp.CacheByModel - c.CacheByProvider = temp.CacheByProvider - c.VectorStoreNamespace = temp.VectorStoreNamespace - c.ConversationHistoryThreshold = temp.ConversationHistoryThreshold - c.Threshold = temp.Threshold - c.DefaultCacheKey = temp.DefaultCacheKey - c.ExcludeSystemPrompt = temp.ExcludeSystemPrompt - // Handle TTL field with custom parsing for VectorStore-backed cache behavior - if temp.TTL != nil { - switch v := temp.TTL.(type) { - case string: - // Try parsing as duration string (e.g., "1m", "1hr") for semantic cache TTL - duration, err := time.ParseDuration(v) - if err != nil { - return fmt.Errorf("failed to parse TTL duration string '%s': %w", v, err) - } - c.TTL = duration - case int: - // Handle integer seconds for semantic cache TTL - c.TTL = time.Duration(v) * time.Second - default: - // Try converting to string and parsing as number for semantic cache TTL - ttlStr := fmt.Sprintf("%v", v) - if seconds, err := strconv.ParseFloat(ttlStr, 64); err == nil { - c.TTL = time.Duration(seconds * float64(time.Second)) - } else { - return fmt.Errorf("unsupported TTL type: %T (value: %v)", v, v) - } - } + if len(aux.TTL) == 0 || string(aux.TTL) == "null" { + return nil } + // Try string first ("1m"); fall back to a JSON number (seconds). 
+ var s string + if err := json.Unmarshal(aux.TTL, &s); err == nil { + d, err := time.ParseDuration(s) + if err != nil { + return fmt.Errorf("failed to parse TTL duration string '%s': %w", s, err) + } + c.TTL = d + } else { + var seconds float64 + if err := json.Unmarshal(aux.TTL, &seconds); err != nil { + return fmt.Errorf("unsupported TTL value: %s", string(aux.TTL)) + } + c.TTL = time.Duration(seconds * float64(time.Second)) + } + if c.TTL < 0 { + return fmt.Errorf("TTL must be non-negative, got %v", c.TTL) + } return nil } -// StreamChunk represents a single chunk from a streaming response +// StreamChunk is one chunk from a streaming response, retained until the +// stream completes so it can be persisted as part of the cache entry. type StreamChunk struct { - Timestamp time.Time // When chunk was received - Response *schemas.BifrostResponse // The actual response chunk - FinishReason *string // If this is the final chunk + // Timestamp records when this chunk arrived at PostLLMHook. Used by the + // reaper to drop accumulators stuck without a final chunk. + Timestamp time.Time + // Response is the chunk payload as delivered by the provider. + Response *schemas.BifrostResponse } -// StreamAccumulator manages accumulation of streaming chunks for caching +// StreamAccumulator collects the chunks of a single streaming response so +// they can be flushed as one cache entry on the final chunk. 
type StreamAccumulator struct { - RequestID string // The request ID - StorageID string // The final cache entry ID - Chunks []*StreamChunk // All chunks for this stream - IsComplete bool // Whether the stream is complete - HasError bool // Whether any chunk in the stream had an error - FinalTimestamp time.Time // When the stream completed - Embedding []float32 // Embedding for the original request - Metadata map[string]any // Metadata for caching - TTL time.Duration // TTL for this cache entry - mu sync.Mutex // Protects chunk operations + // mu serializes Chunks/IsComplete updates across the per-chunk PostLLMHook + // invocations and the periodic reaper. + mu sync.Mutex + // RequestID is the BifrostContext request ID this accumulator is keyed by. + RequestID string + // StorageID is the cache entry ID the accumulated stream will be written under. + StorageID string + // Chunks holds every chunk seen so far, in arrival order. + Chunks []*StreamChunk + // LastSeenAt records the arrival time of the most recent chunk. The reaper + // uses this so a long-running stream isn't evicted mid-flight; first-chunk + // time alone would falsely flag still-active streams as abandoned. + LastSeenAt time.Time + // IsComplete is set when the final chunk has been observed; further final + // chunks are no-ops to keep flush idempotent. + IsComplete bool + // Embedding is the request embedding to attach to the cache entry, or nil + // for direct-only writes. + Embedding []float32 + // Metadata is the unified metadata captured at first-chunk time and reused + // at flush. expires_at is locked in here, so TTL is fixed at first chunk. + Metadata map[string]any + // TTL is retained for symmetry with Metadata; the effective expiry is the + // expires_at value already baked into Metadata. + TTL time.Duration } -// EmbeddingRequestExecutor is a function that executes a request and returns a response and an error. -// It maps to .EmbeddingRequest() of the bifrost client. 
+// EmbeddingRequestExecutor invokes the embedding endpoint on the bifrost +// client. The plugin calls it on cache misses to compute the request +// embedding for semantic similarity search and storage. It mirrors the +// signature of bifrost.Client.EmbeddingRequest. type EmbeddingRequestExecutor func(ctx *schemas.BifrostContext, req *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) -// Plugin implements the schemas.LLMPlugin interface for semantic caching. -// It caches responses using a two-tier approach: direct hash matching for exact requests -// and semantic similarity search for related content. The plugin supports configurable caching behavior -// via the VectorStore abstraction, including TTL management and streaming response handling. -// -// Fields: -// - store: VectorStore instance for semantic cache operations -// - config: Plugin configuration including semantic cache and caching settings -// - logger: Logger instance for plugin operations +// Plugin implements schemas.LLMPlugin for semantic caching. It serves cached +// responses via two complementary lookup paths: a direct O(1) hash match on +// (provider, model, cache_key, request_hash, params_hash) for exact replays, +// and an embedding-based similarity search for semantically related content. +// Streaming responses are accumulated chunk-by-chunk and stored as a single +// entry on the final chunk; TTL bookkeeping is per-entry via expires_at. type Plugin struct { store vectorstore.VectorStore config *Config logger schemas.Logger embeddingRequestExecutor EmbeddingRequestExecutor - streamAccumulators sync.Map // Track stream accumulators by request ID - waitGroup sync.WaitGroup + // streamAccumulators maps request ID → its in-progress *StreamAccumulator. + streamAccumulators sync.Map + // cacheStates maps request ID → its *cacheState (see state.go) for the + // span between PreLLMHook and PostLLMHook. 
+ cacheStates sync.Map + // writersWg tracks short-lived per-request goroutines (the async cache + // writes spawned in PostLLMHook). WaitForPendingOperations blocks on this + // — tests use it to flush writes before asserting on the store. + writersWg sync.WaitGroup + // cleanupWg tracks the long-running background loops (stream + cacheState + // reapers). Only Cleanup blocks on this, after closing stopCh. + cleanupWg sync.WaitGroup + // stopCh is closed by Cleanup to signal the background reaper loops to exit. + stopCh chan struct{} } // Plugin constants const ( PluginName string = "semantic_cache" DefaultVectorStoreNamespace string = "BifrostSemanticCachePlugin" - PluginLoggerPrefix string = "[Semantic Cache]" CacheConnectionTimeout time.Duration = 5 * time.Second CreateNamespaceTimeout time.Duration = 30 * time.Second CacheSetTimeout time.Duration = 30 * time.Second @@ -160,13 +172,14 @@ const ( DefaultConversationHistoryThreshold int = 3 ) -var SelectFields = []string{"request_hash", "response", "stream_chunks", "expires_at", "cache_key", "provider", "model"} +// SelectFields enumerates the properties projected back from the vector store +// on a cache hit. params_hash and from_bifrost_semantic_cache_plugin are +// filter-only (used in WHERE-style queries to narrow matches) and intentionally +// omitted from this projection — keep them defined in VectorStoreProperties +// below so the store creates the columns/indexes, but don't fetch them. 
+var SelectFields = []string{"response", "stream_chunks", "expires_at", "cache_key", "provider", "model"} var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{ - "request_hash": { - DataType: vectorstore.VectorStorePropertyTypeString, - Description: "The hash of the request", - }, "response": { DataType: vectorstore.VectorStorePropertyTypeString, Description: "The response from the provider", @@ -201,24 +214,15 @@ var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{ }, } +// Per-request context keys. Callers set these on BifrostContext before the +// request enters Bifrost; the plugin reads them in Pre/PostLLMHook. CacheKey +// (or Config.DefaultCacheKey) is the only one required for caching to engage. const ( - CacheKey schemas.BifrostContextKey = "semantic_cache_key" // To set the cache key for a request - REQUIRED for all requests - CacheTTLKey schemas.BifrostContextKey = "semantic_cache_ttl" // To explicitly set the TTL for a request - CacheThresholdKey schemas.BifrostContextKey = "semantic_cache_threshold" // To explicitly set the threshold for a request - CacheTypeKey schemas.BifrostContextKey = "semantic_cache_cache_type" // To explicitly set the cache type for a request - CacheNoStoreKey schemas.BifrostContextKey = "semantic_cache_no_store" // To explicitly disable storing the response in the cache - - // context keys for internal usage - requestIDKey schemas.BifrostContextKey = "semantic_cache_request_id" - requestStorageIDKey schemas.BifrostContextKey = "semantic_cache_request_storage_id" - requestHashKey schemas.BifrostContextKey = "semantic_cache_request_hash" - requestEmbeddingKey schemas.BifrostContextKey = "semantic_cache_embedding" - requestEmbeddingTokensKey schemas.BifrostContextKey = "semantic_cache_embedding_tokens" - requestParamsHashKey schemas.BifrostContextKey = "semantic_cache_params_hash" - requestModelKey schemas.BifrostContextKey = "semantic_cache_model" - requestProviderKey 
schemas.BifrostContextKey = "semantic_cache_provider" - isCacheHitKey schemas.BifrostContextKey = "semantic_cache_is_cache_hit" - cacheHitTypeKey schemas.BifrostContextKey = "semantic_cache_cache_hit_type" + CacheKey schemas.BifrostContextKey = "semantic_cache-key" // String. Required (or DefaultCacheKey) — bucket entries under a tenant/feature scope. + CacheTTLKey schemas.BifrostContextKey = "semantic_cache-ttl" // time.Duration. Per-request override of Config.TTL. + CacheThresholdKey schemas.BifrostContextKey = "semantic_cache-threshold" // float64. Per-request override of the semantic similarity threshold. + CacheTypeKey schemas.BifrostContextKey = "semantic_cache-cache_type" // CacheType. Narrow lookup to a single path (direct or semantic). + CacheNoStoreKey schemas.BifrostContextKey = "semantic_cache-no_store" // bool. Skip writing the response to cache (still served from cache on hit). ) type CacheType string @@ -228,20 +232,12 @@ const ( CacheTypeSemantic CacheType = "semantic" ) -// Init creates a new semantic cache plugin instance with the provided configuration. -// It uses the VectorStore abstraction for cache operations and returns a configured plugin. +// Init validates the configuration, creates the namespace in the underlying +// VectorStore, starts the background reaper goroutines, and returns a plugin +// ready to be wired into the Bifrost plugin pipeline. // -// The VectorStore handles the underlying storage implementation and its defaults. -// The plugin only sets defaults for its own behavior (TTL, cache key generation, etc.). 
-// -// Parameters: -// - config: Semantic cache and plugin configuration (CacheKey is required) -// - logger: Logger instance for the plugin -// - store: VectorStore instance for cache operations -// -// Returns: -// - schemas.LLMPlugin: A configured semantic cache plugin instance -// - error: Any error that occurred during plugin initialization +// Note: Init mutates *config in place to fill in defaults — TTL, Threshold, +// CacheBy* — so the caller sees the resolved values after this returns. func Init(ctx context.Context, config *Config, logger schemas.Logger, store vectorstore.VectorStore) (schemas.LLMPlugin, error) { if config == nil { return nil, fmt.Errorf("config is required") @@ -249,43 +245,51 @@ func Init(ctx context.Context, config *Config, logger schemas.Logger, store vect if store == nil { return nil, fmt.Errorf("store is required") } + if config.Dimension < 0 { + return nil, fmt.Errorf("dimension must be non-negative, got %d", config.Dimension) + } + if config.Provider != "" && config.Dimension <= 0 { + return nil, fmt.Errorf("dimension must be > 0 when provider is set (got dimension=%d, provider=%q)", config.Dimension, config.Provider) + } // Set plugin-specific defaults if config.VectorStoreNamespace == "" { - logger.Debug(PluginLoggerPrefix + " Vector store namespace is not set, using default of " + DefaultVectorStoreNamespace) + logger.Debug("Vector store namespace is not set, using default of %s", DefaultVectorStoreNamespace) config.VectorStoreNamespace = DefaultVectorStoreNamespace } if config.TTL == 0 { - logger.Debug(PluginLoggerPrefix + " TTL is not set, using default of 5 minutes") + logger.Debug("TTL is not set, using default of %v", DefaultCacheTTL) config.TTL = DefaultCacheTTL } if config.Threshold == 0 { - logger.Debug(PluginLoggerPrefix + " Threshold is not set, using default of " + strconv.FormatFloat(DefaultCacheThreshold, 'f', -1, 64)) + logger.Debug("Threshold is not set, using default of %v", DefaultCacheThreshold) 
config.Threshold = DefaultCacheThreshold } if config.ConversationHistoryThreshold == 0 { - logger.Debug(PluginLoggerPrefix + " Conversation history threshold is not set, using default of " + strconv.Itoa(DefaultConversationHistoryThreshold)) + logger.Debug("Conversation history threshold is not set, using default of %d", DefaultConversationHistoryThreshold) config.ConversationHistoryThreshold = DefaultConversationHistoryThreshold } // Set cache behavior defaults if config.CacheByModel == nil { + logger.Debug("CacheByModel is not set, defaulting to true") config.CacheByModel = new(true) } if config.CacheByProvider == nil { + logger.Debug("CacheByProvider is not set, defaulting to true") config.CacheByProvider = new(true) } plugin := &Plugin{ - store: store, - config: config, - logger: logger, - waitGroup: sync.WaitGroup{}, + store: store, + config: config, + logger: logger, + stopCh: make(chan struct{}), } if config.Provider == "" && config.Dimension == 1 { - logger.Info(PluginLoggerPrefix + " Starting in direct-only mode (dimension=1, no embedding provider)") + logger.Info("Starting in direct-only mode (dimension=1, no embedding provider)") } else if config.Provider == "" { - logger.Warn(PluginLoggerPrefix + " Incomplete semantic mode config: missing provider, falling back to direct search only") + logger.Warn("Incomplete semantic mode config: missing provider, falling back to direct search only") } createCtx, cancel := context.WithTimeout(ctx, CreateNamespaceTimeout) @@ -294,382 +298,281 @@ func Init(ctx context.Context, config *Config, logger schemas.Logger, store vect return nil, fmt.Errorf("failed to create namespace for semantic cache: %w", err) } + plugin.cleanupWg.Add(1) + go plugin.runStreamCleanupLoop() + + plugin.cleanupWg.Add(1) + go plugin.runCacheStateCleanupLoop() + return plugin, nil } -// GetName returns the canonical name of the semantic cache plugin. -// This name is used for plugin identification and logging purposes. 
-// -// Returns: -// - string: The plugin name for semantic cache +// GetName returns the canonical name used for plugin identification and logging. func (plugin *Plugin) GetName() string { return PluginName } -// HTTPTransportPreHook is not used for this plugin +// HTTPTransportPreHook is not used by the semantic cache plugin. func (plugin *Plugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest) (*schemas.HTTPResponse, error) { return nil, nil } -// HTTPTransportPostHook is not used for this plugin +// HTTPTransportPostHook is not used by the semantic cache plugin. func (plugin *Plugin) HTTPTransportPostHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, resp *schemas.HTTPResponse) error { return nil } -// HTTPTransportStreamChunkHook passes through streaming chunks unchanged +// HTTPTransportStreamChunkHook passes streaming chunks through unchanged. func (plugin *Plugin) HTTPTransportStreamChunkHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, chunk *schemas.BifrostStreamChunk) (*schemas.BifrostStreamChunk, error) { return chunk, nil } -// PreLLMHook is called before a request is processed by Bifrost. -// It performs a two-stage cache lookup: first direct hash matching, then semantic similarity search. -// Uses UUID-based keys for entries stored in the VectorStore. -// -// Parameters: -// - ctx: Pointer to the schemas.BifrostContext -// - req: The incoming Bifrost request -// -// Returns: -// - *schemas.BifrostRequest: The original request -// - *schemas.BifrostResponse: Cached response if found, nil otherwise -// - error: Any error that occurred during cache lookup +// PreLLMHook performs the cache lookup before the request reaches the +// provider. It runs the direct hash path first (cheapest), falls back to +// semantic similarity search when configured, and short-circuits the +// pipeline with a cached response on hit. 
On miss, it leaves per-request +// state on the plugin keyed by request ID for PostLLMHook to consume when +// the upstream response arrives. func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error) { - provider, model, _ := req.GetRequestFields() - // Get the cache key from the context - var cacheKey string - var ok bool - - cacheKey, ok = ctx.Value(CacheKey).(string) - if !ok || cacheKey == "" { - if plugin.config.DefaultCacheKey != "" { - cacheKey = plugin.config.DefaultCacheKey - plugin.logger.Debug(PluginLoggerPrefix + " Using default cache key: " + cacheKey) - } else { - plugin.logger.Debug(PluginLoggerPrefix + " No cache key found in context, continuing without caching") - return req, nil, nil - } + cacheKey, ok := plugin.resolveCacheKey(ctx) + if !ok { + return req, nil, nil } - // Clear request-scoped semantic cache state up front in case the context is reused. - plugin.clearRequestScopedContext(ctx) + // Without a request ID we have nowhere to anchor per-request state. The + // framework always stamps this before plugin hooks run; direct callers + // (tests, custom integrations) must set it too. + requestID, ok := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + if !ok || requestID == "" { + return req, nil, nil + } if !isSemanticCacheSupportedRequestType(req.RequestType) { - plugin.logger.Debug(PluginLoggerPrefix + " Skipping caching for unsupported request type: " + string(req.RequestType)) return req, nil, nil } - if plugin.isConversationHistoryThresholdExceeded(req) { - plugin.logger.Debug(PluginLoggerPrefix + " Skipping caching for request with conversation history threshold exceeded") + // Create state up front so a reused/retried request ID never inherits stale fields. 
+ state := plugin.createCacheState(requestID) + + if plugin.isConversationHistoryThresholdExceeded(state, req) { + plugin.clearCacheState(requestID) return req, nil, nil } - // Generate UUID for this request - requestID := uuid.New().String() - - // Store request ID, model, and provider in context for PostLLMHook - ctx.SetValue(requestIDKey, requestID) - ctx.SetValue(requestModelKey, model) - ctx.SetValue(requestProviderKey, provider) + performDirectSearch, performSemanticSearch := plugin.resolveCacheTypes(ctx) - performDirectSearch, performSemanticSearch := true, true - if ctx.Value(CacheTypeKey) != nil { - cacheTypeVal, ok := ctx.Value(CacheTypeKey).(CacheType) - if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " Cache type is not a CacheType, using all available cache types") - } else { - performDirectSearch = cacheTypeVal == CacheTypeDirect - performSemanticSearch = cacheTypeVal == CacheTypeSemantic - } + // Compute metadata + paramsHash once and reuse across both search paths. + metadata, err := plugin.buildRequestMetadataForCaching(state, req) + if err != nil { + plugin.clearCacheState(requestID) + plugin.logger.Debug("metadata build failed, caching disabled for this request: %v", err) + return req, nil, nil } + paramsHash, err := hashMap(metadata) + if err != nil { + plugin.clearCacheState(requestID) + plugin.logger.Debug("params hash failed, caching disabled for this request: %v", err) + return req, nil, nil + } + state.ParamsHash = paramsHash if performDirectSearch { - shortCircuit, err := plugin.performDirectSearch(ctx, req, cacheKey) + shortCircuit, err := plugin.performDirectSearch(ctx, state, req, cacheKey, metadata, paramsHash) if err != nil { - plugin.logger.Warn(PluginLoggerPrefix + " Direct search failed: " + err.Error() + " (" + describeRequestShape(req) + ")") - // Don't return - continue to semantic search fallback - shortCircuit = nil // Ensure we don't use an invalid shortCircuit - } - - if shortCircuit != nil { + msg := fmt.Sprintf("direct 
search failed (vector store unreachable?): %v", err) + plugin.logger.Warn(msg) + ctx.Log(schemas.LogLevelWarn, msg) + } else if shortCircuit != nil { return req, shortCircuit, nil } } - if performSemanticSearch && plugin.embeddingRequestExecutor != nil { - if req.EmbeddingRequest != nil || req.TranscriptionRequest != nil { - plugin.logger.Debug(PluginLoggerPrefix + " Skipping semantic search for embedding/transcription input") - // For vector stores that require vectors, set a zero vector placeholder - // This allows direct hash matching to work without the overhead of generating embeddings - if plugin.store.RequiresVectors() && plugin.config.Dimension > 0 { - zeroVector := make([]float32, plugin.config.Dimension) - ctx.SetValue(requestEmbeddingKey, zeroVector) - plugin.logger.Debug(PluginLoggerPrefix + " Using zero vector placeholder for embedding/transcription request storage") - } - return req, nil, nil - } - - // Try semantic search as fallback - shortCircuit, err := plugin.performSemanticSearch(ctx, req, cacheKey) - if err != nil { - plugin.logger.Debug(PluginLoggerPrefix + " Semantic search skipped: " + err.Error() + " (" + describeRequestShape(req) + ")") - return req, nil, nil - } - - if shortCircuit != nil { - return req, shortCircuit, nil - } - } else if !performSemanticSearch && plugin.store.RequiresVectors() && plugin.embeddingRequestExecutor != nil { - // Vector store requires vectors but we're in direct-only mode - // Generate embeddings for storage purposes (not for searching) - if req.EmbeddingRequest != nil || req.TranscriptionRequest != nil { - plugin.logger.Debug(PluginLoggerPrefix + " Skipping embedding generation for embedding/transcription input") - // For vector stores that require vectors, set a zero vector placeholder - // This allows direct hash matching to work without the overhead of generating embeddings - if plugin.config.Dimension > 0 { - zeroVector := make([]float32, plugin.config.Dimension) - ctx.SetValue(requestEmbeddingKey, 
zeroVector) - plugin.logger.Debug(PluginLoggerPrefix + " Using zero vector placeholder for embedding/transcription request storage") + if performSemanticSearch { + // Suppress semantic for ineligible cases (no executor, or request + // types whose input cannot itself be embedded). + semanticEligible := plugin.embeddingRequestExecutor != nil && + req.EmbeddingRequest == nil && + req.TranscriptionRequest == nil + if !semanticEligible { + plugin.setZeroVectorIfRequired(state) + } else { + shortCircuit, err := plugin.performSemanticSearch(ctx, state, req, cacheKey, paramsHash) + if err != nil { + // Embedding failures (rate-limit, auth, timeout) are + // operationally important — surface at Warn and on the response. + msg := fmt.Sprintf("semantic search skipped: %v", err) + plugin.logger.Warn(msg) + ctx.Log(schemas.LogLevelWarn, msg) + } else if shortCircuit != nil { + return req, shortCircuit, nil } - return req, nil, nil - } - - // Use zero vector for direct-only cache type to prevent semantic search matches - // This preserves cache type isolation - direct-only entries won't be found by semantic search - if plugin.config.Dimension > 0 { - zeroVector := make([]float32, plugin.config.Dimension) - ctx.SetValue(requestEmbeddingKey, zeroVector) - plugin.logger.Debug(PluginLoggerPrefix + " Using zero vector for direct-only cache storage (preserves isolation)") } + } else if !performSemanticSearch { + // Direct-only mode. If the vector store requires vectors for every entry + // (Qdrant, Pinecone) we write a zero vector. Note: this collapses all + // direct-only entries onto the same point in vector space, so a + // semantic search across cache types under the same cache_key/params + // could surface them. params_hash filtering is the actual isolation. + plugin.setZeroVectorIfRequired(state) } return req, nil, nil } -// PostLLMHook is called after a response is received from a provider. 
-// It caches responses in the VectorStore using UUID-based keys with unified metadata structure -// including provider, model, request hash, and TTL. Handles both single and streaming responses. -// -// The function performs the following operations: -// 1. Checks configurable caching behavior and skips caching for unsuccessful responses if configured -// 2. Retrieves the request hash and ID from the context (set during PreLLMHook) -// 3. Marshals the response for storage -// 4. Stores the unified cache entry in the VectorStore asynchronously (non-blocking) -// -// The VectorStore Add operation runs in a separate goroutine to avoid blocking the response. -// The function gracefully handles errors and continues without caching if any step fails, -// ensuring that response processing is never interrupted by caching issues. -// -// Parameters: -// - ctx: Pointer to the schemas.BifrostContext containing the request hash and ID -// - res: The response from the provider to be cached -// - bifrostErr: The error from the provider, if any (used for success determination) -// -// Returns: -// - *schemas.BifrostResponse: The original response, unmodified -// - *schemas.BifrostError: The original error, unmodified -// - error: Any error that occurred during caching preparation (always nil as errors are handled gracefully) -func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error) { - if bifrostErr != nil { - return res, bifrostErr, nil +// resolveCacheKey returns the per-request cache key (or the configured default) +// and a bool indicating whether the caller should proceed with caching. 
+func (plugin *Plugin) resolveCacheKey(ctx *schemas.BifrostContext) (string, bool) { + if cacheKey, ok := ctx.Value(CacheKey).(string); ok && cacheKey != "" { + return cacheKey, true } - - // Skip caching for large payloads — body is too large to materialize for cache storage - if isLargePayload, ok := ctx.Value(schemas.BifrostContextKeyLargePayloadMode).(bool); ok && isLargePayload { - plugin.logger.Debug(PluginLoggerPrefix + " Skipping semantic cache for large payload request") - return res, nil, nil - } - if isLargeResponse, ok := ctx.Value(schemas.BifrostContextKeyLargeResponseMode).(bool); ok && isLargeResponse { - plugin.logger.Debug(PluginLoggerPrefix + " Skipping semantic cache for large payload response") - return res, nil, nil + if plugin.config.DefaultCacheKey != "" { + return plugin.config.DefaultCacheKey, true } + return "", false +} - isCacheHit := ctx.Value(isCacheHitKey) - if isCacheHit != nil { - isCacheHitValue, ok := isCacheHit.(bool) - if ok && isCacheHitValue { - return res, nil, nil - } +// resolveCacheTypes returns whether direct and semantic search paths should +// run for this request. Defaults both to true; an explicit CacheTypeKey on +// the context narrows to just one. 
+func (plugin *Plugin) resolveCacheTypes(ctx *schemas.BifrostContext) (direct bool, semantic bool) { + direct, semantic = true, true + ctxVal := ctx.Value(CacheTypeKey) + if ctxVal == nil { + return } + cacheTypeVal, ok := ctxVal.(CacheType) + if !ok { + msg := fmt.Sprintf("CacheTypeKey is not a CacheType (got %T), using all available cache types", ctxVal) + plugin.logger.Warn(msg) + ctx.Log(schemas.LogLevelWarn, msg) + return + } + direct = cacheTypeVal == CacheTypeDirect + semantic = cacheTypeVal == CacheTypeSemantic + return +} - // Check if caching is explicitly disabled - noStore := ctx.Value(CacheNoStoreKey) - if noStore != nil { - noStoreValue, ok := noStore.(bool) - if ok && noStoreValue { - plugin.logger.Debug(PluginLoggerPrefix + " Caching is explicitly disabled for this request, continuing without caching") - return res, nil, nil - } +// setZeroVectorIfRequired writes a zero embedding placeholder when the store +// mandates a vector per entry. See PreLLMHook for the isolation caveat. +func (plugin *Plugin) setZeroVectorIfRequired(state *cacheState) { + if !plugin.store.RequiresVectors() || plugin.config.Dimension <= 0 { + return } + state.Embeddings = make([]float32, plugin.config.Dimension) +} - // Get the cache key from context - cacheKey, ok := ctx.Value(CacheKey).(string) - if !ok || cacheKey == "" { - if plugin.config.DefaultCacheKey != "" { - cacheKey = plugin.config.DefaultCacheKey - } else { - return res, nil, nil - } +// PostLLMHook caches the upstream response keyed by the storageID resolved +// in PreLLMHook (deterministic directCacheID for direct hits, request UUID +// otherwise). The store write runs in a goroutine tracked by writersWg with +// its own background context + CacheSetTimeout, so client cancellation +// after the response is delivered doesn't drop the cache write. Returns the +// response unmodified — caching never alters the request flow. 
+func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error) { + if bifrostErr != nil { + // We rely on errors always arriving as the final chunk for streams, so + // we abort caching here without further bookkeeping. Any partial + // accumulator from a prior chunk gets reaped by the periodic cleanup. + return res, bifrostErr, nil } - // Get the request ID from context - requestID, ok := ctx.Value(requestIDKey).(string) + requestID, ok := ctx.Value(schemas.BifrostContextKeyRequestID).(string) if !ok { return res, nil, nil } - storageID := requestID - // When direct lookup prepared a deterministic storage ID, reuse it here so - // default-mode traffic warms the GetChunk fast path instead of only the - // legacy search path. - if v, ok := ctx.Value(requestStorageIDKey).(string); ok && v != "" { - storageID = v - } - // Check cache type to optimize embedding handling - var embedding []float32 - var hash string - var shouldStoreEmbeddings = true - var shouldStoreHash = true - - if ctx.Value(CacheTypeKey) != nil { - cacheTypeVal, ok := ctx.Value(CacheTypeKey).(CacheType) - if ok { - if cacheTypeVal == CacheTypeDirect { - // For direct-only caching, skip embedding operations entirely - // unless the vector store requires vectors for all entries - if plugin.store.RequiresVectors() { - // Vector stores like Qdrant and Pinecone require vectors for all entries - // Keep embeddings enabled for storage, but lookups will still use direct hash matching - plugin.logger.Debug(PluginLoggerPrefix + " Vector store requires vectors, keeping embedding generation enabled for storage") - } else { - shouldStoreEmbeddings = false - plugin.logger.Debug(PluginLoggerPrefix + " Skipping embedding operations for direct-only cache type") - } - } else if cacheTypeVal == CacheTypeSemantic { - shouldStoreHash = false - plugin.logger.Debug(PluginLoggerPrefix + " Skipping hash 
operations for semantic cache type") - } - } - } - - if shouldStoreHash { - // Get the hash from context - hash, ok = ctx.Value(requestHashKey).(string) - if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " Hash is not a string. Continuing without caching") - return res, nil, nil - } - } extraFields := res.GetExtraFields() requestType := extraFields.RequestType - - // Get embedding from context if available and needed - // For embedding/transcription requests, we still need to retrieve the zero vector placeholder - // if the vector store requires vectors for all entries - isEmbeddingOrTranscription := requestType == schemas.EmbeddingRequest || requestType == schemas.TranscriptionRequest - needsEmbedding := shouldStoreEmbeddings && !isEmbeddingOrTranscription - needsZeroVector := isEmbeddingOrTranscription && plugin.store.RequiresVectors() - - if needsEmbedding || needsZeroVector { - embeddingValue := ctx.Value(requestEmbeddingKey) - if embeddingValue != nil { - embedding, ok = embeddingValue.([]float32) - if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " Embedding is not a []float32, continuing without caching") - return res, nil, nil - } + cacheDebug := extraFields.CacheDebug + + // Final-chunk signaling for cache replays: stampCacheDebugForHit only + // stamps CacheDebug.CacheHit=true on the LAST replay chunk (see search.go). + // When we see that stamp, we set the stream-end indicator on the root ctx + // synchronously — same goroutine as the rest of the post-hook chain. This + // MUST run before shouldSkipCaching, otherwise we early-return without + // setting the indicator and downstream plugins (logging) never see + // isFinalChunk=true on the final replay chunk. + // + // Why not set the indicator from the cache replay goroutine instead? It + // races: the producer can advance to its next iteration (and SetValue) + // while the receiver is still running PostLLMHooks for the previous + // chunk, poisoning that chunk's IsFinalChunk read. 
+ if bifrost.IsStreamRequestType(requestType) && cacheDebug != nil && cacheDebug.CacheHit { + ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true) + } + if plugin.shouldSkipCaching(ctx, res) { + // Clear state on the same condition the defer at the end of this + // function uses — otherwise the early return below leaks *cacheState + // (notably the ~6 KB Embeddings slice) until the periodic reaper + // runs. Non-final stream chunks of an in-flight short-circuit replay + // keep their state because they need it for later chunks. + if !bifrost.IsStreamRequestType(requestType) || bifrost.IsFinalChunk(ctx) { + plugin.clearCacheState(requestID) } - // Note: embedding can be nil for direct cache hits or when semantic search is disabled - // This is fine - we can still cache using direct hash matching (unless store requires vectors) - } - - // Get the provider from context - provider, ok := ctx.Value(requestProviderKey).(schemas.ModelProvider) - if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " Provider is not a schemas.ModelProvider, continuing without caching") return res, nil, nil } - // Get the model from context - model, ok := ctx.Value(requestModelKey).(string) + cacheKey, ok := plugin.resolveCacheKey(ctx) if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " Model is not a string, continuing without caching") return res, nil, nil } - + provider := extraFields.Provider + model := extraFields.OriginalModelRequested + isStream := bifrost.IsStreamRequestType(requestType) isFinalChunk := bifrost.IsFinalChunk(ctx) - // Get the input tokens from context (can be nil if not set) - inputTokens, ok := ctx.Value(requestEmbeddingTokensKey).(int) - if ok { - isStreamRequest := bifrost.IsStreamRequestType(requestType) + state := plugin.getCacheState(requestID) + if state == nil || state.ParamsHash == "" { + // PreLLMHook bailed before computing the params hash (unsupported + // request type, conversation-history threshold, metadata error, + // etc.). 
Caching now would write an entry without params_hash that + // no future lookup can match. + return res, nil, nil + } - if !isStreamRequest || (isStreamRequest && isFinalChunk) { - if extraFields.CacheDebug == nil { - extraFields.CacheDebug = &schemas.BifrostCacheDebug{} - } - extraFields.CacheDebug.CacheHit = false - extraFields.CacheDebug.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider)) - extraFields.CacheDebug.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel) - extraFields.CacheDebug.InputTokens = &inputTokens + // Free state once the request is fully observed. For non-streams that's + // after this PostLLMHook returns; for streams, only on the final chunk. + defer func() { + if !isStream || isFinalChunk { + plugin.clearCacheState(requestID) } + }() + + // PreLLMHook short-circuited from cache; chunks here are the cached + // replay, not a fresh upstream response. shouldSkipCaching only catches + // the FINAL chunk (the only one carrying CacheDebug.CacheHit=true via + // stampCacheDebugForHit) — without this guard the non-final chunks + // would slip into addStreamingResponse and trigger a duplicate write + // at the same directCacheID (Weaviate 422 "id already exists"). 
+ if state.ShortCircuited { + return res, nil, nil } - cacheTTL := plugin.config.TTL + storageID, embedding, shouldStoreEmbeddings := plugin.resolveStorageIDAndEmbedding(ctx, state, requestID, requestType) - ttlValue := ctx.Value(CacheTTLKey) - if ttlValue != nil { - // Get the request TTL from the context - ttl, ok := ttlValue.(time.Duration) - if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " TTL is not a time.Duration, using default TTL") - } else { - cacheTTL = ttl - } - } + plugin.stampCacheDebugForMiss(state, extraFields, storageID, isStream, isFinalChunk) - // Get metadata from context BEFORE goroutine to avoid race conditions - // when the same context is reused across multiple requests - paramsHash, _ := ctx.Value(requestParamsHashKey).(string) - - // Snapshot the response synchronously for the non-streaming cache path. - // Marshaling inside the cache goroutine races with the framework returning - // res upstream and downstream consumers mutating it (CacheDebug, etc.). - // Streaming uses a chunk accumulator that snapshots per-chunk separately. 
- var singleResponseData []byte - if !bifrost.IsStreamRequestType(requestType) { - var marshalErr error - singleResponseData, marshalErr = json.Marshal(res) - if marshalErr != nil { - plugin.logger.Warn("%s Failed to snapshot response for caching: %v", PluginLoggerPrefix, marshalErr) - return res, nil, nil - } + cacheTTL := plugin.resolveTTL(ctx) + paramsHash := state.ParamsHash + + embeddingToStore := embedding + if !shouldStoreEmbeddings { + embeddingToStore = nil } - // Cache everything in a unified VectorEntry asynchronously to avoid blocking the response - plugin.waitGroup.Add(1) + plugin.writersWg.Add(1) go func() { - defer plugin.waitGroup.Done() - // Create a background context with timeout for the cache operation + defer plugin.writersWg.Done() cacheCtx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout) defer cancel() - // Build unified metadata with provider, model, and all params - unifiedMetadata := plugin.buildUnifiedMetadata(provider, model, paramsHash, hash, cacheKey, cacheTTL) - - // Handle streaming vs non-streaming responses - // Pass nil for embedding if we're in direct-only mode to optimize storage - embeddingToStore := embedding - if !shouldStoreEmbeddings { - embeddingToStore = nil - } - - if bifrost.IsStreamRequestType(requestType) { - if err := plugin.addStreamingResponse(cacheCtx, requestID, storageID, res, bifrostErr, embeddingToStore, unifiedMetadata, cacheTTL, isFinalChunk); err != nil { - plugin.logger.Warn("%s Failed to cache streaming response: %v", PluginLoggerPrefix, err) + unifiedMetadata := plugin.buildUnifiedMetadata(provider, model, paramsHash, cacheKey, cacheTTL) + if isStream { + if err := plugin.addStreamingResponse(cacheCtx, requestID, storageID, res, embeddingToStore, unifiedMetadata, cacheTTL, isFinalChunk); err != nil { + plugin.logger.Warn("Failed to cache streaming response (namespace=%s, id=%s): %v. 
The cache_id stamped on the response will not resolve on subsequent lookups.", plugin.config.VectorStoreNamespace, storageID, err) } } else { - if err := plugin.addSingleResponse(cacheCtx, storageID, singleResponseData, embeddingToStore, unifiedMetadata, cacheTTL); err != nil { - plugin.logger.Warn("%s Failed to cache single response: %v", PluginLoggerPrefix, err) + if err := plugin.addNonStreamingResponse(cacheCtx, storageID, res, embeddingToStore, unifiedMetadata, cacheTTL); err != nil { + plugin.logger.Warn("Failed to cache single response (namespace=%s, id=%s): %v. The cache_id stamped on the response will not resolve on subsequent lookups.", plugin.config.VectorStoreNamespace, storageID, err) } } }() @@ -677,35 +580,113 @@ func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.Bifr return res, nil, nil } +// shouldSkipCaching returns true if the response cannot or should not be +// written to the cache (large payload mode, cache hit replay, or explicit +// no-store). +func (plugin *Plugin) shouldSkipCaching(ctx *schemas.BifrostContext, res *schemas.BifrostResponse) bool { + if isLargePayload, ok := ctx.Value(schemas.BifrostContextKeyLargePayloadMode).(bool); ok && isLargePayload { + return true + } + if isLargeResponse, ok := ctx.Value(schemas.BifrostContextKeyLargeResponseMode).(bool); ok && isLargeResponse { + return true + } + if cacheDebug := res.GetExtraFields().CacheDebug; cacheDebug != nil && cacheDebug.CacheHit { + return true + } + if noStore, ok := ctx.Value(CacheNoStoreKey).(bool); ok && noStore { + return true + } + return false +} + +// resolveStorageIDAndEmbedding picks the storage ID (deterministic directCacheID +// when direct search ran, else the request UUID) and resolves the embedding +// from per-request state. shouldStoreEmbeddings is false for explicit +// direct-only requests on stores that don't require vectors — those entries +// skip the embedding column entirely. 
+func (plugin *Plugin) resolveStorageIDAndEmbedding(ctx *schemas.BifrostContext, state *cacheState, requestID string, requestType schemas.RequestType) (storageID string, embedding []float32, shouldStoreEmbeddings bool) { + storageID = requestID + if state.DirectCacheID != "" { + storageID = state.DirectCacheID + } + + shouldStoreEmbeddings = true + if cacheTypeVal, isCacheType := ctx.Value(CacheTypeKey).(CacheType); isCacheType && cacheTypeVal == CacheTypeDirect && !plugin.store.RequiresVectors() { + shouldStoreEmbeddings = false + } + + isEmbeddingOrTranscription := requestType == schemas.EmbeddingRequest || requestType == schemas.TranscriptionRequest + needsEmbedding := shouldStoreEmbeddings && !isEmbeddingOrTranscription + needsZeroVector := isEmbeddingOrTranscription && plugin.store.RequiresVectors() + + if needsEmbedding || needsZeroVector { + // embedding may still be nil — fine for direct hash matching unless the + // store requires vectors (in which case Add will reject downstream). + embedding = state.Embeddings + } + return storageID, embedding, shouldStoreEmbeddings +} + +// stampCacheDebugForMiss attaches cache miss telemetry to the response. It +// always sets CacheHit=false and CacheID to the storage ID where the entry +// will be written, so the caller can later invalidate via ClearCacheForCacheID. +// Embedding-cost fields (ProviderUsed/ModelUsed/InputTokens) are only stamped +// when semantic search actually ran. For streams, only the final chunk is +// stamped to avoid duplicating telemetry. 
+func (plugin *Plugin) stampCacheDebugForMiss(state *cacheState, extraFields *schemas.BifrostResponseExtraFields, storageID string, isStream, isFinalChunk bool) { + if isStream && !isFinalChunk { + return + } + if extraFields.CacheDebug == nil { + extraFields.CacheDebug = &schemas.BifrostCacheDebug{} + } + cd := extraFields.CacheDebug + cd.CacheHit = false + cd.CacheID = bifrost.Ptr(storageID) + if state.EmbeddingsInputTokens > 0 { + inputTokens := state.EmbeddingsInputTokens + cd.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider)) + cd.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel) + cd.InputTokens = &inputTokens + } +} + +// resolveTTL returns the per-request TTL override if present, else the plugin default. +func (plugin *Plugin) resolveTTL(ctx *schemas.BifrostContext) time.Duration { + if v := ctx.Value(CacheTTLKey); v != nil { + if ttl, ok := v.(time.Duration); ok { + return ttl + } + plugin.logger.Warn("TTL is not a time.Duration, using default TTL") + } + return plugin.config.TTL +} + // WaitForPendingOperations blocks until all pending cache operations (goroutines) complete. // This is useful in tests to ensure cache entries are stored before checking for cache hits. +// It does NOT wait on background loops — those only exit on Cleanup. func (plugin *Plugin) WaitForPendingOperations() { - plugin.waitGroup.Wait() + plugin.writersWg.Wait() } -// Cleanup performs cleanup operations for the semantic cache plugin. -// It removes all cached entries created by this plugin from the VectorStore only if CleanUpOnShutdown is true. -// Identifies cache entries by the presence of semantic cache-specific fields (request_hash, cache_key). -// -// The function performs the following operations: -// 1. Checks if cleanup is enabled via CleanUpOnShutdown config -// 2. Retrieves all entries and filters client-side to identify cache entries -// 3. 
Deletes all matching cache entries from the VectorStore in batches -// -// This method should be called when shutting down the application to ensure -// proper resource cleanup if configured to do so. -// -// Returns: -// - error: Any error that occurred during cleanup operations +// Cleanup signals the background loops to stop and waits for in-flight cache +// writes to drain before returning. When CleanUpOnShutdown is true, it then +// deletes every entry tagged from_bifrost_semantic_cache_plugin and drops +// the namespace — useful for ephemeral test environments. The default is to +// leave entries in place so they can serve subsequent process restarts. func (plugin *Plugin) Cleanup() error { - plugin.waitGroup.Wait() + close(plugin.stopCh) + plugin.writersWg.Wait() + plugin.cleanupWg.Wait() - // Clean up old stream accumulators first + // Final sweep: the periodic reaper only fires once per streamCleanupInterval, + // so any abandoned accumulator added in the window between the last tick + // and stopCh is still in memory. This call evicts those before we return. 
plugin.cleanupOldStreamAccumulators() // Only clean up cache entries if configured to do so if !plugin.config.CleanUpOnShutdown { - plugin.logger.Debug(PluginLoggerPrefix + " Cleanup on shutdown is disabled, skipping cache cleanup") + plugin.logger.Debug("Cleanup on shutdown is disabled, skipping cache cleanup") return nil } @@ -713,7 +694,7 @@ func (plugin *Plugin) Cleanup() error { ctx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout) defer cancel() - plugin.logger.Debug(PluginLoggerPrefix + " Starting cleanup of cache entries...") + plugin.logger.Debug("Starting cleanup of cache entries...") // Delete all cache entries created by this plugin queries := []vectorstore.Query{ @@ -731,10 +712,10 @@ func (plugin *Plugin) Cleanup() error { for _, result := range results { if result.Status == vectorstore.DeleteStatusError { - plugin.logger.Warn("%s Failed to delete cache entry: %s", PluginLoggerPrefix, result.Error) + plugin.logger.Warn("Failed to delete cache entry: %s", result.Error) } } - plugin.logger.Info("%s Cleanup completed - deleted all cache entries", PluginLoggerPrefix) + plugin.logger.Debug("Cleanup completed - deleted all cache entries") if err := plugin.store.DeleteNamespace(ctx, plugin.config.VectorStoreNamespace); err != nil { return fmt.Errorf("failed to delete namespace: %w", err) @@ -743,27 +724,17 @@ func (plugin *Plugin) Cleanup() error { return nil } -// SetEmbeddingRequestExecutor sets the embedding request executor for the plugin. -// Needs to be set before the plugin is used. -// -// Parameters: -// - executor: The embedding request executor to set +// SetEmbeddingRequestExecutor wires up the function the plugin uses to call +// out to the embedding provider. Must be set before the plugin starts +// serving traffic; semantic search is silently skipped while it's nil. 
func (plugin *Plugin) SetEmbeddingRequestExecutor(executor EmbeddingRequestExecutor) { plugin.embeddingRequestExecutor = executor } -// Public Methods for External Use - -// ClearCacheForKey deletes cache entries for a specific cache key. -// Uses the unified VectorStore interface for deletion of all entries with the given cache key. -// -// Parameters: -// - cacheKey: The specific cache key to delete -// -// Returns: -// - error: Any error that occurred during cache key deletion +// ClearCacheForKey deletes every entry written under the given cache_key. +// Use this to invalidate a tenant or feature scope in bulk. Per-entry +// deletion is available via ClearCacheForCacheID. func (plugin *Plugin) ClearCacheForKey(cacheKey string) error { - // Delete all entries with "cache_key" equal to the given cacheKey queries := []vectorstore.Query{ { Field: "cache_key", @@ -781,52 +752,35 @@ func (plugin *Plugin) ClearCacheForKey(cacheKey string) error { defer cancel() results, err := plugin.store.DeleteAll(ctx, plugin.config.VectorStoreNamespace, queries) if err != nil { - plugin.logger.Warn("%s Failed to delete cache entries for key '%s': %v", PluginLoggerPrefix, cacheKey, err) + plugin.logger.Warn("Failed to delete cache entries for key '%s': %v", cacheKey, err) return err } for _, result := range results { if result.Status == vectorstore.DeleteStatusError { - plugin.logger.Warn("%s Failed to delete cache entry for key %s: %s", PluginLoggerPrefix, result.ID, result.Error) + plugin.logger.Warn("Failed to delete cache entry for key %s: %s", result.ID, result.Error) } } - plugin.logger.Debug(fmt.Sprintf("%s Deleted all cache entries for key %s", PluginLoggerPrefix, cacheKey)) + plugin.logger.Debug("Deleted all cache entries for key %s", cacheKey) return nil } -// ClearCacheForRequestID deletes cache entries for a specific request ID. -// Uses the unified VectorStore interface to delete the single entry by its UUID. 
-// -// Parameters: -// - requestID: The UUID-based request ID to delete cache entries for -// -// Returns: -// - error: Any error that occurred during cache key deletion -func (plugin *Plugin) ClearCacheForRequestID(requestID string) error { - // With the unified VectorStore interface, we delete the single entry by its UUID +// ClearCacheForCacheID deletes a single cache entry by its storage ID. The +// caller obtains the ID from BifrostResponse.ExtraFields.CacheDebug.CacheID, +// which is stamped on both cache hits and cache misses — so the same handle +// works whether the request wrote the entry or read it. +func (plugin *Plugin) ClearCacheForCacheID(cacheID string) error { + if cacheID == "" { + return fmt.Errorf("cache ID is required") + } ctx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout) defer cancel() - if err := plugin.store.Delete(ctx, plugin.config.VectorStoreNamespace, requestID); err != nil { - plugin.logger.Warn("%s Failed to delete cache entry: %v", PluginLoggerPrefix, err) + if err := plugin.store.Delete(ctx, plugin.config.VectorStoreNamespace, cacheID); err != nil { + plugin.logger.Warn("Failed to delete cache entry %s: %v", cacheID, err) return err } - - plugin.logger.Debug(fmt.Sprintf("%s Deleted cache entry for key %s", PluginLoggerPrefix, requestID)) - + plugin.logger.Debug("Deleted cache entry %s", cacheID) return nil } - -func (plugin *Plugin) clearRequestScopedContext(ctx *schemas.BifrostContext) { - ctx.ClearValue(requestIDKey) - ctx.ClearValue(requestStorageIDKey) - ctx.ClearValue(requestHashKey) - ctx.ClearValue(requestParamsHashKey) - ctx.ClearValue(requestModelKey) - ctx.ClearValue(requestProviderKey) - ctx.ClearValue(requestEmbeddingKey) - ctx.ClearValue(requestEmbeddingTokensKey) - ctx.ClearValue(isCacheHitKey) - ctx.ClearValue(cacheHitTypeKey) -} diff --git a/plugins/semanticcache/main_test.go b/plugins/semanticcache/main_test.go new file mode 100644 index 0000000000..0924fa726b --- /dev/null +++ 
b/plugins/semanticcache/main_test.go @@ -0,0 +1,39 @@ +package semanticcache + +import ( + "context" + "os" + "testing" + "time" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/vectorstore" +) + +// TestMain drops the shared test namespace BEFORE the run starts (in case a +// previous run was interrupted and left stale entries) AND once after — both +// matter: tests share one namespace + one cache_key prefix per t.Name(), +// so stale writes from a prior interrupted run would surface as spurious +// cache hits on the first request of the next run. +func TestMain(m *testing.M) { + dropSharedTestNamespace() // pre-run sweep + code := m.Run() + dropSharedTestNamespace() // post-run sweep + os.Exit(code) +} + +func dropSharedTestNamespace() { + cfg := getWeaviateConfigFromEnv() + store, err := vectorstore.NewVectorStore(context.Background(), &vectorstore.Config{ + Type: vectorstore.VectorStoreTypeWeaviate, + Config: cfg, + Enabled: true, + }, bifrost.NewDefaultLogger(schemas.LogLevelError)) + if err != nil { + return + } + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + _ = store.DeleteNamespace(ctx, SharedTestNamespace) +} diff --git a/plugins/semanticcache/plugin_api_test.go b/plugins/semanticcache/plugin_api_test.go new file mode 100644 index 0000000000..908e88149d --- /dev/null +++ b/plugins/semanticcache/plugin_api_test.go @@ -0,0 +1,378 @@ +package semanticcache + +import ( + "context" + "sync" + "sync/atomic" + "testing" + "time" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/vectorstore" +) + +// observableStore is a fuller mock than directFastPathStore — it records all +// Delete / DeleteAll / DeleteNamespace calls so the tests can assert on the +// public Clear* APIs and on Cleanup teardown behavior. 
+type observableStore struct { + mu sync.Mutex + chunks map[string]vectorstore.SearchResult + addIDs []string + deleteIDs []string + deleteAllQueries [][]vectorstore.Query + namespaceDeletes int + deleteAllErr error + deleteErr error + deleteAllResults []vectorstore.DeleteResult +} + +func newObservableStore() *observableStore { + return &observableStore{chunks: make(map[string]vectorstore.SearchResult)} +} + +func (s *observableStore) Ping(ctx context.Context) error { return nil } +func (s *observableStore) CreateNamespace(ctx context.Context, ns string, dim int, props map[string]vectorstore.VectorStoreProperties) error { + return nil +} +func (s *observableStore) DeleteNamespace(ctx context.Context, ns string) error { + s.mu.Lock() + s.namespaceDeletes++ + s.mu.Unlock() + return nil +} +func (s *observableStore) GetChunk(ctx context.Context, ns string, id string) (vectorstore.SearchResult, error) { + s.mu.Lock() + defer s.mu.Unlock() + r, ok := s.chunks[id] + if !ok { + return vectorstore.SearchResult{}, vectorstore.ErrNotFound + } + return r, nil +} +func (s *observableStore) GetChunks(ctx context.Context, ns string, ids []string) ([]vectorstore.SearchResult, error) { + return nil, vectorstore.ErrNotSupported +} +func (s *observableStore) GetAll(ctx context.Context, ns string, q []vectorstore.Query, sf []string, cur *string, lim int64) ([]vectorstore.SearchResult, *string, error) { + return nil, nil, vectorstore.ErrNotSupported +} +func (s *observableStore) GetNearest(ctx context.Context, ns string, v []float32, q []vectorstore.Query, sf []string, th float64, lim int64) ([]vectorstore.SearchResult, error) { + return nil, vectorstore.ErrNotSupported +} +func (s *observableStore) RequiresVectors() bool { return false } +func (s *observableStore) Add(ctx context.Context, ns string, id string, e []float32, m map[string]interface{}) error { + s.mu.Lock() + s.addIDs = append(s.addIDs, id) + s.chunks[id] = vectorstore.SearchResult{ID: id, Properties: m} + s.mu.Unlock() 
+ return nil +} +func (s *observableStore) Delete(ctx context.Context, ns string, id string) error { + s.mu.Lock() + s.deleteIDs = append(s.deleteIDs, id) + delete(s.chunks, id) + err := s.deleteErr + s.mu.Unlock() + return err +} +func (s *observableStore) DeleteAll(ctx context.Context, ns string, queries []vectorstore.Query) ([]vectorstore.DeleteResult, error) { + s.mu.Lock() + s.deleteAllQueries = append(s.deleteAllQueries, queries) + results := s.deleteAllResults + err := s.deleteAllErr + s.mu.Unlock() + return results, err +} +func (s *observableStore) Close(ctx context.Context, ns string) error { return nil } + +func newTestPlugin(t *testing.T, store vectorstore.VectorStore, cleanupOnShutdown bool) *Plugin { + t.Helper() + cfg := getDefaultTestConfig() + cfg.CleanUpOnShutdown = cleanupOnShutdown + return &Plugin{ + store: store, + config: cfg, + logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug), + stopCh: make(chan struct{}), + } +} + +// ----------------------------------------------------------------------------- +// ClearCacheForCacheID +// ----------------------------------------------------------------------------- + +func TestClearCacheForCacheID_EmptyIDRejected(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + if err := plugin.ClearCacheForCacheID(""); err == nil { + t.Fatal("expected error for empty cache ID") + } +} + +func TestClearCacheForCacheID_PointDelete(t *testing.T) { + store := newObservableStore() + plugin := newTestPlugin(t, store, false) + + if err := plugin.ClearCacheForCacheID("cache-abc"); err != nil { + t.Fatalf("ClearCacheForCacheID failed: %v", err) + } + store.mu.Lock() + defer store.mu.Unlock() + if len(store.deleteIDs) != 1 || store.deleteIDs[0] != "cache-abc" { + t.Fatalf("expected single Delete call for 'cache-abc', got %v", store.deleteIDs) + } +} + +// ----------------------------------------------------------------------------- +// ClearCacheForKey +// 
----------------------------------------------------------------------------- + +func TestClearCacheForKey_FiltersByCacheKeyAndPluginMarker(t *testing.T) { + store := newObservableStore() + plugin := newTestPlugin(t, store, false) + + if err := plugin.ClearCacheForKey("session-42"); err != nil { + t.Fatalf("ClearCacheForKey failed: %v", err) + } + + store.mu.Lock() + defer store.mu.Unlock() + if len(store.deleteAllQueries) != 1 { + t.Fatalf("expected one DeleteAll call, got %d", len(store.deleteAllQueries)) + } + queries := store.deleteAllQueries[0] + gotKey, gotMarker := false, false + for _, q := range queries { + if q.Field == "cache_key" && q.Value == "session-42" && q.Operator == vectorstore.QueryOperatorEqual { + gotKey = true + } + if q.Field == "from_bifrost_semantic_cache_plugin" && q.Value == true { + gotMarker = true + } + } + if !gotKey { + t.Errorf("expected cache_key=session-42 filter, got %+v", queries) + } + if !gotMarker { + t.Errorf("expected from_bifrost_semantic_cache_plugin=true filter, got %+v", queries) + } +} + +// ----------------------------------------------------------------------------- +// stampCacheDebugForMiss +// ----------------------------------------------------------------------------- + +func TestStampCacheDebugForMiss_AlwaysSetsCacheID(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + state := &cacheState{} + extra := &schemas.BifrostResponseExtraFields{} + + plugin.stampCacheDebugForMiss(state, extra, "stored-id-123", false, false) + + if extra.CacheDebug == nil { + t.Fatal("expected CacheDebug to be stamped on miss") + } + if extra.CacheDebug.CacheHit { + t.Fatal("expected CacheHit=false on miss") + } + if extra.CacheDebug.CacheID == nil || *extra.CacheDebug.CacheID != "stored-id-123" { + t.Fatalf("expected CacheID=stored-id-123, got %v", extra.CacheDebug.CacheID) + } + // No semantic search ran → embedding fields should be unset. 
+ if extra.CacheDebug.ProviderUsed != nil || extra.CacheDebug.InputTokens != nil { + t.Fatalf("expected embedding fields nil on direct-only miss, got %+v", extra.CacheDebug) + } +} + +func TestStampCacheDebugForMiss_AddsTelemetryWhenSemanticRan(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + state := &cacheState{EmbeddingsInputTokens: 42} + extra := &schemas.BifrostResponseExtraFields{} + + plugin.stampCacheDebugForMiss(state, extra, "id-x", false, false) + + if extra.CacheDebug.InputTokens == nil || *extra.CacheDebug.InputTokens != 42 { + t.Fatalf("expected InputTokens=42, got %v", extra.CacheDebug.InputTokens) + } + if extra.CacheDebug.ProviderUsed == nil { + t.Fatal("expected ProviderUsed to be stamped when semantic ran") + } +} + +func TestStampCacheDebugForMiss_StreamSkipsNonFinalChunks(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + state := &cacheState{} + extra := &schemas.BifrostResponseExtraFields{} + + plugin.stampCacheDebugForMiss(state, extra, "id-y", true, false) // mid-stream + + if extra.CacheDebug != nil { + t.Fatal("expected mid-stream chunk to NOT be stamped") + } +} + +// ----------------------------------------------------------------------------- +// Cleanup +// ----------------------------------------------------------------------------- + +func TestCleanup_SkipsEntryDeletionWhenDisabled(t *testing.T) { + store := newObservableStore() + plugin := newTestPlugin(t, store, false) // CleanUpOnShutdown=false + + if err := plugin.Cleanup(); err != nil { + t.Fatalf("Cleanup failed: %v", err) + } + + store.mu.Lock() + defer store.mu.Unlock() + if len(store.deleteAllQueries) != 0 { + t.Errorf("expected no DeleteAll calls when cleanup disabled, got %d", len(store.deleteAllQueries)) + } + if store.namespaceDeletes != 0 { + t.Errorf("expected no DeleteNamespace calls when cleanup disabled, got %d", store.namespaceDeletes) + } +} + +func TestCleanup_DeletesEntriesAndNamespaceWhenEnabled(t *testing.T) 
{ + store := newObservableStore() + plugin := newTestPlugin(t, store, true) // CleanUpOnShutdown=true + + if err := plugin.Cleanup(); err != nil { + t.Fatalf("Cleanup failed: %v", err) + } + + store.mu.Lock() + defer store.mu.Unlock() + if len(store.deleteAllQueries) != 1 { + t.Fatalf("expected one DeleteAll call, got %d", len(store.deleteAllQueries)) + } + if store.namespaceDeletes != 1 { + t.Fatalf("expected one DeleteNamespace call, got %d", store.namespaceDeletes) + } +} + +func TestCleanup_DrainsPendingWriters(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + var done atomic.Bool + plugin.writersWg.Add(1) + go func() { + defer plugin.writersWg.Done() + time.Sleep(50 * time.Millisecond) + done.Store(true) + }() + + if err := plugin.Cleanup(); err != nil { + t.Fatalf("Cleanup failed: %v", err) + } + if !done.Load() { + t.Fatal("expected Cleanup to wait for pending writers to finish") + } +} + +// ----------------------------------------------------------------------------- +// cacheState reaper +// ----------------------------------------------------------------------------- + +func TestCleanupOldCacheStates_ReapsOldEntries(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + plugin.cacheStates.Store("old-1", &cacheState{CreatedAt: time.Now().Add(-2 * cacheStateMaxAge)}) + plugin.cacheStates.Store("old-2", &cacheState{CreatedAt: time.Now().Add(-2 * cacheStateMaxAge)}) + plugin.cacheStates.Store("recent", &cacheState{CreatedAt: time.Now()}) + + plugin.cleanupOldCacheStates() + + if _, ok := plugin.cacheStates.Load("old-1"); ok { + t.Error("expected old-1 to be reaped") + } + if _, ok := plugin.cacheStates.Load("old-2"); ok { + t.Error("expected old-2 to be reaped") + } + if _, ok := plugin.cacheStates.Load("recent"); !ok { + t.Error("expected recent to be preserved") + } +} + +// ----------------------------------------------------------------------------- +// Stream accumulator reaper +// 
----------------------------------------------------------------------------- + +func TestCleanupOldStreamAccumulators_ReapsByLastSeenAt(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + plugin.streamAccumulators.Store("old", &StreamAccumulator{ + RequestID: "old", + LastSeenAt: time.Now().Add(-2 * streamAccumulatorMaxAge), + }) + plugin.streamAccumulators.Store("recent", &StreamAccumulator{ + RequestID: "recent", + LastSeenAt: time.Now(), + }) + + plugin.cleanupOldStreamAccumulators() + + if _, ok := plugin.streamAccumulators.Load("old"); ok { + t.Error("expected old accumulator to be reaped") + } + if _, ok := plugin.streamAccumulators.Load("recent"); !ok { + t.Error("expected recent accumulator to be preserved") + } +} + +// ----------------------------------------------------------------------------- +// Replay goroutine cancellation (buildStreamingResponseFromResult) +// ----------------------------------------------------------------------------- + +func TestBuildStreamingResponseFromResult_ConsumerAbandonment(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + // Build a cached entry with multiple chunks. + chunkJSON := `{"chat_response":{"choices":[]}}` + streamArray := []string{chunkJSON, chunkJSON, chunkJSON, chunkJSON, chunkJSON} + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionStreamRequest, + ChatRequest: CreateBasicChatRequest("hi", 0.7, 50), + } + ctx := newBaseTestContext() + state := &cacheState{} + + sc, err := plugin.buildStreamingResponseFromResult( + ctx, state, req, + vectorstore.SearchResult{ID: "stream-id"}, + streamArray, CacheTypeSemantic, nil, nil, nil, + ) + if err != nil { + t.Fatalf("buildStreamingResponseFromResult failed: %v", err) + } + if sc == nil || sc.Stream == nil { + t.Fatal("expected a stream short-circuit") + } + + // Read one chunk, then cancel ctx — the replay goroutine should exit + // (close the channel) instead of blocking on its send forever. 
+ // Guard the first receive so a regression that stalls the producer + // fails fast instead of hanging until the suite-level timeout. + select { + case _, ok := <-sc.Stream: + if !ok { + t.Fatal("expected first replay chunk before cancellation, channel closed early") + } + case <-time.After(2 * time.Second): + t.Fatal("replay goroutine did not emit the first chunk") + } + ctx.Cancel() + + // Drain remaining; channel must close within a reasonable bound. + timeout := time.After(2 * time.Second) + for { + select { + case _, ok := <-sc.Stream: + if !ok { + return // channel closed → replay goroutine exited cleanly ✓ + } + case <-timeout: + t.Fatal("replay goroutine did not exit after ctx.Cancel()") + } + } +} diff --git a/plugins/semanticcache/plugin_cache_type_test.go b/plugins/semanticcache/plugin_cache_type_test.go index ee28902ae8..9d8d655a1d 100644 --- a/plugins/semanticcache/plugin_cache_type_test.go +++ b/plugins/semanticcache/plugin_cache_type_test.go @@ -2,7 +2,6 @@ package semanticcache import ( "context" - "errors" "sync" "testing" "time" @@ -14,24 +13,25 @@ import ( // TestCacheTypeDirectOnly tests that CacheTypeKey set to "direct" only performs direct hash matching func TestCacheTypeDirectOnly(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // First, cache a response using CacheTypeDirect so it is stored under the deterministic ID - ctx1 := CreateContextWithCacheKeyAndType("test-cache-type-direct", CacheTypeDirect) + ctx1 := CreateContextWithCacheKeyAndType(t, "test-cache-type-direct", CacheTypeDirect) testRequest := CreateBasicChatRequest("What is Bifrost?", 0.7, 50) t.Log("Making first request to populate cache...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Now 
test with CacheTypeKey set to direct only - ctx2 := CreateContextWithCacheKeyAndType("test-cache-type-direct", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cache-type-direct", CacheTypeDirect) t.Log("Making second request with CacheTypeKey=direct...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) @@ -47,17 +47,18 @@ func TestCacheTypeDirectOnly(t *testing.T) { // TestCacheTypeSemanticOnly tests that CacheTypeKey set to "semantic" only performs semantic search func TestCacheTypeSemanticOnly(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // First, cache a response using normal behavior - ctx1 := CreateContextWithCacheKey("test-cache-type-semantic") + ctx1 := CreateContextWithCacheKey(t, "test-cache-type-semantic") testRequest := CreateBasicChatRequest("Explain machine learning concepts", 0.7, 50) t.Log("Making first request to populate cache...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -67,7 +68,7 @@ func TestCacheTypeSemanticOnly(t *testing.T) { similarRequest := CreateBasicChatRequest("Can you explain concepts in machine learning", 0.7, 50) // Try with semantic-only search - ctx2 := CreateContextWithCacheKeyAndType("test-cache-type-semantic", CacheTypeSemantic) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cache-type-semantic", CacheTypeSemantic) t.Log("Making second request with similar content and CacheTypeKey=semantic...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, similarRequest) @@ -79,9 +80,14 @@ func TestCacheTypeSemanticOnly(t *testing.T) { } } - // This might be a cache hit if semantic similarity is high enough - // The test validates that semantic search is attempted - if response2.ExtraFields.CacheDebug != nil && 
response2.ExtraFields.CacheDebug.CacheHit { + // This might be a cache hit if semantic similarity is high enough. + // Hit/miss is similarity-dependent, but CacheDebug must be stamped either + // way — semantic search ran. This catches a regression where the stamping + // stops without making the test flake on similarity scores. + if response2.ExtraFields.CacheDebug == nil { + t.Fatal("expected CacheDebug to be stamped on the response (semantic search should have run)") + } + if response2.ExtraFields.CacheDebug.CacheHit { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "semantic") t.Log("✅ CacheTypeKey=semantic correctly found semantic match") } else { @@ -94,24 +100,25 @@ func TestCacheTypeSemanticOnly(t *testing.T) { // TestCacheTypeDirectWithSemanticFallback tests the default behavior (both direct and semantic) func TestCacheTypeDirectWithSemanticFallback(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Cache a response first - ctx1 := CreateContextWithCacheKey("test-cache-type-fallback") + ctx1 := CreateContextWithCacheKey(t, "test-cache-type-fallback") testRequest := CreateBasicChatRequest("Define artificial intelligence", 0.7, 50) t.Log("Making first request to populate cache...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Test exact match (should hit direct cache) - ctx2 := CreateContextWithCacheKey("test-cache-type-fallback") + ctx2 := CreateContextWithCacheKey(t, "test-cache-type-fallback") t.Log("Making second identical request (should hit direct cache)...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) @@ -133,8 +140,12 @@ func TestCacheTypeDirectWithSemanticFallback(t *testing.T) { t.Fatalf("Third request failed: %v", 
err3) } - // May or may not be a cache hit depending on semantic similarity - if response3.ExtraFields.CacheDebug != nil && response3.ExtraFields.CacheDebug.CacheHit { + // May or may not be a cache hit depending on semantic similarity, but + // CacheDebug must be stamped (regression guard). + if response3.ExtraFields.CacheDebug == nil { + t.Fatal("expected CacheDebug to be stamped on the response") + } + if response3.ExtraFields.CacheDebug.CacheHit { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "semantic") t.Log("✅ Default behavior correctly found semantic match") } else { @@ -145,49 +156,66 @@ func TestCacheTypeDirectWithSemanticFallback(t *testing.T) { t.Log("✅ Default behavior correctly attempts both direct and semantic search") } -// TestCacheTypeInvalidValue tests behavior with invalid CacheTypeKey values +// TestCacheTypeInvalidValue tests behavior with invalid CacheTypeKey values: +// the plugin must fall back to default behavior (try both direct + semantic) +// rather than disable caching entirely. func TestCacheTypeInvalidValue(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - // Create context with invalid cache type - ctx := CreateContextWithCacheKey("test-invalid-cache-type") - ctx = ctx.WithValue(CacheTypeKey, "invalid_type") - testRequest := CreateBasicChatRequest("Test invalid cache type", 0.7, 50) - t.Log("Making request with invalid CacheTypeKey value...") - response, err := setup.Client.ChatCompletionRequest(ctx, testRequest) + // First request with invalid CacheTypeKey — must be a miss but ALSO must + // have caused the response to be cached (fallback to default behavior). 
+ ctx1 := CreateContextWithCacheKey(t, "test-invalid-cache-type") + ctx1 = ctx1.WithValue(CacheTypeKey, "invalid_type") + + t.Log("Making first request with invalid CacheTypeKey value...") + response1, err := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err) } + AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) - // Should fall back to default behavior (both direct and semantic) - AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response}) + WaitForCache(setup.Plugin) - t.Log("✅ Invalid CacheTypeKey value falls back to default behavior") + // Second identical request — fallback should mean the entry was written + // the first time, so this must hit (proves the invalid value didn't + // disable caching as a side effect). + ctx2 := CreateContextWithCacheKey(t, "test-invalid-cache-type") + ctx2 = ctx2.WithValue(CacheTypeKey, "invalid_type") + t.Log("Making second identical request — must hit cache, proving fallback to default cached the first call...") + response2, err := setup.Client.ChatCompletionRequest(ctx2, testRequest) + if err != nil { + t.Fatalf("Second request failed: %v", err) + } + AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect)) + + t.Log("✅ Invalid CacheTypeKey value falls back to default behavior (caching works)") } // TestCacheTypeWithEmbeddingRequests tests CacheTypeKey behavior with embedding requests func TestCacheTypeWithEmbeddingRequests(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() embeddingRequest := CreateEmbeddingRequest([]string{"Test embedding with cache type"}) // Cache first request - ctx1 := CreateContextWithCacheKey("test-embedding-cache-type") + ctx1 := CreateContextWithCacheKey(t, "test-embedding-cache-type") t.Log("Making first embedding request...") response1, err1 := 
setup.Client.EmbeddingRequest(ctx1, embeddingRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1}) WaitForCache(setup.Plugin) // Test with direct-only cache type - ctx2 := CreateContextWithCacheKeyAndType("test-embedding-cache-type", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-embedding-cache-type", CacheTypeDirect) t.Log("Making second embedding request with CacheTypeKey=direct...") response2, err2 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest) if err2 != nil { @@ -200,7 +228,7 @@ func TestCacheTypeWithEmbeddingRequests(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct") // Test with semantic-only cache type (should not find semantic match for embeddings) - ctx3 := CreateContextWithCacheKeyAndType("test-embedding-cache-type", CacheTypeSemantic) + ctx3 := CreateContextWithCacheKeyAndType(t, "test-embedding-cache-type", CacheTypeSemantic) t.Log("Making third embedding request with CacheTypeKey=semantic...") response3, err3 := setup.Client.EmbeddingRequest(ctx3, embeddingRequest) if err3 != nil { @@ -214,24 +242,25 @@ func TestCacheTypeWithEmbeddingRequests(t *testing.T) { // TestCacheTypePerformanceCharacteristics tests that different cache types have expected performance func TestCacheTypePerformanceCharacteristics(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("Performance test for cache types", 0.7, 50) // Cache first request using CacheTypeDirect so it is stored under the deterministic ID - ctx1 := CreateContextWithCacheKeyAndType("test-cache-performance", CacheTypeDirect) + ctx1 := CreateContextWithCacheKeyAndType(t, "test-cache-performance", CacheTypeDirect) t.Log("Making first request to populate cache...") response1, err1 := 
setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Test direct-only performance - ctx2 := CreateContextWithCacheKeyAndType("test-cache-performance", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cache-performance", CacheTypeDirect) start2 := time.Now() response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) duration2 := time.Since(start2) @@ -243,7 +272,7 @@ func TestCacheTypePerformanceCharacteristics(t *testing.T) { t.Logf("Direct cache lookup took: %v", duration2) // Test default behavior (both direct and semantic) performance - ctx3 := CreateContextWithCacheKey("test-cache-performance") + ctx3 := CreateContextWithCacheKey(t, "test-cache-performance") start3 := time.Now() response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest) duration3 := time.Since(start3) @@ -254,8 +283,17 @@ func TestCacheTypePerformanceCharacteristics(t *testing.T) { t.Logf("Default cache lookup took: %v", duration3) - // Both should be fast since they hit direct cache - // Direct-only might be slightly faster as it doesn't need to prepare for semantic fallback + // Both lookups hit direct cache so both must be substantially faster than + // a real upstream call. Compare against an upper bound rather than each + // other (relative comparisons flake under CI load); 1s is generous and + // still proves a cached lookup didn't silently hit the network. 
+ const upperBoundForCacheLookup = 1 * time.Second + if duration2 > upperBoundForCacheLookup { + t.Errorf("direct-only cache lookup took %v, expected < %v (provider likely called)", duration2, upperBoundForCacheLookup) + } + if duration3 > upperBoundForCacheLookup { + t.Errorf("default-mode cache lookup took %v, expected < %v (provider likely called)", duration3, upperBoundForCacheLookup) + } t.Log("✅ Cache type performance characteristics validated") } @@ -367,7 +405,7 @@ func TestDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t *testing const cacheKey = "cross-provider-direct-single" const prompt = "Explain green threading in Go in one short sentence." - seedCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) + seedCtx := CreateContextWithCacheKeyAndType(t, cacheKey, CacheTypeDirect) seedReq := newCrossProviderChatRequest(schemas.OpenAI, "gpt-5.2", schemas.ChatCompletionRequest, prompt) _, shortCircuit, err := plugin.PreLLMHook(seedCtx, seedReq) @@ -407,7 +445,7 @@ func TestDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t *testing } plugin.WaitForPendingOperations() - hitCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) + hitCtx := CreateContextWithCacheKeyAndType(t, cacheKey, CacheTypeDirect) hitReq := newCrossProviderChatRequest(schemas.Anthropic, "claude-sonnet-4-6", schemas.ChatCompletionRequest, prompt) _, shortCircuit, err = plugin.PreLLMHook(hitCtx, hitReq) @@ -461,7 +499,7 @@ func TestStreamingDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t const cacheKey = "cross-provider-direct-stream" const prompt = "Explain green threading in Go in one short sentence." 
- seedCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) + seedCtx := CreateContextWithCacheKeyAndType(t, cacheKey, CacheTypeDirect) seedReq := newCrossProviderChatRequest(schemas.OpenAI, "gpt-5.2", schemas.ChatCompletionStreamRequest, prompt) _, shortCircuit, err := plugin.PreLLMHook(seedCtx, seedReq) @@ -514,7 +552,7 @@ func TestStreamingDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t plugin.WaitForPendingOperations() } - hitCtx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) + hitCtx := CreateContextWithCacheKeyAndType(t, cacheKey, CacheTypeDirect) hitReq := newCrossProviderChatRequest(schemas.Anthropic, "claude-sonnet-4-6", schemas.ChatCompletionStreamRequest, prompt) _, shortCircuit, err = plugin.PreLLMHook(hitCtx, hitReq) @@ -564,6 +602,29 @@ func TestStreamingDirectCacheHitPreservesCachedProviderMetadataAcrossProviders(t } } +// runDirectSearchForTest is a small helper for the unit tests that directly +// exercise performDirectSearch. It builds the metadata + paramsHash + state +// the way PreLLMHook would and then calls the search. 
+func runDirectSearchForTest(t *testing.T, plugin *Plugin, ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*cacheState, *schemas.LLMPluginShortCircuit, error) { + t.Helper() + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + if requestID == "" { + t.Fatal("test context is missing request ID") + } + state := plugin.createCacheState(requestID) + metadata, err := plugin.buildRequestMetadataForCaching(state, req) + if err != nil { + t.Fatalf("buildRequestMetadataForCaching failed: %v", err) + } + paramsHash, err := hashMap(metadata) + if err != nil { + t.Fatalf("hashMap failed: %v", err) + } + state.ParamsHash = paramsHash + sc, err := plugin.performDirectSearch(ctx, state, req, cacheKey, metadata, paramsHash) + return state, sc, err +} + func TestCacheTypeDirectUsesChunkLookup(t *testing.T) { logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) store := newDirectFastPathStore() @@ -578,10 +639,15 @@ func TestCacheTypeDirectUsesChunkLookup(t *testing.T) { ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), } - ctx := CreateContextWithCacheKeyAndType("chunk-fast-path", CacheTypeDirect) - directID, err := plugin.prepareDirectCacheLookup(ctx, req, "chunk-fast-path") + // First pass: warm the deterministic cache ID and learn what it is. + ctx := CreateContextWithCacheKeyAndType(t, "chunk-fast-path", CacheTypeDirect) + state, _, err := runDirectSearchForTest(t, plugin, ctx, req, "chunk-fast-path") if err != nil { - t.Fatalf("prepareDirectCacheLookup failed: %v", err) + t.Fatalf("performDirectSearch failed: %v", err) + } + directID := state.DirectCacheID + if directID == "" { + t.Fatal("expected DirectCacheID to be populated") } cachedContent := "cached response" @@ -614,15 +680,18 @@ func TestCacheTypeDirectUsesChunkLookup(t *testing.T) { }, } - shortCircuit, err := plugin.performDirectChunkLookup(ctx, req, "chunk-fast-path") + // Second pass: should hit the chunk we just stored, via point-fetch only. 
+ priorChunkCalls := store.getChunkCalls + ctx2 := CreateContextWithCacheKeyAndType(t, "chunk-fast-path", CacheTypeDirect) + _, shortCircuit, err := runDirectSearchForTest(t, plugin, ctx2, req, "chunk-fast-path") if err != nil { - t.Fatalf("performDirectChunkLookup failed: %v", err) + t.Fatalf("second performDirectSearch failed: %v", err) } if shortCircuit == nil || shortCircuit.Response == nil || shortCircuit.Response.ChatResponse == nil { t.Fatal("expected direct chunk lookup to return cached response") } - if store.getChunkCalls != 1 { - t.Fatalf("expected one GetChunk call, got %d", store.getChunkCalls) + if store.getChunkCalls != priorChunkCalls+1 { + t.Fatalf("expected one additional GetChunk call, got %d total", store.getChunkCalls) } if store.getAllCalls != 0 { t.Fatalf("expected no GetAll calls, got %d", store.getAllCalls) @@ -646,22 +715,22 @@ func TestDefaultDirectSearchSetsStorageIDForDeterministicWrites(t *testing.T) { ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), } - ctx := CreateContextWithCacheKey("default-mode") - _, err := plugin.performDirectSearch(ctx, req, "default-mode") - if err != nil && !errors.Is(err, vectorstore.ErrNotSupported) { + ctx := CreateContextWithCacheKey(t, "default-mode") + state, _, err := runDirectSearchForTest(t, plugin, ctx, req, "default-mode") + if err != nil { t.Fatalf("performDirectSearch failed: %v", err) } - - storageID, _ := ctx.Value(requestStorageIDKey).(string) - if storageID == "" { - t.Fatal("expected default direct search to set requestStorageIDKey") + if state.DirectCacheID == "" { + t.Fatal("expected default direct search to populate state.DirectCacheID") } if store.getChunkCalls != 1 { t.Fatalf("expected one GetChunk call, got %d", store.getChunkCalls) } } -func TestPreLLMHookClearsStaleStorageIDOnReusedContext(t *testing.T) { +// TestPreLLMHookResetsStateOnReusedRequestID verifies that a second PreLLMHook +// call for the same request ID overwrites any prior state instead of inheriting 
it. +func TestPreLLMHookResetsStateOnReusedRequestID(t *testing.T) { logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) store := newDirectFastPathStore() config := getDefaultTestConfig() @@ -677,19 +746,29 @@ func TestPreLLMHookClearsStaleStorageIDOnReusedContext(t *testing.T) { ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), } - ctx := CreateContextWithCacheKey("reused-context") - ctx.SetValue(requestStorageIDKey, "stale-storage-id") + ctx := CreateContextWithCacheKey(t, "reused-context") + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + // Seed stale state under the same request ID. + stale := plugin.createCacheState(requestID) + stale.DirectCacheID = "stale-storage-id" + stale.ParamsHash = "stale-params-hash" if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { t.Fatalf("PreLLMHook failed: %v", err) } - storageID, _ := ctx.Value(requestStorageIDKey).(string) - if storageID == "" { - t.Fatal("expected PreLLMHook to replace stale requestStorageIDKey with a deterministic id") + state := plugin.getCacheState(requestID) + if state == nil { + t.Fatal("expected cache state to be present after PreLLMHook") + } + if state == stale { + t.Fatal("expected PreLLMHook to replace the stale state object") } - if storageID == "stale-storage-id" { - t.Fatal("expected PreLLMHook to clear stale requestStorageIDKey before setting a deterministic id") + if state.DirectCacheID == "" { + t.Fatal("expected PreLLMHook to populate a deterministic DirectCacheID") + } + if state.DirectCacheID == "stale-storage-id" { + t.Fatal("expected PreLLMHook to clear stale DirectCacheID before populating a new one") } } @@ -707,16 +786,17 @@ func TestCacheTypeDirectStoresDeterministicID(t *testing.T) { RequestType: schemas.ChatCompletionRequest, ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), } - ctx := CreateContextWithCacheKeyAndType("deterministic-store", CacheTypeDirect) - ctx.SetValue(requestIDKey, "request-uuid") - 
ctx.SetValue(requestProviderKey, schemas.OpenAI) - ctx.SetValue(requestModelKey, req.ChatRequest.Model) + ctx := CreateContextWithCacheKeyAndType(t, "deterministic-store", CacheTypeDirect) - directID, err := plugin.prepareDirectCacheLookup(ctx, req, "deterministic-store") - if err != nil { - t.Fatalf("prepareDirectCacheLookup failed: %v", err) + if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { + t.Fatalf("PreLLMHook failed: %v", err) + } + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + state := plugin.getCacheState(requestID) + if state == nil || state.DirectCacheID == "" { + t.Fatal("expected PreLLMHook to populate state.DirectCacheID") } - ctx.SetValue(requestStorageIDKey, directID) + directID := state.DirectCacheID content := "stored response" response := &schemas.BifrostResponse{ @@ -749,8 +829,8 @@ func TestCacheTypeDirectStoresDeterministicID(t *testing.T) { if store.addIDs[0] != directID { t.Fatalf("expected deterministic storage id %q, got %q", directID, store.addIDs[0]) } - if store.addIDs[0] == "request-uuid" { - t.Fatal("expected storage id to differ from request UUID") + if store.addIDs[0] == requestID { + t.Fatal("expected storage id to differ from request ID") } } @@ -763,6 +843,24 @@ func TestPostLLMHookUsesDeterministicStorageIDOutsideDirectMode(t *testing.T) { logger: logger, } + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), + } + + // Default mode (no CacheTypeKey) should still produce a deterministic + // storage ID via the direct-search path that PreLLMHook always runs. 
+ ctx := CreateContextWithCacheKey(t, "default-mode-store") + if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { + t.Fatalf("PreLLMHook failed: %v", err) + } + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + state := plugin.getCacheState(requestID) + if state == nil || state.DirectCacheID == "" { + t.Fatal("expected default-mode PreLLMHook to populate state.DirectCacheID") + } + directID := state.DirectCacheID + content := "stored response" response := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ @@ -782,16 +880,6 @@ func TestPostLLMHookUsesDeterministicStorageIDOutsideDirectMode(t *testing.T) { } response.ChatResponse.ExtraFields.RequestType = schemas.ChatCompletionRequest - ctx := CreateContextWithCacheKey("default-mode-store") - ctx.SetValue(requestIDKey, "request-uuid") - ctx.SetValue(requestProviderKey, schemas.OpenAI) - ctx.SetValue(requestModelKey, "openai/gpt-4o-mini") - ctx.SetValue(requestHashKey, "request-hash") - ctx.SetValue(requestParamsHashKey, "params-hash") - - directID := plugin.generateDirectCacheID(schemas.OpenAI, "openai/gpt-4o-mini", "default-mode-store", "request-hash", "params-hash") - ctx.SetValue(requestStorageIDKey, directID) - if _, _, err := plugin.PostLLMHook(ctx, response, nil); err != nil { t.Fatalf("PostLLMHook failed: %v", err) } @@ -806,67 +894,6 @@ func TestPostLLMHookUsesDeterministicStorageIDOutsideDirectMode(t *testing.T) { } } -func TestPerformDirectSearchDisablesScanFallbackForLegacyLookup(t *testing.T) { - logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) - store := newDirectFastPathStore() - plugin := &Plugin{ - store: store, - config: getDefaultTestConfig(), - logger: logger, - } - - req := &schemas.BifrostRequest{ - RequestType: schemas.ChatCompletionRequest, - ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), - } - - ctx := CreateContextWithCacheKey("legacy-no-scan") - _, err := plugin.performDirectSearch(ctx, req, "legacy-no-scan") - if 
err != nil && !errors.Is(err, vectorstore.ErrNotSupported) { - t.Fatalf("performDirectSearch failed: %v", err) - } - - if store.getAllCalls != 1 { - t.Fatalf("expected one legacy GetAll call, got %d", store.getAllCalls) - } - if !vectorstore.IsScanFallbackDisabled(store.lastGetAllCtx) { - t.Fatal("expected legacy direct lookup to disable scan fallback") - } -} - -func TestPerformLegacyDirectSearchTreatsQuerySyntaxErrorAsMiss(t *testing.T) { - logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) - store := newDirectFastPathStore() - store.getAllErr = vectorstore.ErrQuerySyntax - plugin := &Plugin{ - store: store, - config: getDefaultTestConfig(), - logger: logger, - } - - req := &schemas.BifrostRequest{ - RequestType: schemas.ChatCompletionRequest, - ChatRequest: CreateBasicChatRequest("What is Bifrost?", 0.7, 50), - } - - ctx := CreateContextWithCacheKey("legacy-query-syntax") - _, err := plugin.prepareDirectCacheLookup(ctx, req, "legacy-query-syntax") - if err != nil { - t.Fatalf("prepareDirectCacheLookup failed: %v", err) - } - - shortCircuit, err := plugin.performLegacyDirectSearch(ctx, req, "legacy-query-syntax") - if err != nil { - t.Fatalf("performLegacyDirectSearch failed: %v", err) - } - if shortCircuit != nil { - t.Fatal("expected query syntax incompatibility to be treated as a miss") - } - if store.getAllCalls != 1 { - t.Fatalf("expected one legacy GetAll call, got %d", store.getAllCalls) - } -} - func TestGetOrCreateStreamAccumulatorUsesSingleAccumulatorPerRequest(t *testing.T) { logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) plugin := &Plugin{ diff --git a/plugins/semanticcache/plugin_conversation_config_test.go b/plugins/semanticcache/plugin_conversation_config_test.go index 7c4d0e72c2..98c1398aa8 100644 --- a/plugins/semanticcache/plugin_conversation_config_test.go +++ b/plugins/semanticcache/plugin_conversation_config_test.go @@ -14,7 +14,7 @@ func TestConversationHistoryThresholdBasic(t *testing.T) { setup := 
CreateTestSetupWithConversationThreshold(t, 2) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-conversation-threshold-basic") + ctx := CreateContextWithCacheKey(t, "test-conversation-threshold-basic") // Test 1: Conversation with exactly 2 messages (should cache) conversation1 := BuildConversationHistory("", @@ -25,7 +25,7 @@ func TestConversationHistoryThresholdBasic(t *testing.T) { t.Log("Testing conversation with exactly 2 messages (at threshold)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request @@ -53,17 +53,18 @@ func TestConversationHistoryThresholdBasic(t *testing.T) { t.Log("Testing conversation with 5 messages (exceeds threshold)...") response3, err3 := setup.Client.ChatCompletionRequest(ctx, request2) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) // Should not cache WaitForCache(setup.Plugin) - // Verify it was NOT cached + // Verify it was NOT cached. The first call already succeeded, so any error + // here is a real regression rather than upstream flakiness — fail fast. 
t.Log("Verifying conversation exceeding threshold was not cached...") response4, err4 := setup.Client.ChatCompletionRequest(ctx, request2) if err4 != nil { - return // Test will be skipped by retry function + t.Fatalf("verification request failed: %v", err4) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}) // Should still be fresh (not cached) @@ -76,7 +77,7 @@ func TestConversationHistoryThresholdWithSystemPrompt(t *testing.T) { setup := CreateTestSetupWithConversationThreshold(t, 3) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-threshold-system-prompt") + ctx := CreateContextWithCacheKey(t, "test-threshold-system-prompt") // System prompt + 2 user/assistant pairs = 5 messages total > 3 conversation := BuildConversationHistory( @@ -89,16 +90,17 @@ func TestConversationHistoryThresholdWithSystemPrompt(t *testing.T) { t.Log("Testing conversation with system prompt (5 total messages > 3 threshold)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, request) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Should not cache (exceeds threshold) WaitForCache(setup.Plugin) - // Verify not cached + // Verify not cached. First call already succeeded, so failures here + // indicate a real regression rather than upstream flakiness. 
response2, err2 := setup.Client.ChatCompletionRequest(ctx, request) if err2 != nil { - return // Test will be skipped by retry function + t.Fatalf("verification request failed: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should not be cached @@ -111,7 +113,7 @@ func TestConversationHistoryThresholdWithExcludeSystemPrompt(t *testing.T) { setup := CreateTestSetupWithThresholdAndExcludeSystem(t, 3, true) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-threshold-exclude-system") + ctx := CreateContextWithCacheKey(t, "test-threshold-exclude-system") // Create conversation with exactly 3 non-system messages to test threshold boundary // System + 1.5 user/assistant pairs = 4 messages total @@ -133,7 +135,7 @@ func TestConversationHistoryThresholdWithExcludeSystemPrompt(t *testing.T) { response1, err1 := setup.Client.ChatCompletionRequest(ctx, request) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request, should not hit cache @@ -172,7 +174,7 @@ func TestConversationHistoryThresholdDifferentValues(t *testing.T) { setup := CreateTestSetupWithConversationThreshold(t, tc.threshold) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-threshold-" + tc.name) + ctx := CreateContextWithCacheKey(t, "test-threshold-" + tc.name) // Build conversation with specified number of messages var conversation []schemas.ChatMessage @@ -194,7 +196,7 @@ func TestConversationHistoryThresholdDifferentValues(t *testing.T) { response1, err1 := setup.Client.ChatCompletionRequest(ctx, request) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Always fresh first time @@ -202,7 +204,7 @@ func 
TestConversationHistoryThresholdDifferentValues(t *testing.T) { response2, err2 := setup.Client.ChatCompletionRequest(ctx, request) if err2 != nil { - return // Test will be skipped by retry function + t.Fatalf("verification request failed: %v", err2) } if tc.shouldCache { @@ -222,7 +224,7 @@ func TestExcludeSystemPromptBasic(t *testing.T) { setup := CreateTestSetupWithExcludeSystemPrompt(t, true) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-exclude-system-basic") + ctx := CreateContextWithCacheKey(t, "test-exclude-system-basic") // Create two conversations with different system prompts but same user/assistant messages conversation1 := BuildConversationHistory( @@ -241,7 +243,7 @@ func TestExcludeSystemPromptBasic(t *testing.T) { t.Log("Caching conversation with system prompt 1...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -268,7 +270,7 @@ func TestExcludeSystemPromptComparison(t *testing.T) { setup1 := CreateTestSetupWithExcludeSystemPrompt(t, false) defer setup1.Cleanup() - ctx1 := CreateContextWithCacheKey("test-exclude-system-false") + ctx1 := CreateContextWithCacheKey(t, "test-exclude-system-false") conversation1 := BuildConversationHistory( "You are helpful", @@ -286,7 +288,7 @@ func TestExcludeSystemPromptComparison(t *testing.T) { t.Log("Testing ExcludeSystemPrompt=false...") response1, err1 := setup1.Client.ChatCompletionRequest(ctx1, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -315,12 +317,12 @@ func TestExcludeSystemPromptComparison(t *testing.T) { setup2 := CreateTestSetupWithExcludeSystemPrompt(t, true) defer setup2.Cleanup() - ctx2 := 
CreateContextWithCacheKey("test-exclude-system-true") + ctx2 := CreateContextWithCacheKey(t, "test-exclude-system-true") t.Log("Testing ExcludeSystemPrompt=true...") response3, err3 := setup2.Client.ChatCompletionRequest(ctx2, request1) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) @@ -341,7 +343,7 @@ func TestExcludeSystemPromptWithMultipleSystemMessages(t *testing.T) { setup := CreateTestSetupWithExcludeSystemPrompt(t, true) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-multiple-system-messages") + ctx := CreateContextWithCacheKey(t, "test-multiple-system-messages") // Manually create conversation with multiple system messages conversation1 := []schemas.ChatMessage{ @@ -388,7 +390,7 @@ func TestExcludeSystemPromptWithMultipleSystemMessages(t *testing.T) { t.Log("Caching conversation with multiple system messages...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -414,7 +416,7 @@ func TestExcludeSystemPromptWithNoSystemMessages(t *testing.T) { setup := CreateTestSetupWithExcludeSystemPrompt(t, true) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-no-system-messages") + ctx := CreateContextWithCacheKey(t, "test-no-system-messages") // Conversation with no system messages conversation := []schemas.ChatMessage{ @@ -433,7 +435,7 @@ func TestExcludeSystemPromptWithNoSystemMessages(t *testing.T) { t.Log("Testing conversation with no system messages...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, request) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } 
AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) diff --git a/plugins/semanticcache/plugin_core_test.go b/plugins/semanticcache/plugin_core_test.go index 5bed26528d..e0f88464de 100644 --- a/plugins/semanticcache/plugin_core_test.go +++ b/plugins/semanticcache/plugin_core_test.go @@ -10,12 +10,17 @@ import ( "github.com/maximhq/bifrost/framework/vectorstore" ) -// TestSemanticCacheBasicFunctionality tests the core caching functionality +// TestSemanticCacheBasicFunctionality tests the core caching functionality. +// +// Intentionally NOT parallel: the assertions at the bottom of this function +// enforce wall-clock comparisons (cache must be faster than upstream, with at +// least 1.5× speedup). Running this in parallel with other integration tests +// causes CPU/network contention that flakes those ratios. func TestSemanticCacheBasicFunctionality(t *testing.T) { setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-basic-value") + ctx := CreateContextWithCacheKey(t, "test-basic-value") // Create test request testRequest := CreateBasicChatRequest( @@ -32,7 +37,7 @@ func TestSemanticCacheBasicFunctionality(t *testing.T) { duration1 := time.Since(start1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil { @@ -106,13 +111,14 @@ func TestSemanticCacheBasicFunctionality(t *testing.T) { // TestSemanticSearch tests the semantic similarity search functionality func TestSemanticSearch(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Lower threshold for more flexible matching setup.Config.Threshold = 0.5 - ctx := CreateContextWithCacheKey("semantic-test-value") + ctx := CreateContextWithCacheKey(t, "semantic-test-value") // First request - this will be cached firstRequest := CreateBasicChatRequest( @@ 
-127,7 +133,7 @@ func TestSemanticSearch(t *testing.T) { duration1 := time.Since(start1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil { @@ -209,7 +215,7 @@ func TestSemanticSearch(t *testing.T) { func TestToFloat32Embedding(t *testing.T) { input := []float64{0.12345678901234568, -0.875, 1.5} - got := toFloat32Embedding(input) + got := float64ToFloat32Embedding(input) if len(got) != len(input) { t.Fatalf("expected %d elements, got %d", len(input), len(got)) @@ -246,13 +252,14 @@ func TestFlattenToFloat32Embedding(t *testing.T) { // TestDirectVsSemanticSearch tests the difference between direct hash matching and semantic search func TestDirectVsSemanticSearch(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Lower threshold for more flexible semantic matching setup.Config.Threshold = 0.2 - ctx := CreateContextWithCacheKey("direct-vs-semantic-test") + ctx := CreateContextWithCacheKey(t, "direct-vs-semantic-test") // Test Case 1: Exact same request (should use direct hash matching) t.Log("=== Test Case 1: Exact Same Request (Direct Hash Match) ===") @@ -266,7 +273,7 @@ func TestDirectVsSemanticSearch(t *testing.T) { t.Log("Making first request...") _, err1 := setup.Client.ChatCompletionRequest(ctx, exactRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } WaitForCache(setup.Plugin) @@ -330,10 +337,11 @@ func TestDirectVsSemanticSearch(t *testing.T) { // TestNoCacheScenarios tests scenarios where caching should NOT occur func TestNoCacheScenarios(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("no-cache-test") + ctx := CreateContextWithCacheKey(t, "no-cache-test") // Test Case 1: Different 
parameters should NOT cache hit t.Log("=== Test Case 1: Different Parameters ===") @@ -344,7 +352,7 @@ func TestNoCacheScenarios(t *testing.T) { request1 := CreateBasicChatRequest(basePrompt, 0.1, 50) _, err1 := setup.Client.ChatCompletionRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } WaitForCache(setup.Plugin) @@ -353,7 +361,7 @@ func TestNoCacheScenarios(t *testing.T) { request2 := CreateBasicChatRequest(basePrompt, 0.9, 50) // Different temperature response2, err2 := setup.Client.ChatCompletionRequest(ctx, request2) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } // Should NOT be cached @@ -365,17 +373,28 @@ func TestNoCacheScenarios(t *testing.T) { request3 := CreateBasicChatRequest(basePrompt, 0.1, 200) // Different max_tokens response3, err3 := setup.Client.ChatCompletionRequest(ctx, request3) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } - - // Should NOT be cached AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) + WaitForCache(setup.Plugin) + + // Make request3 a SECOND time. The miss above could be a miss for the + // wrong reason (e.g. caching disabled entirely). A second-call hit + // confirms (a) request3's params produce a distinct cache_key from the + // earlier requests AND (b) caching itself is functioning under this ctx. 
+ response3Again, err := setup.Client.ChatCompletionRequest(ctx, request3) + if err != nil { + t.Fatalf("Repeat of request3 failed: %v", err) + } + AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3Again}, string(CacheTypeDirect)) + t.Log("✅ No cache scenarios test completed!") } // TestCacheConfiguration tests different cache configuration options func TestCacheConfiguration(t *testing.T) { + t.Parallel() tests := []struct { name string config *Config @@ -408,7 +427,10 @@ func TestCacheConfiguration(t *testing.T) { EmbeddingModel: "text-embedding-3-small", Dimension: 1536, Threshold: 0.8, - TTL: 1 * time.Hour, // Custom TTL + // Short TTL so the test can verify expiry is honored. + // 1h would only verify the configured value didn't crash; + // it can't distinguish "TTL applied" from "default TTL applied". + TTL: 2 * time.Second, }, expectedBehavior: "custom_ttl", }, @@ -419,19 +441,22 @@ func TestCacheConfiguration(t *testing.T) { setup := NewTestSetupWithConfig(t, tt.config) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("config-test-" + tt.name) + ctx := CreateContextWithCacheKey(t, "config-test-"+tt.name) // Basic functionality test with the configuration testRequest := CreateBasicChatRequest("Test configuration: "+tt.name, 0.5, 50) - _, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest) + response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } + AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) - _, err2 := setup.Client.ChatCompletionRequest(ctx, testRequest) + // Second identical request must hit (regardless of which config — + // all three configs cache identical requests via the direct path). 
+ response2, err2 := setup.Client.ChatCompletionRequest(ctx, testRequest) if err2 != nil { if err2.Error != nil { t.Fatalf("Second request failed: %v", err2.Error.Message) @@ -439,8 +464,32 @@ func TestCacheConfiguration(t *testing.T) { t.Fatalf("Second request failed: %v", err2) } } - - t.Logf("✅ Configuration test '%s' completed", tt.name) + AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect)) + + // Per-config behavioral check. + switch tt.expectedBehavior { + case "strict_matching": + // Threshold=0.95 should still allow direct hits on identical + // content (threshold only gates semantic search). Verified above. + case "loose_matching": + // Same — direct path doesn't use threshold. The relevant check + // is that the cache actually wrote (verified above). + case "custom_ttl": + // Verify Config.TTL was actually honored: wait past expiry + // and confirm a third request misses. If the plugin had + // fallen back to the default TTL, this would still hit. 
+ time.Sleep(tt.config.TTL + 1*time.Second) + response3, err3 := setup.Client.ChatCompletionRequest(ctx, testRequest) + if err3 != nil { + if err3.Error != nil { + t.Fatalf("Post-expiry request failed: %v", err3.Error.Message) + } else { + t.Fatalf("Post-expiry request failed: %v", err3) + } + } + AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) + } + t.Logf("✅ Configuration test '%s' completed (cache write + read verified)", tt.name) }) } } @@ -510,7 +559,7 @@ func (m *MockUnsupportedStore) Close(ctx context.Context, namespace string) erro // TestInvalidProviderRejection tests that providers without embedding support are rejected during initialization func TestInvalidProviderRejection(t *testing.T) { - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx := newBaseTestContext() logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) // Create a mock vector store for testing @@ -551,7 +600,7 @@ func TestInvalidProviderRejection(t *testing.T) { // TestValidProviderAccepted tests that providers with embedding support are accepted during initialization func TestValidProviderAccepted(t *testing.T) { - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx := newBaseTestContext() logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) // Create a mock vector store for testing diff --git a/plugins/semanticcache/plugin_cross_cache_test.go b/plugins/semanticcache/plugin_cross_cache_test.go index 7a49389911..00f1085443 100644 --- a/plugins/semanticcache/plugin_cross_cache_test.go +++ b/plugins/semanticcache/plugin_cross_cache_test.go @@ -8,24 +8,25 @@ import ( // TestCrossCacheTypeAccessibility tests that entries cached one way are accessible another way func TestCrossCacheTypeAccessibility(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("What is artificial intelligence?", 0.7, 100) // Test 1: Cache with default behavior (both 
direct + semantic) - ctx1 := CreateContextWithCacheKey("test-cross-cache-access") + ctx1 := CreateContextWithCacheKey(t, "test-cross-cache-access") t.Log("Caching with default behavior (both direct + semantic)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Test 2: Retrieve with direct-only cache type - ctx2 := CreateContextWithCacheKeyAndType("test-cross-cache-access", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cross-cache-access", CacheTypeDirect) t.Log("Retrieving with CacheTypeKey=direct...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { @@ -38,7 +39,7 @@ func TestCrossCacheTypeAccessibility(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should find direct match // Test 3: Retrieve with semantic-only cache type - ctx3 := CreateContextWithCacheKeyAndType("test-cross-cache-access", CacheTypeSemantic) + ctx3 := CreateContextWithCacheKeyAndType(t, "test-cross-cache-access", CacheTypeSemantic) t.Log("Retrieving with CacheTypeKey=semantic...") response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest) if err3 != nil { @@ -51,6 +52,7 @@ func TestCrossCacheTypeAccessibility(t *testing.T) { // TestCacheTypeIsolation tests that entries cached separately by type behave correctly func TestCacheTypeIsolation(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -60,22 +62,22 @@ func TestCacheTypeIsolation(t *testing.T) { clearTestKeysWithStore(t, setup.Store) // Test 1: Cache with direct-only - ctx1 := CreateContextWithCacheKeyAndType("test-cache-isolation", CacheTypeDirect) + ctx1 := CreateContextWithCacheKeyAndType(t, "test-cache-isolation", 
CacheTypeDirect) t.Log("Caching with CacheTypeKey=direct only...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request WaitForCache(setup.Plugin) // Test 2: Try to retrieve with semantic-only (should miss because no semantic entry) - ctx2 := CreateContextWithCacheKeyAndType("test-cache-isolation", CacheTypeSemantic) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cache-isolation", CacheTypeSemantic) t.Log("Retrieving same request with CacheTypeKey=semantic (should miss)...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should miss - no semantic cache entry @@ -90,7 +92,7 @@ func TestCacheTypeIsolation(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct") // Should hit direct cache // Test 4: Default behavior (should find the direct cache) - ctx4 := CreateContextWithCacheKey("test-cache-isolation") + ctx4 := CreateContextWithCacheKey(t, "test-cache-isolation") t.Log("Retrieving with default behavior (should find direct cache)...") response4, err4 := setup.Client.ChatCompletionRequest(ctx4, testRequest) if err4 != nil { @@ -103,17 +105,18 @@ func TestCacheTypeIsolation(t *testing.T) { // TestCacheTypeFallbackBehavior tests whether cache types fallback to each other func TestCacheTypeFallbackBehavior(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Cache an entry with default behavior originalRequest := CreateBasicChatRequest("Explain machine learning", 0.7, 100) - ctx1 := CreateContextWithCacheKey("test-fallback-behavior") + 
ctx1 := CreateContextWithCacheKey(t, "test-fallback-behavior") t.Log("Caching with default behavior...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, originalRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -121,19 +124,19 @@ func TestCacheTypeFallbackBehavior(t *testing.T) { // Test similar request with direct-only (should miss direct, no fallback, but should cache response) similarRequest := CreateBasicChatRequest("Explain machine learning concepts", 0.7, 100) - ctx2 := CreateContextWithCacheKeyAndType("test-fallback-behavior", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-fallback-behavior", CacheTypeDirect) t.Log("Testing similar request with CacheTypeKey=direct (should miss, make request, cache without embeddings)...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, similarRequest) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should miss - no direct match, no semantic search WaitForCache(setup.Plugin) // Let the response get cached // Test same similar request with semantic-only (should hit original entry) - ctx3 := CreateContextWithCacheKeyAndType("test-fallback-behavior", CacheTypeSemantic) + ctx3 := CreateContextWithCacheKeyAndType(t, "test-fallback-behavior", CacheTypeSemantic) t.Log("Testing similar request with CacheTypeKey=semantic (should find semantic match from step 1)...") response3, err3 := setup.Client.ChatCompletionRequest(ctx3, similarRequest) @@ -141,8 +144,12 @@ func TestCacheTypeFallbackBehavior(t *testing.T) { t.Fatalf("Third request failed: %v", err3) } - // Should find semantic match from step 1's cached entry (which has embeddings) - if response3.ExtraFields.CacheDebug 
!= nil && response3.ExtraFields.CacheDebug.CacheHit { + // Should find semantic match from step 1's cached entry (which has embeddings). + // Hit is similarity-dependent; CacheDebug must be stamped either way. + if response3.ExtraFields.CacheDebug == nil { + t.Fatal("expected CacheDebug to be stamped on the response") + } + if response3.ExtraFields.CacheDebug.CacheHit { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "semantic") t.Log("✅ Semantic search found similar entry from step 1") } else { @@ -153,7 +160,7 @@ func TestCacheTypeFallbackBehavior(t *testing.T) { // Test a different similar request with default behavior (try both, fallback to semantic) // Use a slightly different request to avoid hitting the cached response from step 2 differentSimilarRequest := CreateBasicChatRequest("Explain the basics of machine learning", 0.7, 100) - ctx4 := CreateContextWithCacheKey("test-fallback-behavior") + ctx4 := CreateContextWithCacheKey(t, "test-fallback-behavior") t.Log("Testing different similar request with default behavior (direct miss -> semantic fallback)...") response4, err4 := setup.Client.ChatCompletionRequest(ctx4, differentSimilarRequest) @@ -161,8 +168,12 @@ func TestCacheTypeFallbackBehavior(t *testing.T) { t.Fatalf("Fourth request failed: %v", err4) } - // Should try direct first (miss), then semantic (might hit) - if response4.ExtraFields.CacheDebug != nil && response4.ExtraFields.CacheDebug.CacheHit { + // Should try direct first (miss), then semantic (might hit). CacheDebug + // must be stamped either way. 
+ if response4.ExtraFields.CacheDebug == nil { + t.Fatal("expected CacheDebug to be stamped on the response") + } + if response4.ExtraFields.CacheDebug.CacheHit { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}, "semantic") t.Log("✅ Default behavior found semantic fallback") } else { @@ -175,17 +186,18 @@ func TestCacheTypeFallbackBehavior(t *testing.T) { // TestMultipleCacheEntriesPriority tests behavior when multiple cache entries exist func TestMultipleCacheEntriesPriority(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("What is deep learning?", 0.7, 100) // Create cache entry with default behavior first - ctx1 := CreateContextWithCacheKey("test-cache-priority") + ctx1 := CreateContextWithCacheKey(t, "test-cache-priority") t.Log("Creating cache entry with default behavior...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) originalContent := *response1.Choices[0].Message.Content.ContentStr @@ -211,7 +223,7 @@ func TestMultipleCacheEntriesPriority(t *testing.T) { } // Test with direct-only access - ctx2 := CreateContextWithCacheKeyAndType("test-cache-priority", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cache-priority", CacheTypeDirect) t.Log("Accessing with CacheTypeKey=direct...") response3, err3 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err3 != nil { @@ -220,7 +232,7 @@ func TestMultipleCacheEntriesPriority(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}, "direct") // Should find direct cache // Test with semantic-only access - ctx3 := CreateContextWithCacheKeyAndType("test-cache-priority", CacheTypeSemantic) + ctx3 := CreateContextWithCacheKeyAndType(t, 
"test-cache-priority", CacheTypeSemantic) t.Log("Accessing with CacheTypeKey=semantic...") response4, err4 := setup.Client.ChatCompletionRequest(ctx3, testRequest) if err4 != nil { @@ -233,6 +245,7 @@ func TestMultipleCacheEntriesPriority(t *testing.T) { // TestCrossCacheTypeWithDifferentParameters tests cache type behavior with parameter variations func TestCrossCacheTypeWithDifferentParameters(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -240,19 +253,19 @@ func TestCrossCacheTypeWithDifferentParameters(t *testing.T) { // Cache with specific parameters request1 := CreateBasicChatRequest(baseMessage, 0.7, 100) - ctx1 := CreateContextWithCacheKey("test-cross-cache-params") + ctx1 := CreateContextWithCacheKey(t, "test-cross-cache-params") t.Log("Caching with temp=0.7, max_tokens=100...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Test same parameters with direct-only - ctx2 := CreateContextWithCacheKeyAndType("test-cross-cache-params", CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, "test-cross-cache-params", CacheTypeDirect) t.Log("Retrieving same parameters with CacheTypeKey=direct...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, request1) if err2 != nil { @@ -269,18 +282,18 @@ func TestCrossCacheTypeWithDifferentParameters(t *testing.T) { t.Log("Testing different parameters (should miss)...") response3, err3 := setup.Client.ChatCompletionRequest(ctx2, request3) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) // Should miss due to different params // Test semantic search with different 
parameters - ctx4 := CreateContextWithCacheKeyAndType("test-cross-cache-params", CacheTypeSemantic) + ctx4 := CreateContextWithCacheKeyAndType(t, "test-cross-cache-params", CacheTypeSemantic) similarRequest := CreateBasicChatRequest("Can you explain quantum computing", 0.5, 200) t.Log("Testing semantic search with different params and similar message...") response4, err4 := setup.Client.ChatCompletionRequest(ctx4, similarRequest) if err4 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err4) } // Should miss semantic search due to different parameters (params_hash different) AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}) @@ -290,26 +303,27 @@ func TestCrossCacheTypeWithDifferentParameters(t *testing.T) { // TestCacheTypeErrorHandling tests error scenarios with cache types func TestCacheTypeErrorHandling(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("Test error handling", 0.7, 50) // Test invalid cache type (should fallback to default) - ctx1 := CreateContextWithCacheKey("test-cache-error-handling") + ctx1 := CreateContextWithCacheKey(t, "test-cache-error-handling") ctx1 = ctx1.WithValue(CacheTypeKey, "invalid_cache_type") t.Log("Testing invalid cache type (should fallback to default behavior)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Should work with fallback behavior WaitForCache(setup.Plugin) // Test nil cache type (should use default) - ctx2 := CreateContextWithCacheKey("test-cache-error-handling") + ctx2 := CreateContextWithCacheKey(t, "test-cache-error-handling") ctx2 = ctx2.WithValue(CacheTypeKey, nil) t.Log("Testing nil cache type (should use default behavior)...") 
diff --git a/plugins/semanticcache/plugin_default_cache_key_test.go b/plugins/semanticcache/plugin_default_cache_key_test.go index 57cd2d6cb4..db8e78443a 100644 --- a/plugins/semanticcache/plugin_default_cache_key_test.go +++ b/plugins/semanticcache/plugin_default_cache_key_test.go @@ -1,7 +1,6 @@ package semanticcache import ( - "context" "testing" "github.com/maximhq/bifrost/core/schemas" @@ -10,21 +9,22 @@ import ( // TestDefaultCacheKey_CachesWithoutPerRequestKey verifies that when DefaultCacheKey // is configured, requests without an explicit cache key are cached automatically. func TestDefaultCacheKey_CachesWithoutPerRequestKey(t *testing.T) { + t.Parallel() config := getDefaultTestConfig() - config.DefaultCacheKey = "test-default-key" + config.DefaultCacheKey = keyForTest(t, "test-default-key") setup := NewTestSetupWithConfig(t, config) defer setup.Cleanup() // Context with NO per-request cache key - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx := newBaseTestContext() testRequest := CreateBasicChatRequest("What is Bifrost? 
Answer in one short sentence.", 0.7, 50) t.Log("Making first request without per-request cache key (should use default and be cached)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } if response1 == nil || len(response1.Choices) == 0 || response1.Choices[0].Message.Content.ContentStr == nil { @@ -37,7 +37,7 @@ func TestDefaultCacheKey_CachesWithoutPerRequestKey(t *testing.T) { WaitForCache(setup.Plugin) t.Log("Making second identical request without per-request cache key (should hit cache)...") - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx2 := newBaseTestContext() response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { if err2.Error != nil { @@ -53,8 +53,9 @@ func TestDefaultCacheKey_CachesWithoutPerRequestKey(t *testing.T) { // TestDefaultCacheKey_PerRequestKeyOverridesDefault verifies that an explicit // per-request cache key takes precedence over the configured default. 
func TestDefaultCacheKey_PerRequestKeyOverridesDefault(t *testing.T) { + t.Parallel() config := getDefaultTestConfig() - config.DefaultCacheKey = "test-default-key" + config.DefaultCacheKey = keyForTest(t, "test-default-key") setup := NewTestSetupWithConfig(t, config) defer setup.Cleanup() @@ -62,16 +63,16 @@ func TestDefaultCacheKey_PerRequestKeyOverridesDefault(t *testing.T) { testRequest := CreateBasicChatRequest("What is the capital of France?", 0.5, 50) // Cache with the default key (no per-request key) - ctx1 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx1 := newBaseTestContext() _, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } WaitForCache(setup.Plugin) // Verify the cache was actually populated with the default key - ctxDefault2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctxDefault2 := newBaseTestContext() responseDefault2, errDefault2 := setup.Client.ChatCompletionRequest(ctxDefault2, testRequest) if errDefault2 != nil { if errDefault2.Error != nil { @@ -82,7 +83,7 @@ func TestDefaultCacheKey_PerRequestKeyOverridesDefault(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: responseDefault2}, string(CacheTypeDirect)) // Same request but with a DIFFERENT per-request key — should miss - ctx2 := CreateContextWithCacheKey("override-key") + ctx2 := CreateContextWithCacheKey(t, "override-key") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { if err2.Error != nil { @@ -98,20 +99,21 @@ func TestDefaultCacheKey_PerRequestKeyOverridesDefault(t *testing.T) { // TestDefaultCacheKey_EmptyDefault_NoCaching verifies that when DefaultCacheKey // is empty (default zero value), requests without a per-request key bypass caching. 
func TestDefaultCacheKey_EmptyDefault_NoCaching(t *testing.T) { + t.Parallel() config := getDefaultTestConfig() // DefaultCacheKey is intentionally left empty (zero value) setup := NewTestSetupWithConfig(t, config) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx := newBaseTestContext() testRequest := CreateBasicChatRequest("What is deep learning", 0.7, 50) t.Log("Making first request without any cache key and no default (should not cache)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -119,7 +121,7 @@ func TestDefaultCacheKey_EmptyDefault_NoCaching(t *testing.T) { WaitForCache(setup.Plugin) t.Log("Making second identical request (should still not cache)...") - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx2 := newBaseTestContext() response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { if err2.Error != nil { diff --git a/plugins/semanticcache/plugin_edge_cases_test.go b/plugins/semanticcache/plugin_edge_cases_test.go index a99eb64ef2..946daca1a9 100644 --- a/plugins/semanticcache/plugin_edge_cases_test.go +++ b/plugins/semanticcache/plugin_edge_cases_test.go @@ -1,7 +1,6 @@ package semanticcache import ( - "context" "strings" "testing" @@ -11,6 +10,7 @@ import ( // TestParameterVariations tests that different parameters don't cache hit inappropriately func TestParameterVariations(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -45,7 +45,7 @@ func TestParameterVariations(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Create a fresh context for each subtest to avoid context pollution - ctx := CreateContextWithCacheKey("param-variations-test") + 
ctx := CreateContextWithCacheKey(t, "param-variations-test") // Clear cache for this subtest clearTestKeysWithStore(t, setup.Store) @@ -53,7 +53,7 @@ func TestParameterVariations(t *testing.T) { // Make first request _, err1 := setup.Client.ChatCompletionRequest(ctx, tt.request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } WaitForCache(setup.Plugin) @@ -80,10 +80,11 @@ func TestParameterVariations(t *testing.T) { // TestToolVariations tests caching behavior with different tool configurations func TestToolVariations(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("tool-variations-test") + ctx := CreateContextWithCacheKey(t, "tool-variations-test") // Base request without tools baseRequest := &schemas.BifrostChatRequest{ @@ -190,7 +191,7 @@ func TestToolVariations(t *testing.T) { t.Log("Making request with tools...") response2, err2 := setup.Client.ChatCompletionRequest(ctx, requestWithTools) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) @@ -210,7 +211,7 @@ func TestToolVariations(t *testing.T) { t.Log("Making request with different tools...") response4, err4 := setup.Client.ChatCompletionRequest(ctx, requestWithDifferentTools) if err4 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err4) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}) @@ -220,6 +221,7 @@ func TestToolVariations(t *testing.T) { // TestContentVariations tests caching behavior with different content types func TestContentVariations(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -349,14 +351,13 @@ func TestContentVariations(t *testing.T) { t.Run(tt.name, func(t *testing.T) { 
t.Logf("Testing content variation: %s", tt.name) - // Create a fresh context for each subtest to avoid context pollution - ctx := CreateContextWithCacheKey("content-variations-test") + // Use a per-subtest cache key so subtests don't share entries. + ctx := CreateContextWithCacheKey(t, "content-variations-"+tt.name) // Make first request _, err1 := setup.Client.ChatCompletionRequest(ctx, tt.request) if err1 != nil { - t.Logf("⚠️ First %s request failed: %v", tt.name, err1) - return // Skip this test case + t.Skipf("upstream request error, skipping %s: %v", tt.name, err1) } WaitForCache(setup.Plugin) @@ -376,6 +377,7 @@ func TestContentVariations(t *testing.T) { // TestBoundaryParameterValues tests edge case parameter values func TestBoundaryParameterValues(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -454,25 +456,40 @@ func TestBoundaryParameterValues(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Logf("Testing boundary parameters: %s", tt.name) - // Create a fresh context for each subtest to avoid context pollution - ctx := CreateContextWithCacheKey("boundary-params-test") + // Per-subtest cache key so subtests don't share entries. + ctx := CreateContextWithCacheKey(t, "boundary-params-"+tt.name) - _, err := setup.Client.ChatCompletionRequest(ctx, tt.request) - if err != nil { - t.Logf("⚠️ %s request failed (may be expected): %v", tt.name, err) - } else { - t.Logf("✅ %s handled gracefully", tt.name) + // First request must succeed (boundary values are valid OpenAI + // inputs); a real failure here is a regression, not "expected". + response1, err1 := setup.Client.ChatCompletionRequest(ctx, tt.request) + if err1 != nil { + t.Skipf("upstream request error, skipping %s: %v", tt.name, err1) + } + AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) + + WaitForCache(setup.Plugin) + + // Second identical request must hit — proves boundary params + // don't break cache key generation or storage. 
+ response2, err2 := setup.Client.ChatCompletionRequest(ctx, tt.request) + if err2 != nil { + t.Fatalf("Second %s request failed: %v", tt.name, err2) } + AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, string(CacheTypeDirect)) + t.Logf("✅ %s parameters cached correctly", tt.name) }) } } // TestSemanticSimilarityEdgeCases tests edge cases in semantic similarity matching func TestSemanticSimilarityEdgeCases(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - setup.Config.Threshold = 0.9 + // Threshold tuned for the prompt pairs below; 0.9 is too strict for + // semantically-similar-but-different-phrasing pairs and produces flakes. + setup.Config.Threshold = 0.7 // Test case: Similar questions with different wording similarTests := []struct { @@ -510,7 +527,7 @@ func TestSemanticSimilarityEdgeCases(t *testing.T) { for i, test := range similarTests { t.Run(test.description, func(t *testing.T) { // Create a fresh context for each subtest to avoid context pollution - ctx := CreateContextWithCacheKey("semantic-edge-test") + ctx := CreateContextWithCacheKey(t, "semantic-edge-test") // Clear cache for this subtest clearTestKeysWithStore(t, setup.Store) @@ -519,7 +536,7 @@ func TestSemanticSimilarityEdgeCases(t *testing.T) { request1 := CreateBasicChatRequest(test.prompt1, 0.1, 50) _, err1 := setup.Client.ChatCompletionRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } // Wait for cache to be written @@ -558,7 +575,7 @@ func TestSemanticSimilarityEdgeCases(t *testing.T) { if semanticMatch { t.Logf("✅ Test %d: Semantic match found as expected for '%s'", i+1, test.description) } else { - t.Logf("ℹ️ Test %d: No semantic match found for '%s', check with threshold: %f and found similarity: %f", i+1, test.description, cacheThresholdFloat, cacheSimilarityFloat) + t.Errorf("❌ Test %d: Expected semantic match for '%s' but none found 
(threshold=%f, similarity=%f)", i+1, test.description, cacheThresholdFloat, cacheSimilarityFloat) } } else { if semanticMatch { @@ -573,6 +590,7 @@ func TestSemanticSimilarityEdgeCases(t *testing.T) { // TestErrorHandlingEdgeCases tests various error scenarios func TestErrorHandlingEdgeCases(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -580,23 +598,33 @@ func TestErrorHandlingEdgeCases(t *testing.T) { // Test without cache key (should not crash and bypass cache) t.Run("Request without cache key", func(t *testing.T) { - ctxNoKey := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctxNoKey := newBaseTestContext() - response, err := setup.Client.ChatCompletionRequest(ctxNoKey, testRequest) + response1, err := setup.Client.ChatCompletionRequest(ctxNoKey, testRequest) if err != nil { t.Errorf("Request without cache key failed: %v", err) return } + AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) - // Should bypass cache since there's no cache key - AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response}) - t.Log("✅ Request without cache key correctly bypassed cache") + WaitForCache(setup.Plugin) + + // Second identical request must also miss — proves the first wasn't + // silently cached against a default key. 
+ ctxNoKey2 := newBaseTestContext() + response2, err := setup.Client.ChatCompletionRequest(ctxNoKey2, testRequest) + if err != nil { + t.Errorf("Second request without cache key failed: %v", err) + return + } + AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) + t.Log("✅ Request without cache key correctly bypassed cache (verified across two calls)") }) // Test with invalid cache key type t.Run("Request with invalid cache key type", func(t *testing.T) { // First establish a cached response with valid context - validCtx := CreateContextWithCacheKey("error-handling-test") + validCtx := CreateContextWithCacheKey(t, "error-handling-test") _, err := setup.Client.ChatCompletionRequest(validCtx, testRequest) if err != nil { t.Fatalf("First request with valid cache key failed: %v", err) @@ -605,7 +633,7 @@ func TestErrorHandlingEdgeCases(t *testing.T) { WaitForCache(setup.Plugin) // Now test with invalid key type - should bypass cache - ctxInvalidKey := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline).WithValue(CacheKey, 12345) + ctxInvalidKey := newBaseTestContext().WithValue(CacheKey, 12345) response, err := setup.Client.ChatCompletionRequest(ctxInvalidKey, testRequest) if err != nil { diff --git a/plugins/semanticcache/plugin_embedding_test.go b/plugins/semanticcache/plugin_embedding_test.go index c5487a8510..e42f71c63c 100644 --- a/plugins/semanticcache/plugin_embedding_test.go +++ b/plugins/semanticcache/plugin_embedding_test.go @@ -9,10 +9,11 @@ import ( // TestEmbeddingRequestsCaching tests that embedding requests are properly cached using direct hash matching func TestEmbeddingRequestsCaching(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-embedding-cache") + ctx := CreateContextWithCacheKey(t, "test-embedding-cache") // Create embedding request embeddingRequest := CreateEmbeddingRequest([]string{ @@ -28,7 +29,7 @@ func TestEmbeddingRequestsCaching(t 
*testing.T) { duration1 := time.Since(start1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } if response1 == nil || len(response1.Data) == 0 { @@ -76,33 +77,48 @@ func TestEmbeddingRequestsCaching(t *testing.T) { // TestEmbeddingRequestsNoCacheWithoutCacheKey tests that embedding requests without cache key are not cached func TestEmbeddingRequestsNoCacheWithoutCacheKey(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - // Don't set cache key in context - ctx := CreateContextWithCacheKey("") + // Don't set cache key in context. CreateContextWithCacheKey(t, "") would + // still populate CacheKey from t.Name() and turn this into a keyed + // request — using a base context keeps CacheKey unset so we exercise + // the cache-disabled path. + ctx := newBaseTestContext() embeddingRequest := CreateEmbeddingRequest([]string{"Test embedding without cache key"}) - t.Log("Making embedding request without cache key...") - - response, err := setup.Client.EmbeddingRequest(ctx, embeddingRequest) + t.Log("Making first embedding request without cache key...") + response1, err := setup.Client.EmbeddingRequest(ctx, embeddingRequest) if err != nil { t.Fatalf("Embedding request failed: %v", err) } + AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1}) + + WaitForCache(setup.Plugin) - // Should not be cached - AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response}) + // Real check: a second identical request must ALSO miss. If the cache + // silently keyed off something else (e.g. a default key), this would + // surface as a hit and fail the assertion. 
+ t.Log("Making second identical request — must also miss because nothing was cached...") + ctx2 := newBaseTestContext() + response2, err := setup.Client.EmbeddingRequest(ctx2, embeddingRequest) + if err != nil { + t.Fatalf("Second embedding request failed: %v", err) + } + AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}) t.Log("✅ Embedding requests without cache key are properly not cached") } // TestEmbeddingRequestsDifferentTexts tests that different embedding texts produce different cache entries func TestEmbeddingRequestsDifferentTexts(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-embedding-different") + ctx := CreateContextWithCacheKey(t, "test-embedding-different") // Create two different embedding requests request1 := CreateEmbeddingRequest([]string{"First set of texts"}) @@ -111,7 +127,7 @@ func TestEmbeddingRequestsDifferentTexts(t *testing.T) { t.Log("Making first embedding request...") response1, err1 := setup.Client.EmbeddingRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1}) @@ -120,7 +136,7 @@ func TestEmbeddingRequestsDifferentTexts(t *testing.T) { t.Log("Making second different embedding request...") response2, err2 := setup.Client.EmbeddingRequest(ctx, request2) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } // Should not be a cache hit since texts are different AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}) @@ -130,19 +146,20 @@ func TestEmbeddingRequestsDifferentTexts(t *testing.T) { // TestEmbeddingRequestsCacheExpiration tests TTL functionality for embedding requests func TestEmbeddingRequestsCacheExpiration(t *testing.T) { + t.Parallel() setup := 
NewTestSetup(t) defer setup.Cleanup() // Set very short TTL for testing shortTTL := 5 * time.Second - ctx := CreateContextWithCacheKeyAndTTL("test-embedding-ttl", shortTTL) + ctx := CreateContextWithCacheKeyAndTTL(t, "test-embedding-ttl", shortTTL) embeddingRequest := CreateEmbeddingRequest([]string{"TTL test embedding"}) t.Log("Making first embedding request with short TTL...") response1, err1 := setup.Client.EmbeddingRequest(ctx, embeddingRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response1}) @@ -160,12 +177,15 @@ func TestEmbeddingRequestsCacheExpiration(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response2}, "direct") t.Logf("Waiting for TTL expiration (%v)...", shortTTL) - time.Sleep(shortTTL + 1*time.Second) // Wait for TTL to expire + // expires_at is stored at second-precision Unix(); a 1s buffer can land + // on the same boundary as the entry's expiry under load. 2s is the + // minimum margin that's robust to seconds-level rounding + a slow CI. 
+ time.Sleep(shortTTL + 2*time.Second) t.Log("Making third request after TTL expiration...") response3, err3 := setup.Client.EmbeddingRequest(ctx, embeddingRequest) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } // Should not be a cache hit since TTL expired AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response3}) diff --git a/plugins/semanticcache/plugin_image_generation_test.go b/plugins/semanticcache/plugin_image_generation_test.go index a65c06e81b..c6dee8d347 100644 --- a/plugins/semanticcache/plugin_image_generation_test.go +++ b/plugins/semanticcache/plugin_image_generation_test.go @@ -10,6 +10,10 @@ import ( // TestImageGenerationCacheBasicFunctionality tests basic image generation caching func TestImageGenerationCacheBasicFunctionality(t *testing.T) { + if testing.Short() { + t.Skipf("skipping %s in -short mode (gpt-image-1 calls take ~15-65s)", "TestImageGenerationCacheBasicFunctionality") + } + t.Parallel() if testing.Short() { t.Skip("skipping integration test in -short mode") } @@ -19,7 +23,7 @@ func TestImageGenerationCacheBasicFunctionality(t *testing.T) { setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-image-gen-value") + ctx := CreateContextWithCacheKey(t, "test-image-gen-value") // Create test image generation request testRequest := CreateImageGenerationRequest( @@ -116,6 +120,10 @@ func TestImageGenerationCacheBasicFunctionality(t *testing.T) { // TestImageGenerationSemanticSearch tests semantic similarity search for image generation func TestImageGenerationSemanticSearch(t *testing.T) { + if testing.Short() { + t.Skipf("skipping %s in -short mode (gpt-image-1 calls take ~15-65s)", "TestImageGenerationSemanticSearch") + } + t.Parallel() if testing.Short() { t.Skip("skipping integration test in -short mode") } @@ -132,7 +140,7 @@ func TestImageGenerationSemanticSearch(t *testing.T) { setup := 
NewTestSetupWithConfig(t, config) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("image-semantic-test-value") + ctx := CreateContextWithCacheKey(t, "image-semantic-test-value") // First request - this will be cached firstRequest := CreateImageGenerationRequest( @@ -234,6 +242,10 @@ func TestImageGenerationSemanticSearch(t *testing.T) { // TestImageGenerationDifferentParameters tests that different parameters are cached separately func TestImageGenerationDifferentParameters(t *testing.T) { + if testing.Short() { + t.Skipf("skipping %s in -short mode (gpt-image-1 calls take ~15-65s)", "TestImageGenerationDifferentParameters") + } + t.Parallel() if testing.Short() { t.Skip("skipping integration test in -short mode") } @@ -243,7 +255,7 @@ func TestImageGenerationDifferentParameters(t *testing.T) { setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("image-params-test") + ctx := CreateContextWithCacheKey(t, "image-params-test") basePrompt := "A cute cat sitting on a windowsill" @@ -292,6 +304,10 @@ func TestImageGenerationDifferentParameters(t *testing.T) { // TestImageGenerationStreamCaching tests streaming image generation caching func TestImageGenerationStreamCaching(t *testing.T) { + if testing.Short() { + t.Skipf("skipping %s in -short mode (gpt-image-1 calls take ~15-65s)", "TestImageGenerationStreamCaching") + } + t.Parallel() if testing.Short() { t.Skip("skipping integration test in -short mode") } @@ -301,7 +317,7 @@ func TestImageGenerationStreamCaching(t *testing.T) { setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("image-stream-test") + ctx := CreateContextWithCacheKey(t, "image-stream-test") // Create test image generation request testRequest := CreateImageGenerationRequest( diff --git a/plugins/semanticcache/plugin_integration_test.go b/plugins/semanticcache/plugin_integration_test.go index 58ab9d04c3..c153928972 100644 --- a/plugins/semanticcache/plugin_integration_test.go +++ 
b/plugins/semanticcache/plugin_integration_test.go @@ -1,7 +1,6 @@ package semanticcache import ( - "context" "strings" "testing" "time" @@ -13,11 +12,12 @@ import ( // TestSemanticCacheBasicFlow tests the complete semantic cache flow func TestSemanticCacheBasicFlow(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) // Test request request := &schemas.BifrostRequest{ @@ -107,8 +107,8 @@ func TestSemanticCacheBasicFlow(t *testing.T) { t.Log("Testing second identical request (expecting cache hit)...") // Reset context for second request - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-cache-enabled") + ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) modifiedReq2, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request) if err != nil { @@ -158,11 +158,12 @@ func TestSemanticCacheBasicFlow(t *testing.T) { // TestSemanticCacheStrictFiltering tests that the cache respects parameter differences func TestSemanticCacheStrictFiltering(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) // Base request baseRequest := &schemas.BifrostRequest{ @@ -231,8 +232,8 @@ func TestSemanticCacheStrictFiltering(t *testing.T) { // Second request with different temperature - should be cache miss t.Log("Testing second request with temperature=0.5 (expecting cache miss)...") - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-cache-enabled") + ctx2 := 
newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) modifiedRequest := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, @@ -268,8 +269,8 @@ func TestSemanticCacheStrictFiltering(t *testing.T) { // Third request with different model - should be cache miss t.Log("Testing third request with different model (expecting cache miss)...") - ctx3 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx3.SetValue(CacheKey, "test-cache-enabled") + ctx3 := newBaseTestContext() + ctx3.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) modifiedRequest2 := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, @@ -306,11 +307,12 @@ func TestSemanticCacheStrictFiltering(t *testing.T) { // TestSemanticCacheStreamingFlow tests streaming response caching func TestSemanticCacheStreamingFlow(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) request := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionStreamRequest, @@ -356,10 +358,20 @@ func TestSemanticCacheStreamingFlow(t *testing.T) { for i, chunk := range chunks { var finishReason *string - if i == len(chunks)-1 { + isFinal := i == len(chunks)-1 + if isFinal { finishReason = bifrost.Ptr("stop") } + // Bifrost's stream pipeline sets this on the final chunk before + // invoking PostLLMHook (see core/bifrost.go where it stamps + // BifrostContextKeyStreamEndIndicator=true). The cache plugin's + // PostLLMHook flushes the accumulator only when IsFinalChunk(ctx) + // returns true, so a hand-rolled stream simulation must mirror + // that — otherwise the entry is never written and the second + // request misses. 
+ ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, isFinal) + chunkResponse := &schemas.BifrostResponse{ ChatResponse: &schemas.BifrostChatResponse{ ID: uuid.New().String(), @@ -395,8 +407,8 @@ func TestSemanticCacheStreamingFlow(t *testing.T) { // Test cache retrieval for streaming t.Log("Testing streaming cache retrieval...") - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-cache-enabled") + ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) _, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request) if err != nil { @@ -404,10 +416,8 @@ func TestSemanticCacheStreamingFlow(t *testing.T) { } if shortCircuit2 == nil { - t.Log("⚠️ Expected streaming cache hit, but got cache miss - this may be expected with the new unified storage") - return + t.Fatal("expected streaming cache hit on identical second request after the first stream was fully accumulated and stored") } - if shortCircuit2.Stream == nil { t.Fatal("Cache hit but stream is nil") } @@ -434,12 +444,13 @@ func TestSemanticCacheStreamingFlow(t *testing.T) { // TestSemanticCache_NoCacheWhenKeyMissing verifies cache is disabled when cache key is missing from context func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) { + t.Parallel() t.Log("Testing cache behavior when cache key is missing...") setup := NewTestSetup(t) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + ctx := newBaseTestContext() // Don't set the cache key - cache should be disabled request := &schemas.BifrostRequest{ @@ -473,12 +484,13 @@ func TestSemanticCache_NoCacheWhenKeyMissing(t *testing.T) { // TestSemanticCache_CustomTTLHandling verifies cache respects custom TTL values from context func TestSemanticCache_CustomTTLHandling(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Configure plugin with custom TTL key - ctx := 
schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) ctx.SetValue(CacheTTLKey, 1*time.Minute) // Custom TTL request := &schemas.BifrostRequest{ @@ -538,20 +550,37 @@ func TestSemanticCache_CustomTTLHandling(t *testing.T) { WaitForCache(setup.Plugin) - t.Log("✅ Custom TTL configuration test passed!") + // Read back: a second identical request must hit AND the entry's TTL + // must reflect the per-request override (1 minute), not the plugin + // default (5 minutes). expires_at is exposed via cache_debug isn't + // directly readable, but we can confirm the entry is present. + ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) + ctx2.SetValue(CacheTTLKey, 1*time.Minute) + _, sc2, err := setup.Plugin.PreLLMHook(ctx2, request) + if err != nil { + t.Fatalf("Second PreLLMHook failed: %v", err) + } + if sc2 == nil || sc2.Response == nil { + t.Fatal("expected cache hit on second identical request with custom TTL") + } + if cd := sc2.Response.GetExtraFields().CacheDebug; cd == nil || !cd.CacheHit { + t.Fatal("expected CacheDebug.CacheHit=true on hit") + } + t.Log("✅ Custom TTL configuration test passed (entry written and retrievable)") } // TestSemanticCache_CustomThresholdHandling verifies cache respects custom similarity threshold from context func TestSemanticCache_CustomThresholdHandling(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - // Configure plugin with custom threshold key - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") - ctx.SetValue(CacheThresholdKey, 0.95) // Very high threshold - - request := &schemas.BifrostRequest{ + // Seed an entry with the DEFAULT threshold (0.8) so a follow-up + // request can attempt semantic search against it. 
+ seedCtx := newBaseTestContext() + seedCtx.SetValue(CacheKey, keyForTest(t, "threshold-seed")) + seedReq := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, ChatRequest: &schemas.BifrostChatRequest{ Provider: schemas.OpenAI, @@ -567,21 +596,57 @@ func TestSemanticCache_CustomThresholdHandling(t *testing.T) { }, } - // Test that custom threshold is used (this would need semantic search to be fully testable) - _, shortCircuit, err := setup.Plugin.PreLLMHook(ctx, request) + _, sc1, err := setup.Plugin.PreLLMHook(seedCtx, seedReq) if err != nil { - t.Fatalf("PreLLMHook failed: %v", err) + t.Fatalf("seed PreLLMHook failed: %v", err) } - - if shortCircuit != nil { - t.Fatal("Expected cache miss with high threshold, but got cache hit") + if sc1 != nil { + t.Fatal("Expected initial cache miss") + } + seedRes := &schemas.BifrostResponse{ + ChatResponse: &schemas.BifrostChatResponse{ + ID: "threshold-test", + Choices: []schemas.BifrostResponseChoice{{ + ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ + Message: &schemas.ChatMessage{ + Role: "assistant", + Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("seed response")}, + }, + }, + }}, + ExtraFields: schemas.BifrostResponseExtraFields{ + Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o-mini", RequestType: schemas.ChatCompletionRequest, + }, + }, } + if _, _, err := setup.Plugin.PostLLMHook(seedCtx, seedRes, nil); err != nil { + t.Fatalf("seed PostLLMHook failed: %v", err) + } + WaitForCache(setup.Plugin) - t.Log("✅ Custom threshold configuration test passed!") + // Identical-content request with a HIGH threshold (0.95) MUST still hit + // via the direct path (direct hashing ignores threshold). Threshold only + // gates semantic search; a same-input request matches the deterministic + // directCacheID regardless. This proves the override doesn't break direct. 
+ hitCtx := newBaseTestContext() + hitCtx.SetValue(CacheKey, keyForTest(t, "threshold-seed")) + hitCtx.SetValue(CacheThresholdKey, 0.95) + _, sc2, err := setup.Plugin.PreLLMHook(hitCtx, seedReq) + if err != nil { + t.Fatalf("hit PreLLMHook failed: %v", err) + } + if sc2 == nil || sc2.Response == nil { + t.Fatal("expected direct cache hit even with high threshold (direct ignores threshold)") + } + if cd := sc2.Response.GetExtraFields().CacheDebug; cd == nil || cd.HitType == nil || *cd.HitType != string(CacheTypeDirect) { + t.Fatalf("expected hit_type=direct, got cache_debug=%+v", cd) + } + t.Log("✅ Custom threshold override tracked through PreLLMHook without breaking direct path") } // TestSemanticCache_ProviderModelCachingFlags verifies cache behavior with provider/model caching flags func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -589,8 +654,8 @@ func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) { setup.Config.CacheByProvider = bifrost.Ptr(false) setup.Config.CacheByModel = bifrost.Ptr(false) - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) request1 := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, @@ -666,29 +731,36 @@ func TestSemanticCache_ProviderModelCachingFlags(t *testing.T) { }, } - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-cache-enabled") + ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) _, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request2) if err != nil { t.Fatalf("Second PreLLMHook failed: %v", err) } - // With provider/model caching disabled, we might get cache hits across different providers/models - // This behavior depends on the exact implementation 
of hash generation - t.Logf("Cache behavior with disabled provider/model flags: hit=%v", shortCircuit2 != nil) - - t.Log("✅ Provider/model caching flags test passed!") + // CacheByProvider=false + CacheByModel=false means provider and model are + // stripped from the directCacheID input. Same content + same cache_key + // must produce the SAME directCacheID, so the second request MUST hit + // even though it specifies a completely different provider/model. + if shortCircuit2 == nil || shortCircuit2.Response == nil { + t.Fatal("expected cache hit across providers/models when CacheByProvider+CacheByModel=false") + } + if cd := shortCircuit2.Response.GetExtraFields().CacheDebug; cd == nil || !cd.CacheHit { + t.Fatalf("expected CacheDebug.CacheHit=true, got %+v", cd) + } + t.Log("✅ CacheByProvider=false + CacheByModel=false correctly shares entries across providers/models") } // TestSemanticCache_ConfigurationEdgeCases verifies edge cases in configuration handling func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Test with invalid TTL type in context - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-cache-enabled") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) ctx.SetValue(CacheTTLKey, "not-a-duration") // Invalid TTL type request := &schemas.BifrostRequest{ @@ -712,25 +784,63 @@ func TestSemanticCache_ConfigurationEdgeCases(t *testing.T) { if err != nil { t.Fatalf("PreLLMHook failed with invalid TTL: %v", err) } - if shortCircuit != nil { t.Fatal("Unexpected cache hit with invalid TTL") } - // Test with invalid threshold type - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-cache-enabled") - ctx2.SetValue(CacheThresholdKey, "not-a-float") // Invalid threshold type + // Plugin must FALL BACK to its default TTL — verify by writing 
then + // reading the entry. If the invalid TTL caused caching to silently + // disable, the second request would miss. + res := &schemas.BifrostResponse{ + ChatResponse: &schemas.BifrostChatResponse{ + ID: "edge-ttl", + Choices: []schemas.BifrostResponseChoice{{ + ChatNonStreamResponseChoice: &schemas.ChatNonStreamResponseChoice{ + Message: &schemas.ChatMessage{Role: "assistant", Content: &schemas.ChatMessageContent{ContentStr: bifrost.Ptr("ok")}}, + }, + }}, + ExtraFields: schemas.BifrostResponseExtraFields{Provider: schemas.OpenAI, OriginalModelRequested: "gpt-4o-mini", RequestType: schemas.ChatCompletionRequest}, + }, + } + if _, _, err := setup.Plugin.PostLLMHook(ctx, res, nil); err != nil { + t.Fatalf("PostLLMHook failed: %v", err) + } + WaitForCache(setup.Plugin) - // Should handle invalid threshold gracefully - _, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request) + ctxRead := newBaseTestContext() + ctxRead.SetValue(CacheKey, keyForTest(t, "test-cache-enabled")) + ctxRead.SetValue(CacheTTLKey, "not-a-duration") + if _, sc, err := setup.Plugin.PreLLMHook(ctxRead, request); err != nil { + t.Fatalf("read PreLLMHook failed: %v", err) + } else if sc == nil { + t.Fatal("expected cache hit — invalid TTL should have fallen back to default and entry should be retrievable") + } + + // Test with invalid threshold type — same expectation: fallback works. 
+ ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-cache-threshold-edge")) + ctx2.SetValue(CacheThresholdKey, "not-a-float") + + _, sc2, err := setup.Plugin.PreLLMHook(ctx2, request) if err != nil { t.Fatalf("PreLLMHook failed with invalid threshold: %v", err) } + if sc2 != nil { + t.Fatal("Unexpected cache hit on first call with invalid threshold") + } + if _, _, err := setup.Plugin.PostLLMHook(ctx2, res, nil); err != nil { + t.Fatalf("PostLLMHook failed: %v", err) + } + WaitForCache(setup.Plugin) - if shortCircuit2 != nil { - t.Fatal("Unexpected cache hit with invalid threshold") + ctx2Read := newBaseTestContext() + ctx2Read.SetValue(CacheKey, keyForTest(t, "test-cache-threshold-edge")) + ctx2Read.SetValue(CacheThresholdKey, "still-not-a-float") + if _, sc, err := setup.Plugin.PreLLMHook(ctx2Read, request); err != nil { + t.Fatalf("threshold read PreLLMHook failed: %v", err) + } else if sc == nil { + t.Fatal("expected cache hit — invalid threshold should have fallen back to default") } - t.Log("✅ Configuration edge cases test passed!") + t.Log("✅ Configuration edge cases test passed (invalid TTL/threshold fall back gracefully)") } diff --git a/plugins/semanticcache/plugin_nil_content_test.go b/plugins/semanticcache/plugin_nil_content_test.go index db34034458..8337beb943 100644 --- a/plugins/semanticcache/plugin_nil_content_test.go +++ b/plugins/semanticcache/plugin_nil_content_test.go @@ -1,6 +1,7 @@ package semanticcache import ( + "strings" "testing" bifrost "github.com/maximhq/bifrost/core" @@ -87,18 +88,33 @@ func TestExtractTextForEmbedding_NilContent(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // This should not panic - text, hash, err := plugin.extractTextForEmbedding(tt.request) - // We don't care about the error — the important thing is no panic - t.Logf("text=%q, hash=%q, err=%v", text, hash, err) + // Primary contract: must not panic on nil-content messages. 
+ // Secondary: returned text must not contain stringification + // artifacts, and the all-nil case must surface as an error. + text, err := plugin.extractTextForEmbedding(nil, tt.request) + if strings.Contains(text, "") || strings.Contains(text, "%!") { + t.Fatalf("extractTextForEmbedding produced a stringification artifact: %q", text) + } + if tt.name == "ChatRequest where all messages have nil Content" { + if err == nil { + t.Fatalf("expected error when no message has text content, got text=%q", text) + } + if text != "" { + t.Fatalf("expected empty text when all content is nil, got %q", text) + } + } }) } } -func TestPrepareDirectCacheLookup_ResponsesStreamRequest(t *testing.T) { +// TestPreLLMHookSeedsDirectCacheIDForResponsesStream verifies the streaming +// Responses path runs through PreLLMHook → performDirectSearch and stamps a +// deterministic DirectCacheID on the per-request cacheState. +func TestPreLLMHookSeedsDirectCacheIDForResponsesStream(t *testing.T) { plugin := &Plugin{ config: getDefaultTestConfig(), logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug), + store: newDirectFastPathStore(), } req := &schemas.BifrostRequest{ @@ -106,26 +122,32 @@ func TestPrepareDirectCacheLookup_ResponsesStreamRequest(t *testing.T) { ResponsesRequest: CreateStreamingResponsesRequest("Explain cache invalidation", 0.2, 200), } - ctx := CreateContextWithCacheKey("responses-stream-direct") - directID, err := plugin.prepareDirectCacheLookup(ctx, req, "responses-stream-direct") - if err != nil { - t.Fatalf("prepareDirectCacheLookup failed: %v", err) + ctx := CreateContextWithCacheKeyAndType(t, "responses-stream-direct", CacheTypeDirect) + if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { + t.Fatalf("PreLLMHook failed: %v", err) } - if directID == "" { - t.Fatal("expected deterministic direct cache id") + + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + state := plugin.getCacheState(requestID) + if state == nil { + t.Fatal("expected 
cache state to be created") } - if got, _ := ctx.Value(requestHashKey).(string); got == "" { - t.Fatal("expected request hash to be stored in context") + if state.DirectCacheID == "" { + t.Fatal("expected DirectCacheID to be populated by direct search") } - if got, _ := ctx.Value(requestParamsHashKey).(string); got == "" { - t.Fatal("expected params hash to be stored in context") + if state.ParamsHash == "" { + t.Fatal("expected ParamsHash to be populated") } } -func TestPrepareDirectCacheLookup_UnsupportedRequestTypeFailsClosed(t *testing.T) { +// TestPreLLMHookFailsClosedForUnsupportedRequestType verifies the plugin +// short-circuits early for unsupported request types and never populates +// state fields that downstream caching logic would read. +func TestPreLLMHookFailsClosedForUnsupportedRequestType(t *testing.T) { plugin := &Plugin{ config: getDefaultTestConfig(), logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug), + store: newDirectFastPathStore(), } req := &schemas.BifrostRequest{ @@ -138,29 +160,36 @@ func TestPrepareDirectCacheLookup_UnsupportedRequestTypeFailsClosed(t *testing.T }, } - ctx := CreateContextWithCacheKey("unsupported-direct") - directID, err := plugin.prepareDirectCacheLookup(ctx, req, "unsupported-direct") - if err == nil { - t.Fatal("expected prepareDirectCacheLookup to reject unsupported request type") - } - if directID != "" { - t.Fatalf("expected no direct cache id, got %q", directID) - } - if got, _ := ctx.Value(requestHashKey).(string); got != "" { - t.Fatalf("expected request hash to remain unset, got %q", got) - } - if got, _ := ctx.Value(requestParamsHashKey).(string); got != "" { - t.Fatalf("expected params hash to remain unset, got %q", got) + ctx := CreateContextWithCacheKey(t, "unsupported-direct") + if _, shortCircuit, err := plugin.PreLLMHook(ctx, req); err != nil || shortCircuit != nil { + t.Fatalf("PreLLMHook unexpected: shortCircuit=%v err=%v", shortCircuit, err) } - if got, _ := 
ctx.Value(requestStorageIDKey).(string); got != "" { - t.Fatalf("expected storage id to remain unset, got %q", got) + + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + state := plugin.getCacheState(requestID) + // Unsupported types create the state slot (reset happens up front) but + // never populate the caching fields. + if state != nil { + if state.DirectCacheID != "" { + t.Fatalf("expected DirectCacheID unset, got %q", state.DirectCacheID) + } + if state.ParamsHash != "" { + t.Fatalf("expected ParamsHash unset, got %q", state.ParamsHash) + } + if state.Embeddings != nil { + t.Fatalf("expected Embeddings unset, got %v", state.Embeddings) + } } } +// TestPreLLMHookSkipsUnsupportedCountTokensRequest verifies CountTokensRequest +// (which is not in the supported set) flows through PreLLMHook without +// short-circuiting and without populating cache fields. func TestPreLLMHookSkipsUnsupportedCountTokensRequest(t *testing.T) { plugin := &Plugin{ config: getDefaultTestConfig(), logger: bifrost.NewDefaultLogger(schemas.LogLevelDebug), + store: newDirectFastPathStore(), } req := &schemas.BifrostRequest{ @@ -179,18 +208,7 @@ func TestPreLLMHookSkipsUnsupportedCountTokensRequest(t *testing.T) { }, } - ctx := CreateContextWithCacheKey("count-tokens-test") - ctx.SetValue(requestIDKey, "stale-request-id") - ctx.SetValue(requestStorageIDKey, "stale-storage-id") - ctx.SetValue(requestHashKey, "stale-request-hash") - ctx.SetValue(requestParamsHashKey, "stale-params-hash") - ctx.SetValue(requestModelKey, "stale-model") - ctx.SetValue(requestProviderKey, schemas.OpenAI) - ctx.SetValue(requestEmbeddingKey, []float32{1, 2, 3}) - ctx.SetValue(requestEmbeddingTokensKey, 99) - ctx.SetValue(isCacheHitKey, true) - ctx.SetValue(cacheHitTypeKey, CacheTypeDirect) - + ctx := CreateContextWithCacheKey(t, "count-tokens-test") modifiedReq, shortCircuit, err := plugin.PreLLMHook(ctx, req) if err != nil { t.Fatalf("PreLLMHook failed: %v", err) @@ -201,35 +219,12 @@ func 
TestPreLLMHookSkipsUnsupportedCountTokensRequest(t *testing.T) { if shortCircuit != nil { t.Fatal("expected no short-circuit for unsupported count tokens request") } - if got, _ := ctx.Value(requestIDKey).(string); got != "" { - t.Fatalf("expected requestIDKey to remain unset, got %q", got) - } - if got, _ := ctx.Value(requestHashKey).(string); got != "" { - t.Fatalf("expected requestHashKey to remain unset, got %q", got) - } - if got, _ := ctx.Value(requestParamsHashKey).(string); got != "" { - t.Fatalf("expected requestParamsHashKey to remain unset, got %q", got) - } - if got, _ := ctx.Value(requestStorageIDKey).(string); got != "" { - t.Fatalf("expected requestStorageIDKey to remain unset, got %q", got) - } - if got, _ := ctx.Value(requestModelKey).(string); got != "" { - t.Fatalf("expected requestModelKey to remain unset, got %q", got) - } - if got, ok := ctx.Value(requestProviderKey).(schemas.ModelProvider); ok && got != "" { - t.Fatalf("expected requestProviderKey to remain unset, got %q", got) - } - if got := ctx.Value(requestEmbeddingKey); got != nil { - t.Fatalf("expected requestEmbeddingKey to remain unset, got %#v", got) - } - if got, ok := ctx.Value(requestEmbeddingTokensKey).(int); ok && got != 0 { - t.Fatalf("expected requestEmbeddingTokensKey to remain unset, got %d", got) - } - if got, ok := ctx.Value(isCacheHitKey).(bool); ok && got { - t.Fatal("expected isCacheHitKey to remain unset") - } - if got, ok := ctx.Value(cacheHitTypeKey).(CacheType); ok && got != "" { - t.Fatalf("expected cacheHitTypeKey to remain unset, got %q", got) + + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + if state := plugin.getCacheState(requestID); state != nil { + if state.DirectCacheID != "" || state.ParamsHash != "" || state.Embeddings != nil { + t.Fatalf("expected unsupported request to leave state empty, got %+v", state) + } } } @@ -276,9 +271,19 @@ func TestGetNormalizedInputForCaching_NilContent(t *testing.T) { }, } - // This should not 
panic + // Must not panic, and must return a non-nil filtered messages slice + // of the right element type (we built a ChatCompletionRequest). result := plugin.getNormalizedInputForCaching(request) - t.Logf("result type: %T", result) + if result == nil { + t.Fatal("getNormalizedInputForCaching returned nil for a valid Chat request") + } + msgs, ok := result.([]schemas.ChatMessage) + if !ok { + t.Fatalf("expected []schemas.ChatMessage, got %T", result) + } + if len(msgs) != len(request.ChatRequest.Input) { + t.Fatalf("normalized message count %d differs from input %d (filtering changed unexpectedly)", len(msgs), len(request.ChatRequest.Input)) + } } // createResponsesRequestWithNilContent builds a BifrostResponsesRequest with a nil Content message for testing. diff --git a/plugins/semanticcache/plugin_no_mutation_test.go b/plugins/semanticcache/plugin_no_mutation_test.go new file mode 100644 index 0000000000..340b4fdd9a --- /dev/null +++ b/plugins/semanticcache/plugin_no_mutation_test.go @@ -0,0 +1,198 @@ +package semanticcache + +import ( + "context" + "encoding/json" + "os" + "reflect" + "sync" + "testing" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/vectorstore" +) + +// requestCapturer is an LLMPlugin that records the request it sees in +// PreLLMHook. Placed AFTER semantic_cache in the plugin chain it observes +// the request post-cache-plugin-mutation; we then assert that nothing +// landed in the request that originated from cache-side normalization +// (lowercase, whitespace-trim, system-prompt filtering, etc.). +// +// This complements the in-process unit tests because those exercise the +// helpers that DO normalize (getNormalizedInputForCaching) — what we want +// here is a contract test on the request that flows downstream. 
+type requestCapturer struct { + mu sync.Mutex + captured *schemas.BifrostRequest +} + +func (p *requestCapturer) GetName() string { return "test-request-capturer" } +func (p *requestCapturer) Cleanup() error { return nil } + +func (p *requestCapturer) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error) { + p.mu.Lock() + // Snapshot the request via JSON round-trip so any later mutation by the + // pipeline (none expected, but be defensive) can't retroactively change + // what the test sees. + data, err := json.Marshal(req) + if err == nil { + var snapshot schemas.BifrostRequest + if jerr := json.Unmarshal(data, &snapshot); jerr == nil { + p.captured = &snapshot + } + } + if p.captured == nil { + p.captured = req // fall back to direct reference + } + p.mu.Unlock() + return req, nil, nil +} + +func (p *requestCapturer) PostLLMHook(_ *schemas.BifrostContext, resp *schemas.BifrostResponse, e *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error) { + return resp, e, nil +} + +// TestCachingDoesNotMutateRequestSentToProvider runs through the full plugin +// pipeline against the real OpenAI API and asserts that nothing the cache +// plugin does internally (text normalization, system-prompt filtering, +// metadata extraction, embedding generation) leaks into the request that +// reaches the provider. +// +// The test is gated on OPENAI_API_KEY because we need a real round-trip; the +// in-process mocker would short-circuit before the request body is finalized. +func TestCachingDoesNotMutateRequestSentToProvider(t *testing.T) { + if testing.Short() { + t.Skip("skipping real-LLM test in -short mode") + } + if os.Getenv("OPENAI_API_KEY") == "" { + t.Skip("OPENAI_API_KEY not set; needed for live LLM contract test") + } + t.Parallel() + + // Stand up the cache plugin against the shared Weaviate test namespace, + // same as the rest of the integration suite. 
+ logger := bifrost.NewDefaultLogger(schemas.LogLevelError) + store, err := vectorstore.NewVectorStore(context.Background(), &vectorstore.Config{ + Type: vectorstore.VectorStoreTypeWeaviate, + Config: getWeaviateConfigFromEnv(), + Enabled: true, + }, logger) + if err != nil { + t.Skipf("Weaviate not available: %v", err) + } + cfg := &Config{ + Provider: schemas.OpenAI, + EmbeddingModel: "text-embedding-3-small", + Dimension: 1536, + Threshold: 0.8, + ConversationHistoryThreshold: DefaultConversationHistoryThreshold, + VectorStoreNamespace: SharedTestNamespace, + // Do NOT clean up on shutdown — other parallel tests share the namespace. + CleanUpOnShutdown: false, + } + if err := ensureSharedTestNamespace(context.Background(), store, cfg.Dimension); err != nil { + t.Fatalf("ensureSharedTestNamespace: %v", err) + } + cachePlugin, err := Init(schemas.NewBifrostContext(context.Background(), schemas.NoDeadline), cfg, logger, store) + if err != nil { + t.Fatalf("cache plugin Init: %v", err) + } + + capturer := &requestCapturer{} + + // Real OpenAI provider, no mocker — the request must travel end-to-end. + bctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) + client, err := bifrost.Init(bctx, schemas.BifrostConfig{ + Account: &BaseAccount{}, + // Order matters: cache runs first, capturer second so it sees the + // request as it flows out of the cache plugin. + LLMPlugins: []schemas.LLMPlugin{cachePlugin, capturer}, + Logger: logger, + }) + if err != nil { + t.Fatalf("bifrost.Init: %v", err) + } + defer client.Shutdown() + cachePlugin.(*Plugin).SetEmbeddingRequestExecutor(client.EmbeddingRequest) + + // Content carefully chosen to surface normalization if it ever leaks: + // - leading/trailing whitespace (would be stripped by strings.TrimSpace) + // - mixed case (would be lowercased) + // - a system prompt (would be stripped if ExcludeSystemPrompt leaked) + systemContent := " RESPOND with a SINGLE word. " + userContent := " Hello, World! 
PRESERVE_THIS_VERBATIM. " + + chatReq := &schemas.BifrostChatRequest{ + Provider: schemas.OpenAI, + Model: "gpt-4o-mini", + Input: []schemas.ChatMessage{ + { + Role: schemas.ChatMessageRoleSystem, + Content: &schemas.ChatMessageContent{ + ContentStr: bifrost.Ptr(systemContent), + }, + }, + { + Role: schemas.ChatMessageRoleUser, + Content: &schemas.ChatMessageContent{ + ContentStr: bifrost.Ptr(userContent), + }, + }, + }, + Params: &schemas.ChatParameters{ + Temperature: bifrost.Ptr(0.0), + MaxCompletionTokens: bifrost.Ptr(5), + }, + } + + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "")) + + // Take a JSON snapshot of the original input as the test sent it. + originalJSON, err := json.Marshal(chatReq) + if err != nil { + t.Fatalf("marshal original: %v", err) + } + + if _, llmErr := client.ChatCompletionRequest(ctx, chatReq); llmErr != nil { + // Even if OpenAI errors, the request was already captured by the + // time the provider call fired. Continue with the assertion. + t.Logf("upstream LLM error (expected to still proceed with assertion): %v", llmErr) + } + + capturer.mu.Lock() + captured := capturer.captured + capturer.mu.Unlock() + if captured == nil { + t.Fatal("capturer never recorded a request — pipeline order or plugin wiring is wrong") + } + + // 1) The chat input the provider saw must be byte-for-byte identical to + // what the caller passed in. + capturedJSON, err := json.Marshal(captured.ChatRequest) + if err != nil { + t.Fatalf("marshal captured: %v", err) + } + var origMap, capMap map[string]any + _ = json.Unmarshal(originalJSON, &origMap) + _ = json.Unmarshal(capturedJSON, &capMap) + if !reflect.DeepEqual(origMap["input"], capMap["input"]) { + t.Fatalf("chat input mutated by cache plugin\noriginal: %s\ncaptured: %s", originalJSON, capturedJSON) + } + + // 2) Belt-and-suspenders: explicit spot checks on the fields most likely + // to be mangled by normalization regressions, with clear failure messages. 
+ if len(captured.ChatRequest.Input) != len(chatReq.Input) { + t.Fatalf("system prompt was filtered out: captured=%d messages, original=%d", len(captured.ChatRequest.Input), len(chatReq.Input)) + } + if got := *captured.ChatRequest.Input[0].Content.ContentStr; got != systemContent { + t.Fatalf("system content was modified: got %q, want %q", got, systemContent) + } + if got := *captured.ChatRequest.Input[1].Content.ContentStr; got != userContent { + t.Fatalf("user content was modified: got %q, want %q", got, userContent) + } + if captured.ChatRequest.Input[0].Role != schemas.ChatMessageRoleSystem { + t.Fatalf("system role was rewritten: got %q", captured.ChatRequest.Input[0].Role) + } +} diff --git a/plugins/semanticcache/plugin_no_store_test.go b/plugins/semanticcache/plugin_no_store_test.go index 7e9ab296c2..aef75171ff 100644 --- a/plugins/semanticcache/plugin_no_store_test.go +++ b/plugins/semanticcache/plugin_no_store_test.go @@ -8,17 +8,18 @@ import ( // TestCacheNoStoreBasicFunctionality tests that CacheNoStoreKey prevents caching func TestCacheNoStoreBasicFunctionality(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("What is artificial intelligence?", 0.7, 100) // Test 1: Normal caching (control test) - ctx1 := CreateContextWithCacheKey("test-no-store-control") + ctx1 := CreateContextWithCacheKey(t, "test-no-store-control") t.Log("Making normal request (should be cached)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) // Fresh request @@ -37,11 +38,11 @@ func TestCacheNoStoreBasicFunctionality(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should be cached // Test 2: NoStore = true (should not cache) - ctx2 := 
CreateContextWithCacheKeyAndNoStore("test-no-store-disabled", true) + ctx2 := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-disabled", true) t.Log("Making request with CacheNoStoreKey=true (should not be cached)...") response3, err3 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) // Fresh request @@ -51,16 +52,16 @@ func TestCacheNoStoreBasicFunctionality(t *testing.T) { t.Log("Verifying no-store request was not cached...") response4, err4 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err4 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err4) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}) // Should still be fresh (not cached) // Test 3: NoStore = false (should cache normally) - ctx3 := CreateContextWithCacheKeyAndNoStore("test-no-store-enabled", false) + ctx3 := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-enabled", false) t.Log("Making request with CacheNoStoreKey=false (should be cached)...") response5, err5 := setup.Client.ChatCompletionRequest(ctx3, testRequest) if err5 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err5) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response5}) // Fresh request @@ -79,6 +80,7 @@ func TestCacheNoStoreBasicFunctionality(t *testing.T) { // TestCacheNoStoreWithDifferentRequestTypes tests NoStore with various request types func TestCacheNoStoreWithDifferentRequestTypes(t *testing.T) { + t.Parallel() t.Skip("Skipping Embedding Tests") setup := NewTestSetup(t) @@ -86,12 +88,12 @@ func TestCacheNoStoreWithDifferentRequestTypes(t *testing.T) { // Test with chat completion chatRequest := CreateBasicChatRequest("Test no-store 
with chat", 0.7, 50) - ctx1 := CreateContextWithCacheKeyAndNoStore("test-no-store-chat", true) + ctx1 := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-chat", true) t.Log("Testing no-store with chat completion...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, chatRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -100,18 +102,18 @@ func TestCacheNoStoreWithDifferentRequestTypes(t *testing.T) { // Verify not cached response2, err2 := setup.Client.ChatCompletionRequest(ctx1, chatRequest) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should not be cached // Test with embedding request embeddingRequest := CreateEmbeddingRequest([]string{"Test no-store with embeddings"}) - ctx2 := CreateContextWithCacheKeyAndNoStore("test-no-store-embedding", true) + ctx2 := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-embedding", true) t.Log("Testing no-store with embedding request...") response3, err3 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response3}) @@ -120,7 +122,7 @@ func TestCacheNoStoreWithDifferentRequestTypes(t *testing.T) { // Verify not cached response4, err4 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest) if err4 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err4) } AssertNoCacheHit(t, &schemas.BifrostResponse{EmbeddingResponse: response4}) // Should not be cached @@ -129,6 +131,7 @@ func TestCacheNoStoreWithDifferentRequestTypes(t 
*testing.T) { // TestCacheNoStoreWithConversationHistory tests NoStore with conversation context func TestCacheNoStoreWithConversationHistory(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -141,12 +144,12 @@ func TestCacheNoStoreWithConversationHistory(t *testing.T) { request := CreateConversationRequest(messages, 0.7, 100) // Test with no-store enabled - ctx := CreateContextWithCacheKeyAndNoStore("test-no-store-conversation", true) + ctx := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-conversation", true) t.Log("Testing no-store with conversation history...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, request) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -155,7 +158,7 @@ func TestCacheNoStoreWithConversationHistory(t *testing.T) { // Verify not cached (same conversation should not hit cache) response2, err2 := setup.Client.ChatCompletionRequest(ctx, request) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Should not be cached due to no-store @@ -164,20 +167,21 @@ func TestCacheNoStoreWithConversationHistory(t *testing.T) { // TestCacheNoStoreWithCacheTypes tests NoStore interaction with CacheTypeKey func TestCacheNoStoreWithCacheTypes(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("Test no-store with cache types", 0.7, 50) // Test no-store with direct cache type - ctx1 := CreateContextWithCacheKey("test-no-store-cache-types") + ctx1 := CreateContextWithCacheKey(t, "test-no-store-cache-types") ctx1 = ctx1.WithValue(CacheNoStoreKey, true) ctx1 = ctx1.WithValue(CacheTypeKey, CacheTypeDirect) t.Log("Testing no-store with 
CacheTypeKey=direct...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -186,19 +190,19 @@ func TestCacheNoStoreWithCacheTypes(t *testing.T) { // Should not be cached response2, err2 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // No-store should override cache type // Test no-store with semantic cache type - ctx2 := CreateContextWithCacheKey("test-no-store-cache-types") + ctx2 := CreateContextWithCacheKey(t, "test-no-store-cache-types") ctx2 = ctx2.WithValue(CacheNoStoreKey, true) ctx2 = ctx2.WithValue(CacheTypeKey, CacheTypeSemantic) t.Log("Testing no-store with CacheTypeKey=semantic...") response3, err3 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) @@ -207,7 +211,7 @@ func TestCacheNoStoreWithCacheTypes(t *testing.T) { // Should not be cached response4, err4 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err4 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err4) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response4}) // No-store should override cache type @@ -216,19 +220,20 @@ func TestCacheNoStoreWithCacheTypes(t *testing.T) { // TestCacheNoStoreErrorHandling tests error scenarios with NoStore func TestCacheNoStoreErrorHandling(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := 
CreateBasicChatRequest("Test no-store error handling", 0.7, 50) // Test with invalid no-store value (non-boolean) - ctx1 := CreateContextWithCacheKey("test-no-store-errors") + ctx1 := CreateContextWithCacheKey(t, "test-no-store-errors") ctx1 = ctx1.WithValue(CacheNoStoreKey, "invalid") t.Log("Testing no-store with invalid value (should cache normally)...") response1, err1 := setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) @@ -246,13 +251,13 @@ func TestCacheNoStoreErrorHandling(t *testing.T) { AssertCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}, "direct") // Should be cached (invalid value ignored) // Test with nil value (should cache normally) - ctx2 := CreateContextWithCacheKey("test-no-store-nil") + ctx2 := CreateContextWithCacheKey(t, "test-no-store-nil") ctx2 = ctx2.WithValue(CacheNoStoreKey, nil) t.Log("Testing no-store with nil value (should cache normally)...") response3, err3 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response3}) @@ -270,24 +275,25 @@ func TestCacheNoStoreErrorHandling(t *testing.T) { // TestCacheNoStoreReadButNoWrite tests that NoStore allows reading cache but prevents writing func TestCacheNoStoreReadButNoWrite(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() testRequest := CreateBasicChatRequest("Describe Isaac Newton's three laws of motion", 0.7, 50) // Step 1: Cache a response normally - ctx1 := CreateContextWithCacheKey("test-no-store-read") + ctx1 := CreateContextWithCacheKey(t, "test-no-store-read") t.Log("Caching response normally...") response1, err1 := 
setup.Client.ChatCompletionRequest(ctx1, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) WaitForCache(setup.Plugin) // Step 2: Try to read with no-store enabled (should still read from cache) - ctx2 := CreateContextWithCacheKeyAndNoStore("test-no-store-read", true) + ctx2 := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-read", true) t.Log("Reading with no-store enabled (should still hit cache for reads)...") response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) if err2 != nil { diff --git a/plugins/semanticcache/plugin_normalization_test.go b/plugins/semanticcache/plugin_normalization_test.go index a2bbe68aec..a2c90b1666 100644 --- a/plugins/semanticcache/plugin_normalization_test.go +++ b/plugins/semanticcache/plugin_normalization_test.go @@ -9,6 +9,7 @@ import ( // TestTextNormalizationDirectCache tests that text normalization works correctly // for direct cache (hash-based) matching across all input types func TestTextNormalizationDirectCache(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() @@ -22,7 +23,7 @@ func TestTextNormalizationDirectCache(t *testing.T) { } func testChatCompletionNormalization(t *testing.T, setup *TestSetup) { - ctx := CreateContextWithCacheKey("test-chat-normalization") + ctx := CreateContextWithCacheKey(t, "test-chat-normalization") // Test cases with different case and whitespace variations testCases := []struct { @@ -93,7 +94,10 @@ func testChatCompletionNormalization(t *testing.T, setup *TestSetup) { t.Logf("Making first request with user: '%s', system: '%s'", testCases[0].userMsg, testCases[0].systemMsg) response1, err1 := setup.Client.ChatCompletionRequest(ctx, requests[0]) if err1 != nil { - return // Test will be skipped by retry function + if isTransientUpstreamError(err1) { + t.Skipf("transient upstream error, 
skipping test: %v", err1) + } + t.Fatalf("upstream request failed: %v", err1) } if response1 == nil || len(response1.Choices) == 0 { @@ -124,7 +128,7 @@ func testChatCompletionNormalization(t *testing.T, setup *TestSetup) { } func testSpeechNormalization(t *testing.T, setup *TestSetup) { - ctx := CreateContextWithCacheKey("test-speech-normalization") + ctx := CreateContextWithCacheKey(t, "test-speech-normalization") // Test cases with different case and whitespace variations for speech input testCases := []struct { @@ -151,7 +155,10 @@ func testSpeechNormalization(t *testing.T, setup *TestSetup) { t.Logf("Making first speech request with: '%s'", testCases[0].input) response1, err1 := setup.Client.SpeechRequest(ctx, requests[0]) if err1 != nil { - return // Test will be skipped by retry function + if isTransientUpstreamError(err1) { + t.Skipf("transient upstream error, skipping test: %v", err1) + } + t.Fatalf("upstream request failed: %v", err1) } if response1 == nil { @@ -183,10 +190,11 @@ func testSpeechNormalization(t *testing.T, setup *TestSetup) { // TestChatCompletionContentBlocksNormalization tests normalization for content blocks func TestChatCompletionContentBlocksNormalization(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-content-blocks-normalization") + ctx := CreateContextWithCacheKey(t, "test-content-blocks-normalization") // Test cases with content blocks having different text normalization testCases := []struct { @@ -245,7 +253,10 @@ func TestChatCompletionContentBlocksNormalization(t *testing.T) { t.Logf("Making first request with content blocks: %v", testCases[0].textBlocks) response1, err1 := setup.Client.ChatCompletionRequest(ctx, requests[0]) if err1 != nil { - return // Test will be skipped by retry function + if isTransientUpstreamError(err1) { + t.Skipf("transient upstream error, skipping test: %v", err1) + } + t.Fatalf("upstream request failed: %v", err1) } if response1 
== nil || len(response1.Choices) == 0 { @@ -277,17 +288,21 @@ func TestChatCompletionContentBlocksNormalization(t *testing.T) { // TestNormalizationWithSemanticCache tests that normalization works with semantic cache as well func TestNormalizationWithSemanticCache(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-normalization-semantic") + ctx := CreateContextWithCacheKey(t, "test-normalization-semantic") // Make first request with original text originalRequest := CreateBasicChatRequest("What is Machine Learning?", 0.5, 50) t.Log("Making first request with original text...") response1, err1 := setup.Client.ChatCompletionRequest(ctx, originalRequest) if err1 != nil { - return // Test will be skipped by retry function + if isTransientUpstreamError(err1) { + t.Skipf("transient upstream error, skipping test: %v", err1) + } + t.Fatalf("upstream request failed: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response1}) diff --git a/plugins/semanticcache/plugin_paths_test.go b/plugins/semanticcache/plugin_paths_test.go new file mode 100644 index 0000000000..5ca1ac8c7a --- /dev/null +++ b/plugins/semanticcache/plugin_paths_test.go @@ -0,0 +1,572 @@ +package semanticcache + +import ( + "context" + "encoding/json" + "reflect" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + bifrost "github.com/maximhq/bifrost/core" + "github.com/maximhq/bifrost/core/schemas" + "github.com/maximhq/bifrost/framework/vectorstore" +) + +// ----------------------------------------------------------------------------- +// PostLLMHook error path +// ----------------------------------------------------------------------------- + +func TestPostLLMHook_SkipsOnBifrostError(t *testing.T) { + store := newObservableStore() + plugin := newTestPlugin(t, store, false) + + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "")) + + // Drive a normal PreLLMHook so cacheState exists. 
+ req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("hello", 0.7, 50), + } + if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { + t.Fatalf("PreLLMHook failed: %v", err) + } + + // Pass a non-nil bifrost error to PostLLMHook. + bifrostErr := &schemas.BifrostError{ + Error: &schemas.ErrorField{Message: "upstream blew up"}, + } + res := &schemas.BifrostResponse{ + ChatResponse: &schemas.BifrostChatResponse{ + ExtraFields: schemas.BifrostResponseExtraFields{RequestType: schemas.ChatCompletionRequest}, + }, + } + if _, _, err := plugin.PostLLMHook(ctx, res, bifrostErr); err != nil { + t.Fatalf("PostLLMHook failed: %v", err) + } + plugin.WaitForPendingOperations() + + store.mu.Lock() + defer store.mu.Unlock() + if len(store.addIDs) != 0 { + t.Fatalf("expected zero cache writes on error response, got %d", len(store.addIDs)) + } +} + +// ----------------------------------------------------------------------------- +// shouldSkipCaching paths +// ----------------------------------------------------------------------------- + +func TestShouldSkipCaching_LargePayloadMode(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + ctx := newBaseTestContext() + ctx.SetValue(schemas.BifrostContextKeyLargePayloadMode, true) + res := &schemas.BifrostResponse{ChatResponse: &schemas.BifrostChatResponse{}} + + if !plugin.shouldSkipCaching(ctx, res) { + t.Fatal("expected LargePayloadMode to skip caching") + } +} + +func TestShouldSkipCaching_LargeResponseMode(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + ctx := newBaseTestContext() + ctx.SetValue(schemas.BifrostContextKeyLargeResponseMode, true) + res := &schemas.BifrostResponse{ChatResponse: &schemas.BifrostChatResponse{}} + + if !plugin.shouldSkipCaching(ctx, res) { + t.Fatal("expected LargeResponseMode to skip caching") + } +} + +func TestShouldSkipCaching_CacheHitReplay(t *testing.T) { + plugin := 
newTestPlugin(t, newObservableStore(), false) + + ctx := newBaseTestContext() + res := &schemas.BifrostResponse{ + ChatResponse: &schemas.BifrostChatResponse{ + ExtraFields: schemas.BifrostResponseExtraFields{ + CacheDebug: &schemas.BifrostCacheDebug{CacheHit: true}, + }, + }, + } + + if !plugin.shouldSkipCaching(ctx, res) { + t.Fatal("expected cache-hit replay to skip re-caching") + } +} + +func TestShouldSkipCaching_NoStoreFlag(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + ctx := newBaseTestContext() + ctx.SetValue(CacheNoStoreKey, true) + res := &schemas.BifrostResponse{ChatResponse: &schemas.BifrostChatResponse{}} + + if !plugin.shouldSkipCaching(ctx, res) { + t.Fatal("expected CacheNoStoreKey=true to skip caching") + } +} + +// ----------------------------------------------------------------------------- +// Init validation +// ----------------------------------------------------------------------------- + +func TestInit_RejectsNilConfig(t *testing.T) { + if _, err := Init(context.Background(), nil, bifrost.NewDefaultLogger(schemas.LogLevelError), newObservableStore()); err == nil { + t.Fatal("expected error for nil config") + } +} + +func TestInit_RejectsNilStore(t *testing.T) { + cfg := &Config{Provider: schemas.OpenAI, EmbeddingModel: "text-embedding-3-small", Dimension: 1536} + if _, err := Init(context.Background(), cfg, bifrost.NewDefaultLogger(schemas.LogLevelError), nil); err == nil { + t.Fatal("expected error for nil store") + } +} + +func TestInit_RejectsNegativeDimension(t *testing.T) { + cfg := &Config{Dimension: -1} + if _, err := Init(context.Background(), cfg, bifrost.NewDefaultLogger(schemas.LogLevelError), newObservableStore()); err == nil || !strings.Contains(err.Error(), "dimension") { + t.Fatalf("expected dimension error, got %v", err) + } +} + +func TestInit_RejectsZeroDimensionWithProvider(t *testing.T) { + cfg := &Config{Provider: schemas.OpenAI, EmbeddingModel: "text-embedding-3-small", Dimension: 0} + if 
_, err := Init(context.Background(), cfg, bifrost.NewDefaultLogger(schemas.LogLevelError), newObservableStore()); err == nil || !strings.Contains(err.Error(), "dimension") { + t.Fatalf("expected dimension error when provider set with zero dimension, got %v", err) + } +} + +func TestInit_AllowsDirectOnlyMode(t *testing.T) { + // Provider="" + Dimension=1 is the documented direct-only mode. + cfg := &Config{Dimension: 1} + plugin, err := Init(context.Background(), cfg, bifrost.NewDefaultLogger(schemas.LogLevelError), newObservableStore()) + if err != nil { + t.Fatalf("expected direct-only mode to init successfully, got %v", err) + } + if plugin == nil { + t.Fatal("expected non-nil plugin in direct-only mode") + } + _ = plugin.Cleanup() +} + +// ----------------------------------------------------------------------------- +// PreLLMHook fallback when embedding executor missing +// ----------------------------------------------------------------------------- + +func TestPreLLMHook_FallsBackToDirectWhenExecutorMissing(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + // Intentionally do NOT set plugin.embeddingRequestExecutor. + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("hello", 0.7, 50), + } + ctx := CreateContextWithCacheKey(t, "") + + // PreLLMHook should not error, should not panic, and direct search should + // still populate state.DirectCacheID. 
+ _, sc, err := plugin.PreLLMHook(ctx, req) + if err != nil { + t.Fatalf("PreLLMHook failed: %v", err) + } + if sc != nil { + t.Fatalf("expected miss (empty store), got short-circuit %+v", sc) + } + + requestID, _ := ctx.Value(schemas.BifrostContextKeyRequestID).(string) + state := plugin.getCacheState(requestID) + if state == nil || state.DirectCacheID == "" { + t.Fatal("expected DirectCacheID populated even without embedding executor") + } + if state.Embeddings != nil { + t.Fatalf("expected no embedding generated when executor missing, got %v", state.Embeddings) + } +} + +// ----------------------------------------------------------------------------- +// Expired-entry full lifecycle +// ----------------------------------------------------------------------------- + +func TestExpiredEntry_DetectedAndDeleted(t *testing.T) { + store := newObservableStore() + plugin := newTestPlugin(t, store, false) + + // Plant an already-expired entry under a deterministic ID. + expiredID := "expired-id-1" + chunkJSON, _ := json.Marshal(&schemas.BifrostResponse{ + ChatResponse: &schemas.BifrostChatResponse{}, + }) + store.chunks[expiredID] = vectorstore.SearchResult{ + ID: expiredID, + Properties: map[string]interface{}{ + "response": string(chunkJSON), + "expires_at": time.Now().Add(-1 * time.Minute).Unix(), + }, + } + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("hi", 0.7, 50), + } + ctx := newBaseTestContext() + state := &cacheState{} + + sc, err := plugin.buildResponseFromResult( + ctx, state, req, + store.chunks[expiredID], + CacheTypeDirect, nil, nil, + ) + if err != nil { + t.Fatalf("buildResponseFromResult failed: %v", err) + } + if sc != nil { + t.Fatal("expected expired entry to surface as a miss (nil short-circuit)") + } + + // The async delete is tracked on writersWg, so this drain must observe it. 
+ plugin.WaitForPendingOperations() + + store.mu.Lock() + defer store.mu.Unlock() + found := false + for _, id := range store.deleteIDs { + if id == expiredID { + found = true + break + } + } + if !found { + t.Fatalf("expected expired entry %q to be deleted, got delete log %v", expiredID, store.deleteIDs) + } +} + +// ----------------------------------------------------------------------------- +// WebSocketResponsesRequest support +// ----------------------------------------------------------------------------- + +func TestIsSemanticCacheSupportedRequestType_WebSocket(t *testing.T) { + if !isSemanticCacheSupportedRequestType(schemas.WebSocketResponsesRequest) { + t.Fatal("WebSocketResponsesRequest should be supported") + } +} + +// ----------------------------------------------------------------------------- +// UnmarshalJSON rejection paths +// ----------------------------------------------------------------------------- + +func TestUnmarshalJSON_RejectsUnsupportedTTLType(t *testing.T) { + var c Config + if err := c.UnmarshalJSON([]byte(`{"provider":"openai","ttl":true}`)); err == nil { + t.Fatal("expected error for boolean TTL") + } +} + +func TestUnmarshalJSON_RejectsNegativeTTL(t *testing.T) { + var c Config + if err := c.UnmarshalJSON([]byte(`{"provider":"openai","ttl":-5}`)); err == nil || !strings.Contains(err.Error(), "non-negative") { + t.Fatalf("expected non-negative TTL error, got %v", err) + } +} + +func TestUnmarshalJSON_RejectsMalformedJSON(t *testing.T) { + var c Config + if err := c.UnmarshalJSON([]byte(`{not valid json`)); err == nil { + t.Fatal("expected error for malformed JSON") + } +} + +func TestUnmarshalJSON_RejectsBadDurationString(t *testing.T) { + var c Config + if err := c.UnmarshalJSON([]byte(`{"provider":"openai","ttl":"forever"}`)); err == nil { + t.Fatal("expected error for unparseable duration string") + } +} + +// ----------------------------------------------------------------------------- +// Stream replay cancellation variants 
+// ----------------------------------------------------------------------------- + +func TestStreamReplay_CancelImmediately(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + chunk := `{"chat_response":{"choices":[]}}` + streamArray := []string{chunk, chunk, chunk} + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionStreamRequest, + ChatRequest: CreateBasicChatRequest("hi", 0.7, 50), + } + ctx := newBaseTestContext() + state := &cacheState{} + + sc, err := plugin.buildStreamingResponseFromResult( + ctx, state, req, + vectorstore.SearchResult{ID: "stream-1"}, + streamArray, CacheTypeSemantic, nil, nil, nil, + ) + if err != nil { + t.Fatalf("buildStreamingResponseFromResult failed: %v", err) + } + ctx.Cancel() // cancel before reading any chunks + + // Channel must close within a short window. + timeout := time.After(2 * time.Second) + for { + select { + case _, ok := <-sc.Stream: + if !ok { + return // channel closed cleanly + } + case <-timeout: + t.Fatal("replay goroutine did not exit after immediate cancel") + } + } +} + +func TestStreamReplay_FullDrain(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + chunk := `{"chat_response":{"choices":[]}}` + streamArray := []string{chunk, chunk, chunk} + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionStreamRequest, + ChatRequest: CreateBasicChatRequest("hi", 0.7, 50), + } + ctx := newBaseTestContext() + state := &cacheState{} + + sc, err := plugin.buildStreamingResponseFromResult( + ctx, state, req, + vectorstore.SearchResult{ID: "stream-2"}, + streamArray, CacheTypeSemantic, nil, nil, nil, + ) + if err != nil { + t.Fatalf("buildStreamingResponseFromResult failed: %v", err) + } + + count := 0 + for chunk := range sc.Stream { + if chunk == nil { + t.Fatal("received nil chunk") + } + count++ + } + if count != len(streamArray) { + t.Fatalf("expected %d chunks, got %d", len(streamArray), count) + } +} + +// 
----------------------------------------------------------------------------- +// Plugin-log emission on failure paths (ctx.Log) +// ----------------------------------------------------------------------------- + +// scopedTestContext returns a plugin-scoped BifrostContext so ctx.Log entries +// land on the per-request log store and can be inspected via GetPluginLogs. +// In production the framework wraps every plugin hook this way. +func scopedTestContext(t testing.TB, suffix string) *schemas.BifrostContext { + t.Helper() + root := CreateContextWithCacheKey(t, suffix) + name := PluginName + return root.WithPluginScope(&name) +} + +func TestPreLLMHook_EmitsPluginLogOnEmbeddingFailure(t *testing.T) { + store := newObservableStore() + plugin := newTestPlugin(t, store, false) + plugin.SetEmbeddingRequestExecutor(func(_ *schemas.BifrostContext, _ *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) { + return nil, &schemas.BifrostError{Error: &schemas.ErrorField{Message: "rate limit exceeded"}} + }) + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("test prompt", 0.7, 50), + } + ctx := scopedTestContext(t, "") + + if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { + t.Fatalf("PreLLMHook failed: %v", err) + } + + logs := ctx.GetPluginLogs() + if len(logs) == 0 { + t.Fatal("expected at least one plugin log entry on embedding failure, got none") + } + var found bool + for _, l := range logs { + if l.PluginName != PluginName { + continue + } + if strings.Contains(l.Message, "semantic search skipped") && strings.Contains(l.Message, "rate limit") { + if l.Level != schemas.LogLevelWarn { + t.Errorf("expected Warn level for embedding failure, got %s", l.Level) + } + found = true + } + } + if !found { + t.Fatalf("expected a Warn plugin log mentioning semantic search skipped + the upstream error, got %+v", logs) + } +} + +// pluginLogContains is a small assertion 
helper: returns true if any log +// entry from PluginName matches the substring at the given level (or any +// level if level is ""). +func pluginLogContains(logs []schemas.PluginLogEntry, level schemas.LogLevel, substr string) bool { + for _, l := range logs { + if l.PluginName != PluginName { + continue + } + if level != "" && l.Level != level { + continue + } + if strings.Contains(l.Message, substr) { + return true + } + } + return false +} + +func TestPreLLMHook_NoDebugLogsOnFlow(t *testing.T) { + // We deliberately do not emit Debug-level plugin logs for normal cache + // flow (hit/miss). cache_debug already conveys that. Only Warn-level + // failure logs should appear on the response. + store := newObservableStore() + plugin := newTestPlugin(t, store, false) + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("first request", 0.7, 50), + } + ctx := scopedTestContext(t, "") + if _, _, err := plugin.PreLLMHook(ctx, req); err != nil { + t.Fatalf("PreLLMHook failed: %v", err) + } + + logs := ctx.GetPluginLogs() + for _, l := range logs { + if l.PluginName != PluginName { + continue + } + if l.Level == schemas.LogLevelDebug { + t.Fatalf("expected no Debug plugin logs on normal flow, got %+v", l) + } + } +} + +func TestResolveCacheTypes_EmitsPluginLogOnInvalidValue(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + ctx := scopedTestContext(t, "") + ctx.SetValue(CacheTypeKey, "not-a-cache-type") // wrong type + + plugin.resolveCacheTypes(ctx) + + logs := ctx.GetPluginLogs() + var found bool + for _, l := range logs { + if l.PluginName == PluginName && strings.Contains(l.Message, "CacheTypeKey is not a CacheType") { + found = true + } + } + if !found { + t.Fatalf("expected plugin log warning about invalid CacheTypeKey, got %+v", logs) + } +} + +// ----------------------------------------------------------------------------- +// generateEmbedding handles all EmbeddingStruct 
representations +// ----------------------------------------------------------------------------- + +func TestGenerateEmbedding_AcceptsInt8Array(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + plugin.SetEmbeddingRequestExecutor(func(_ *schemas.BifrostContext, _ *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) { + return &schemas.BifrostEmbeddingResponse{ + Data: []schemas.EmbeddingData{{ + Embedding: schemas.EmbeddingStruct{ + EmbeddingInt8Array: []int8{-128, -1, 0, 1, 127}, + }, + }}, + }, nil + }) + + ctx := scopedTestContext(t, "") + emb, _, err := plugin.generateEmbedding(ctx, "anything") + if err != nil { + t.Fatalf("generateEmbedding failed for int8 input: %v", err) + } + want := []float32{-128, -1, 0, 1, 127} + if !reflect.DeepEqual(emb, want) { + t.Fatalf("int8 → float32 conversion: want %v, got %v", want, emb) + } +} + +func TestGenerateEmbedding_AcceptsInt32Array(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + plugin.SetEmbeddingRequestExecutor(func(_ *schemas.BifrostContext, _ *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) { + return &schemas.BifrostEmbeddingResponse{ + Data: []schemas.EmbeddingData{{ + Embedding: schemas.EmbeddingStruct{ + EmbeddingInt32Array: []int32{0, 100000, -100000}, + }, + }}, + }, nil + }) + + ctx := scopedTestContext(t, "") + emb, _, err := plugin.generateEmbedding(ctx, "anything") + if err != nil { + t.Fatalf("generateEmbedding failed for int32 input: %v", err) + } + want := []float32{0, 100000, -100000} + if !reflect.DeepEqual(emb, want) { + t.Fatalf("int32 → float32 conversion: want %v, got %v", want, emb) + } +} + +// ----------------------------------------------------------------------------- +// Concurrent PreLLMHook on same requestID — last writer wins, no panic +// ----------------------------------------------------------------------------- + +func 
TestPreLLMHook_ConcurrentSameRequestID(t *testing.T) { + plugin := newTestPlugin(t, newObservableStore(), false) + + req := &schemas.BifrostRequest{ + RequestType: schemas.ChatCompletionRequest, + ChatRequest: CreateBasicChatRequest("hi", 0.7, 50), + } + + requestID := "shared-request-id" + const N = 8 + var wg sync.WaitGroup + var panics atomic.Int32 + wg.Add(N) + for i := 0; i < N; i++ { + go func() { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + panics.Add(1) + } + }() + ctx := newBaseTestContext() + ctx.SetValue(schemas.BifrostContextKeyRequestID, requestID) + ctx.SetValue(CacheKey, keyForTest(t, "")) + _, _, _ = plugin.PreLLMHook(ctx, req) + }() + } + wg.Wait() + + if panics.Load() != 0 { + t.Fatalf("expected zero panics under concurrent PreLLMHook, got %d", panics.Load()) + } + // State for the shared requestID should exist (one of them won). + if state := plugin.getCacheState(requestID); state == nil { + t.Fatal("expected cache state to exist after concurrent PreLLMHook") + } +} diff --git a/plugins/semanticcache/plugin_responses_test.go b/plugins/semanticcache/plugin_responses_test.go index f7af0580cc..2474ea88c1 100644 --- a/plugins/semanticcache/plugin_responses_test.go +++ b/plugins/semanticcache/plugin_responses_test.go @@ -9,10 +9,11 @@ import ( // TestResponsesAPIBasicFunctionality tests the core caching functionality with Responses API func TestResponsesAPIBasicFunctionality(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-responses-basic") + ctx := CreateContextWithCacheKey(t, "test-responses-basic") // Create test request testRequest := CreateBasicResponsesRequest( @@ -29,7 +30,7 @@ func TestResponsesAPIBasicFunctionality(t *testing.T) { duration1 := time.Since(start1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } if response1 == nil || len(response1.Output) == 0 { @@ 
-94,10 +95,11 @@ func TestResponsesAPIBasicFunctionality(t *testing.T) { // TestResponsesAPIDifferentParameters tests that different parameters produce different cache entries func TestResponsesAPIDifferentParameters(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-responses-params") + ctx := CreateContextWithCacheKey(t, "test-responses-params") basePrompt := "Explain quantum computing" tests := []struct { @@ -140,7 +142,7 @@ func TestResponsesAPIDifferentParameters(t *testing.T) { // Make first request _, err1 := setup.Client.ResponsesRequest(ctx, tt.request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } WaitForCache(setup.Plugin) @@ -168,17 +170,18 @@ func TestResponsesAPIDifferentParameters(t *testing.T) { // TestResponsesAPISemanticMatching tests semantic similarity matching with Responses API func TestResponsesAPISemanticMatching(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKeyAndType("test-responses-semantic", CacheTypeSemantic) + ctx := CreateContextWithCacheKeyAndType(t, "test-responses-semantic", CacheTypeSemantic) // First request originalRequest := CreateBasicResponsesRequest("What is machine learning?", 0.5, 500) t.Log("Making first Responses request with original text...") response1, err1 := setup.Client.ResponsesRequest(ctx, originalRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1}) @@ -203,10 +206,11 @@ func TestResponsesAPISemanticMatching(t *testing.T) { // TestResponsesAPIWithInstructions tests caching with system instructions func TestResponsesAPIWithInstructions(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := 
CreateContextWithCacheKey("test-responses-instructions") + ctx := CreateContextWithCacheKey(t, "test-responses-instructions") // Create request with instructions request1 := CreateResponsesRequestWithInstructions( @@ -219,7 +223,7 @@ func TestResponsesAPIWithInstructions(t *testing.T) { t.Log("Making first Responses request with instructions...") response1, err1 := setup.Client.ResponsesRequest(ctx, request1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1}) @@ -250,19 +254,20 @@ func TestResponsesAPIWithInstructions(t *testing.T) { // TestResponsesAPICacheExpiration tests TTL functionality for Responses API requests func TestResponsesAPICacheExpiration(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() // Set very short TTL for testing shortTTL := 5 * time.Second - ctx := CreateContextWithCacheKeyAndTTL("test-responses-ttl", shortTTL) + ctx := CreateContextWithCacheKeyAndTTL(t, "test-responses-ttl", shortTTL) responsesRequest := CreateBasicResponsesRequest("TTL test for Responses API", 0.5, 500) t.Log("Making first Responses request with short TTL...") response1, err1 := setup.Client.ResponsesRequest(ctx, responsesRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1}) @@ -285,7 +290,7 @@ func TestResponsesAPICacheExpiration(t *testing.T) { t.Log("Making third Responses request after TTL expiration...") response3, err3 := setup.Client.ResponsesRequest(ctx, responsesRequest) if err3 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err3) } // Should not be a cache hit since TTL expired AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response3}) @@ 
-295,39 +300,52 @@ func TestResponsesAPICacheExpiration(t *testing.T) { // TestResponsesAPIWithoutCacheKey tests that Responses requests without cache key are not cached func TestResponsesAPIWithoutCacheKey(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - // Don't set cache key in context - ctx := CreateContextWithCacheKey("") + // Don't set cache key in context. CreateContextWithCacheKey(t, "") would + // still populate CacheKey from t.Name(); using a base context keeps it + // unset so we exercise the cache-disabled path. + ctx := newBaseTestContext() responsesRequest := CreateBasicResponsesRequest("Test Responses without cache key", 0.5, 500) - t.Log("Making Responses request without cache key...") - - response, err := setup.Client.ResponsesRequest(ctx, responsesRequest) + t.Log("Making first Responses request without cache key...") + response1, err := setup.Client.ResponsesRequest(ctx, responsesRequest) if err != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err) } + AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1}) - // Should not be cached - AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response}) + WaitForCache(setup.Plugin) + + // A second identical request must also miss — proves the first one + // was not silently cached against some default key. 
+ t.Log("Making second identical request — must also miss because nothing was cached...") + ctx2 := newBaseTestContext() + response2, err := setup.Client.ResponsesRequest(ctx2, responsesRequest) + if err != nil { + t.Skipf("upstream request error, skipping test: %v", err) + } + AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}) t.Log("✅ Responses requests without cache key are properly not cached") } // TestResponsesAPINoStoreFlag tests that Responses requests with no-store flag are not cached func TestResponsesAPINoStoreFlag(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() responsesRequest := CreateBasicResponsesRequest("Test no-store with Responses API", 0.7, 500) - ctx := CreateContextWithCacheKeyAndNoStore("test-no-store-responses", true) + ctx := CreateContextWithCacheKeyAndNoStore(t, "test-no-store-responses", true) t.Log("Testing no-store with Responses API...") response1, err1 := setup.Client.ResponsesRequest(ctx, responsesRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1}) @@ -336,79 +354,86 @@ func TestResponsesAPINoStoreFlag(t *testing.T) { // Verify not cached response2, err2 := setup.Client.ResponsesRequest(ctx, responsesRequest) if err2 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err2) } AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response2}) // Should not be cached t.Log("✅ Responses API no-store flag working correctly") } -// TestResponsesAPIStreaming tests streaming Responses API requests +// TestResponsesAPIStreaming tests streaming Responses API caching by warming +// the cache with a streaming request and replaying it with a second identical +// streaming request that must be served from cache. 
func TestResponsesAPIStreaming(t *testing.T) { - t.Log("Responses streaming not supported yet") - + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-responses-streaming") + ctx := CreateContextWithCacheKey(t, "test-responses-streaming") prompt := "Explain the basics of quantum computing in simple terms" - // Make non-streaming request first - t.Log("Making non-streaming Responses request...") - nonStreamRequest := CreateBasicResponsesRequest(prompt, 0.5, 500) - _, err1 := setup.Client.ResponsesRequest(ctx, nonStreamRequest) + // Warm the cache with a streaming request — the plugin accumulates the + // chunks and stores them on the final chunk. + t.Log("Warming cache with first streaming Responses request...") + streamRequest := CreateStreamingResponsesRequest(prompt, 0.5, 500) + stream1, err1 := setup.Client.ResponsesStreamRequest(ctx, streamRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) + } + chunkCount1 := 0 + for streamMsg := range stream1 { + if streamMsg.BifrostError != nil { + t.Fatalf("Error in first stream: %v", streamMsg.BifrostError) + } + if streamMsg.BifrostResponsesStreamResponse != nil { + chunkCount1++ + } + } + if chunkCount1 == 0 { + t.Fatal("first streaming request produced no chunks") } WaitForCache(setup.Plugin) - // Make streaming request with same prompt and parameters - t.Log("Making streaming Responses request with same prompt...") - streamRequest := CreateStreamingResponsesRequest(prompt, 0.5, 500) - stream, err2 := setup.Client.ResponsesStreamRequest(ctx, streamRequest) + // Second identical streaming request — must be served from cache. We + // require AT LEAST ONE chunk with CacheHit=true (the final chunk gets + // the cache_debug stamp during replay). 
+ t.Log("Replaying — second identical streaming request must serve from cache...") + ctx2 := CreateContextWithCacheKey(t, "test-responses-streaming") + stream2, err2 := setup.Client.ResponsesStreamRequest(ctx2, streamRequest) if err2 != nil { - t.Fatalf("Streaming Responses request failed: %v", err2) + t.Fatalf("Second streaming Responses request failed: %v", err2) } - var streamResponses []schemas.BifrostResponsesStreamResponse - for streamMsg := range stream { + cacheHitFound := false + chunkCount2 := 0 + for streamMsg := range stream2 { if streamMsg.BifrostError != nil { - t.Fatalf("Error in Responses stream: %v", streamMsg.BifrostError) + t.Fatalf("Error in second stream: %v", streamMsg.BifrostError) } if streamMsg.BifrostResponsesStreamResponse != nil { - streamResponses = append(streamResponses, *streamMsg.BifrostResponsesStreamResponse) + chunkCount2++ + if cd := streamMsg.BifrostResponsesStreamResponse.ExtraFields.CacheDebug; cd != nil && cd.CacheHit { + cacheHitFound = true + } } } - - if len(streamResponses) == 0 { - t.Fatal("No streaming responses received") - } - - // Check if any of the streaming responses was served from cache - cacheHitFound := false - for _, resp := range streamResponses { - if resp.ExtraFields.CacheDebug != nil && resp.ExtraFields.CacheDebug.CacheHit { - cacheHitFound = true - break - } + if chunkCount2 == 0 { + t.Fatal("replay produced no chunks") } - if !cacheHitFound { - t.Log("⚠️ No cache hit detected in streaming responses - this could be expected behavior") - } else { - t.Log("✓ Cache hit detected in streaming Responses API") + t.Fatal("expected at least one chunk with CacheDebug.CacheHit=true on streaming replay") } - - t.Log("✅ Streaming Responses API test completed") + t.Log("✅ Streaming Responses API replay served from cache") } // TestResponsesAPIComplexParameters tests complex parameter handling func TestResponsesAPIComplexParameters(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := 
CreateContextWithCacheKey("test-responses-complex-params") + ctx := CreateContextWithCacheKey(t, "test-responses-complex-params") // Create request with various complex parameters request := CreateBasicResponsesRequest("Test complex parameters", 0.8, 500) @@ -421,7 +446,7 @@ func TestResponsesAPIComplexParameters(t *testing.T) { t.Log("Making first Responses request with complex parameters...") response1, err1 := setup.Client.ResponsesRequest(ctx, request) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } AssertNoCacheHit(t, &schemas.BifrostResponse{ResponsesResponse: response1}) diff --git a/plugins/semanticcache/plugin_streaming_test.go b/plugins/semanticcache/plugin_streaming_test.go index f029564055..7a85717c7f 100644 --- a/plugins/semanticcache/plugin_streaming_test.go +++ b/plugins/semanticcache/plugin_streaming_test.go @@ -9,10 +9,11 @@ import ( // TestStreamingCacheBasicFunctionality tests streaming response caching func TestStreamingCacheBasicFunctionality(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("test-stream-value") + ctx := CreateContextWithCacheKey(t, "test-stream-value") // Create a test streaming request testRequest := CreateStreamingChatRequest( @@ -27,7 +28,7 @@ func TestStreamingCacheBasicFunctionality(t *testing.T) { start1 := time.Now() stream1, err1 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } var responses1 []schemas.BifrostChatResponse @@ -115,10 +116,11 @@ func TestStreamingCacheBasicFunctionality(t *testing.T) { // TestStreamingVsNonStreaming tests that streaming and non-streaming requests are cached separately func TestStreamingVsNonStreaming(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := 
CreateContextWithCacheKey("stream-vs-non-test") + ctx := CreateContextWithCacheKey(t, "stream-vs-non-test") prompt := "What is the meaning of life?" @@ -127,7 +129,7 @@ func TestStreamingVsNonStreaming(t *testing.T) { nonStreamRequest := CreateBasicChatRequest(prompt, 0.5, 50) nonStreamResponse, err1 := setup.Client.ChatCompletionRequest(ctx, nonStreamRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } WaitForCache(setup.Plugin) @@ -184,10 +186,11 @@ func TestStreamingVsNonStreaming(t *testing.T) { // TestStreamingChunkOrdering tests that cached streaming responses maintain proper chunk ordering func TestStreamingChunkOrdering(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("chunk-order-test") + ctx := CreateContextWithCacheKey(t, "chunk-order-test") // Request that should generate multiple chunks testRequest := CreateStreamingChatRequest( @@ -199,7 +202,7 @@ func TestStreamingChunkOrdering(t *testing.T) { t.Log("Making first streaming request to establish cache...") stream1, err1 := setup.Client.ChatCompletionStreamRequest(ctx, testRequest) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } var originalChunks []schemas.BifrostChatResponse @@ -213,6 +216,9 @@ func TestStreamingChunkOrdering(t *testing.T) { } if len(originalChunks) < 2 { + // Stream chunking is at the provider's discretion — under load OpenAI + // occasionally bundles a short reply into a single delivered chunk. + // Ordering is not testable in that case; skip rather than fail. 
t.Skipf("Need at least 2 chunks to test ordering, got %d", len(originalChunks)) } @@ -273,10 +279,11 @@ func TestStreamingChunkOrdering(t *testing.T) { // TestSpeechSynthesisStreaming tests speech synthesis streaming caching func TestSpeechSynthesisStreaming(t *testing.T) { + t.Parallel() setup := NewTestSetup(t) defer setup.Cleanup() - ctx := CreateContextWithCacheKey("speech-stream-test") + ctx := CreateContextWithCacheKey(t, "speech-stream-test") // Create speech synthesis request speechRequest := CreateSpeechRequest( @@ -290,7 +297,7 @@ func TestSpeechSynthesisStreaming(t *testing.T) { duration1 := time.Since(start1) if err1 != nil { - return // Test will be skipped by retry function + t.Skipf("upstream request error, skipping test: %v", err1) } if response1 == nil { diff --git a/plugins/semanticcache/plugin_vectorstore_test.go b/plugins/semanticcache/plugin_vectorstore_test.go index f4ac8130f2..6d29f08c8b 100644 --- a/plugins/semanticcache/plugin_vectorstore_test.go +++ b/plugins/semanticcache/plugin_vectorstore_test.go @@ -1,7 +1,6 @@ package semanticcache import ( - "context" "os" "strings" "testing" @@ -47,27 +46,31 @@ func getVectorStoreTestCases() []VectorStoreTestCase { } } -// getDefaultTestConfig returns the default test configuration +// getDefaultTestConfig returns the default test configuration. Mirrors the +// defaults Init applies, which matters for unit tests that construct Plugin +// directly without going through Init. 
func getDefaultTestConfig() *Config { return &Config{ - Provider: schemas.OpenAI, - EmbeddingModel: "text-embedding-3-small", - Dimension: 1536, - Threshold: 0.8, - CleanUpOnShutdown: true, + Provider: schemas.OpenAI, + EmbeddingModel: "text-embedding-3-small", + Dimension: 1536, + Threshold: 0.8, + CleanUpOnShutdown: true, + ConversationHistoryThreshold: DefaultConversationHistoryThreshold, } } // TestSemanticCache_AllVectorStores_BasicFlow tests the basic cache flow across all vector stores func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) { + t.Parallel() for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-basic") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-"+strings.ToLower(tc.Name)+"-basic")) // Test request request := &schemas.BifrostRequest{ @@ -146,8 +149,8 @@ func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) { // Second request - should be a cache hit t.Logf("[%s] Testing second identical request (expecting cache hit)...", tc.Name) - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-basic") + ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, "test-"+strings.ToLower(tc.Name)+"-basic")) _, shortCircuit2, err := setup.Plugin.PreLLMHook(ctx2, request) if err != nil { @@ -170,6 +173,7 @@ func TestSemanticCache_AllVectorStores_BasicFlow(t *testing.T) { // TestSemanticCache_AllVectorStores_DirectHashMatch tests direct hash matching across all vector stores func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) { + t.Parallel() for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t 
*testing.T) { skipIfNoAPIKey(t, tc.StoreType) @@ -181,7 +185,7 @@ func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) { testRunID := uuid.New().String()[:8] cacheKey := "test-" + strings.ToLower(tc.Name) + "-direct-" + testRunID - ctx := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) + ctx := CreateContextWithCacheKeyAndType(t, cacheKey, CacheTypeDirect) testRequest := CreateBasicChatRequest("Direct hash test for "+tc.Name+" "+testRunID, 0.7, 50) @@ -196,7 +200,7 @@ func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) { WaitForCache(setup.Plugin) // Second request with direct-only cache type - ctx2 := CreateContextWithCacheKeyAndType(cacheKey, CacheTypeDirect) + ctx2 := CreateContextWithCacheKeyAndType(t, cacheKey, CacheTypeDirect) t.Logf("[%s] Making second request with CacheTypeDirect...", tc.Name) response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) @@ -212,6 +216,7 @@ func TestSemanticCache_AllVectorStores_DirectHashMatch(t *testing.T) { // TestSemanticCache_AllVectorStores_NamespaceIsolation tests that different cache keys are isolated func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) { + t.Parallel() for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) @@ -225,7 +230,7 @@ func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) { cacheKey2 := "test-" + strings.ToLower(tc.Name) + "-namespace-2-" + testRunID // Cache with first key - ctx1 := CreateContextWithCacheKey(cacheKey1) + ctx1 := CreateContextWithCacheKey(t, cacheKey1) testRequest := CreateBasicChatRequest("Namespace isolation test for "+tc.Name+" "+testRunID, 0.7, 50) t.Logf("[%s] Making request with cache key 1...", tc.Name) @@ -239,7 +244,7 @@ func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) { WaitForCache(setup.Plugin) // Try with different cache key - should miss - ctx2 := CreateContextWithCacheKey(cacheKey2) + 
ctx2 := CreateContextWithCacheKey(t, cacheKey2) t.Logf("[%s] Making same request with different cache key (expecting miss)...", tc.Name) response2, err2 := setup.Client.ChatCompletionRequest(ctx2, testRequest) @@ -251,7 +256,7 @@ func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) { AssertNoCacheHit(t, &schemas.BifrostResponse{ChatResponse: response2}) // Try with original key - should hit - ctx3 := CreateContextWithCacheKey(cacheKey1) + ctx3 := CreateContextWithCacheKey(t, cacheKey1) t.Logf("[%s] Making same request with original cache key (expecting hit)...", tc.Name) response3, err3 := setup.Client.ChatCompletionRequest(ctx3, testRequest) @@ -267,14 +272,15 @@ func TestSemanticCache_AllVectorStores_NamespaceIsolation(t *testing.T) { // TestSemanticCache_AllVectorStores_ParameterFiltering tests that different parameters don't share cache func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) { + t.Parallel() for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) setup := NewTestSetupWithVectorStore(t, getDefaultTestConfig(), tc.StoreType) defer setup.Cleanup() - ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-params") + ctx := newBaseTestContext() + ctx.SetValue(CacheKey, keyForTest(t, "test-"+strings.ToLower(tc.Name)+"-params")) // First request with temperature=0.7 request1 := &schemas.BifrostRequest{ @@ -342,8 +348,8 @@ func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) { // Second request with different temperature - should be cache miss t.Logf("[%s] Testing second request with temperature=0.5 (expecting cache miss)...", tc.Name) - ctx2 := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) - ctx2.SetValue(CacheKey, "test-"+strings.ToLower(tc.Name)+"-params") + ctx2 := newBaseTestContext() + ctx2.SetValue(CacheKey, keyForTest(t, 
"test-"+strings.ToLower(tc.Name)+"-params")) request2 := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, @@ -381,6 +387,7 @@ func TestSemanticCache_AllVectorStores_ParameterFiltering(t *testing.T) { // TestSemanticCache_AllVectorStores_EmbeddingRequest tests embedding request caching across all vector stores func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) { + t.Parallel() for _, tc := range getVectorStoreTestCases() { t.Run(tc.Name, func(t *testing.T) { skipIfNoAPIKey(t, tc.StoreType) @@ -395,7 +402,7 @@ func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) { embeddingRequest := CreateEmbeddingRequest([]string{"Test embedding with " + tc.Name + " " + testRunID}) // Cache first request - ctx1 := CreateContextWithCacheKey(cacheKey) + ctx1 := CreateContextWithCacheKey(t, cacheKey) t.Logf("[%s] Making first embedding request...", tc.Name) response1, err1 := setup.Client.EmbeddingRequest(ctx1, embeddingRequest) if err1 != nil { @@ -407,7 +414,7 @@ func TestSemanticCache_AllVectorStores_EmbeddingRequest(t *testing.T) { WaitForCache(setup.Plugin) // Second request - should be cache hit - ctx2 := CreateContextWithCacheKey(cacheKey) + ctx2 := CreateContextWithCacheKey(t, cacheKey) t.Logf("[%s] Making second embedding request (expecting cache hit)...", tc.Name) response2, err2 := setup.Client.EmbeddingRequest(ctx2, embeddingRequest) if err2 != nil { diff --git a/plugins/semanticcache/search.go b/plugins/semanticcache/search.go index 6e8a2cf6a7..79b4c1b32a 100644 --- a/plugins/semanticcache/search.go +++ b/plugins/semanticcache/search.go @@ -9,89 +9,32 @@ import ( "strings" "time" + "github.com/cespare/xxhash/v2" + "github.com/google/uuid" bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" "github.com/maximhq/bifrost/framework/vectorstore" ) -func (plugin *Plugin) prepareDirectCacheLookup(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (string, error) { - 
hash, err := plugin.generateRequestHash(req) +// performDirectSearch does an O(1) point fetch on the deterministic directCacheID +// derived from (provider, model, cacheKey, request_hash, params_hash). Caller +// supplies the prebuilt metadata + paramsHash so we don't recompute them when +// semantic search runs as well. +func (plugin *Plugin) performDirectSearch(ctx *schemas.BifrostContext, state *cacheState, req *schemas.BifrostRequest, cacheKey string, metadata map[string]interface{}, paramsHash string) (*schemas.LLMPluginShortCircuit, error) { + requestHash, err := plugin.generateRequestHash(req, metadata) if err != nil { - return "", fmt.Errorf("failed to generate request hash: %w", err) + return nil, fmt.Errorf("failed to generate request hash: %w", err) } - plugin.logger.Debug(PluginLoggerPrefix + " Generated Hash for Request: " + hash) - - paramsHash, err := plugin.computeRequestParamsHash(req) - if err != nil { - return "", fmt.Errorf("failed to compute direct lookup params hash: %w", err) - } - - ctx.SetValue(requestHashKey, hash) - ctx.SetValue(requestParamsHashKey, paramsHash) - - provider, model, _ := req.GetRequestFields() - directCacheID := plugin.generateDirectCacheID(provider, model, cacheKey, hash, paramsHash) - - return directCacheID, nil -} - -func (plugin *Plugin) performLegacyDirectSearch(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) { - hash, _ := ctx.Value(requestHashKey).(string) - paramsHash, _ := ctx.Value(requestParamsHashKey).(string) - provider, model, _ := req.GetRequestFields() - - filters := []vectorstore.Query{ - {Field: "request_hash", Operator: vectorstore.QueryOperatorEqual, Value: hash}, - {Field: "cache_key", Operator: vectorstore.QueryOperatorEqual, Value: cacheKey}, - {Field: "params_hash", Operator: vectorstore.QueryOperatorEqual, Value: paramsHash}, - {Field: "from_bifrost_semantic_cache_plugin", Operator: vectorstore.QueryOperatorEqual, Value: true}, - } 
- - if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider { - filters = append(filters, vectorstore.Query{Field: "provider", Operator: vectorstore.QueryOperatorEqual, Value: string(provider)}) - } - if plugin.config.CacheByModel != nil && *plugin.config.CacheByModel { - filters = append(filters, vectorstore.Query{Field: "model", Operator: vectorstore.QueryOperatorEqual, Value: model}) - } - - plugin.logger.Debug(fmt.Sprintf("%s Searching for legacy direct hash match with %d filters", PluginLoggerPrefix, len(filters))) - - selectFields := append([]string(nil), SelectFields...) - if bifrost.IsStreamRequestType(req.RequestType) { - selectFields = removeField(selectFields, "response") - } else { - selectFields = removeField(selectFields, "stream_chunks") - } - - searchCtx := vectorstore.WithDisableScanFallback(ctx) - var cursor *string - results, _, err := plugin.store.GetAll(searchCtx, plugin.config.VectorStoreNamespace, filters, selectFields, cursor, 1) - if err != nil { - if errors.Is(err, vectorstore.ErrNotFound) || errors.Is(err, vectorstore.ErrQuerySyntax) { - return nil, nil - } - return nil, fmt.Errorf("failed to search for legacy direct hash match: %w", err) - } - - if len(results) == 0 { - plugin.logger.Debug(PluginLoggerPrefix + " No legacy direct hash match found") - return nil, nil - } - - result := results[0] - plugin.logger.Debug(fmt.Sprintf("%s Found legacy direct hash match with ID: %s", PluginLoggerPrefix, result.ID)) - return plugin.buildResponseFromResult(ctx, req, result, CacheTypeDirect, 1.0, 0) -} - -func (plugin *Plugin) performDirectChunkLookup(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) { - directCacheID, err := plugin.prepareDirectCacheLookup(ctx, req, cacheKey) + directCacheID, err := plugin.generateDirectCacheID(provider, model, cacheKey, requestHash, paramsHash) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to generate direct 
cache ID: %w", err) } - ctx.SetValue(requestStorageIDKey, directCacheID) + state.DirectCacheID = directCacheID + // All filters (cacheKey, provider, model, requestHash, paramsHash) are + // encoded into directCacheID, so a Get-by-ID is sufficient. result, err := plugin.store.GetChunk(ctx, plugin.config.VectorStoreNamespace, directCacheID) if err != nil { errMsg := strings.ToLower(err.Error()) @@ -99,93 +42,46 @@ func (plugin *Plugin) performDirectChunkLookup(ctx *schemas.BifrostContext, req strings.Contains(errMsg, "not found") || strings.Contains(errMsg, "status code: 404") if isMiss { - plugin.logger.Debug(PluginLoggerPrefix + " No direct chunk match found") return nil, nil } return nil, fmt.Errorf("failed to fetch direct cache chunk: %w", err) } - - plugin.logger.Debug(fmt.Sprintf("%s Found direct chunk match with ID: %s", PluginLoggerPrefix, result.ID)) - return plugin.buildResponseFromResult(ctx, req, result, CacheTypeDirect, 1.0, 0) -} - -func (plugin *Plugin) performDirectSearch(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) { - shortCircuit, err := plugin.performDirectChunkLookup(ctx, req, cacheKey) - if err != nil { - return nil, err - } - if shortCircuit != nil { - return shortCircuit, nil - } - - return plugin.performLegacyDirectSearch(ctx, req, cacheKey) -} - -// generateEmbeddingsForStorage generates embeddings and stores them in context for PostHook storage. -// This is used when the vector store requires vectors but we're in direct-only cache mode. -// Unlike performSemanticSearch, this function does not perform any search - it only generates -// and stores embeddings so they can be persisted with the cache entry. 
-func (plugin *Plugin) generateEmbeddingsForStorage(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) error { - // Extract text and metadata for embedding - text, paramsHash, err := plugin.extractTextForEmbedding(req) - if err != nil { - return fmt.Errorf("failed to extract text for embedding: %w", err) - } - - // Generate embedding - embedding, inputTokens, err := plugin.generateEmbedding(ctx, text) - if err != nil { - return fmt.Errorf("failed to generate embedding: %w", err) - } - - // Store embedding and metadata in context for PostHook - ctx.SetValue(requestEmbeddingKey, embedding) - ctx.SetValue(requestEmbeddingTokensKey, inputTokens) - ctx.SetValue(requestParamsHashKey, paramsHash) - - return nil + return plugin.buildResponseFromResult(ctx, state, req, result, CacheTypeDirect, nil, nil) } // performSemanticSearch performs semantic similarity search and returns matching response if found. -func (plugin *Plugin) performSemanticSearch(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, cacheKey string) (*schemas.LLMPluginShortCircuit, error) { - // Extract text and metadata for embedding - text, paramsHash, err := plugin.extractTextForEmbedding(req) +// Caller supplies the prebuilt paramsHash so it isn't recomputed. +func (plugin *Plugin) performSemanticSearch(ctx *schemas.BifrostContext, state *cacheState, req *schemas.BifrostRequest, cacheKey string, paramsHash string) (*schemas.LLMPluginShortCircuit, error) { + text, err := plugin.extractTextForEmbedding(state, req) if err != nil { return nil, fmt.Errorf("failed to extract text for embedding: %w", err) } - // Generate embedding embedding, inputTokens, err := plugin.generateEmbedding(ctx, text) if err != nil { + // Note: silent skip — provider misconfig or transient embedding errors + // fall through to the upstream LLM call. 
return nil, fmt.Errorf("failed to generate embedding: %w", err) } - // Store embedding and metadata in context for PostLLMHook - ctx.SetValue(requestEmbeddingKey, embedding) - ctx.SetValue(requestEmbeddingTokensKey, inputTokens) - ctx.SetValue(requestParamsHashKey, paramsHash) + state.Embeddings = embedding + state.EmbeddingsInputTokens = inputTokens cacheThreshold := plugin.config.Threshold - - thresholdValue := ctx.Value(CacheThresholdKey) - if thresholdValue != nil { - threshold, ok := thresholdValue.(float64) - if !ok { - plugin.logger.Warn(PluginLoggerPrefix + " Threshold is not a float64, using default threshold") - } else { + if v := ctx.Value(CacheThresholdKey); v != nil { + if threshold, ok := v.(float64); ok { cacheThreshold = threshold + } else { + plugin.logger.Warn("Threshold is not a float64, using default threshold") } } provider, model, _ := req.GetRequestFields() - - // Build strict metadata filters as Query slices (provider, model, and all params) strictFilters := []vectorstore.Query{ {Field: "cache_key", Operator: vectorstore.QueryOperatorEqual, Value: cacheKey}, {Field: "params_hash", Operator: vectorstore.QueryOperatorEqual, Value: paramsHash}, {Field: "from_bifrost_semantic_cache_plugin", Operator: vectorstore.QueryOperatorEqual, Value: true}, } - if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider { strictFilters = append(strictFilters, vectorstore.Query{Field: "provider", Operator: vectorstore.QueryOperatorEqual, Value: string(provider)}) } @@ -193,96 +89,175 @@ func (plugin *Plugin) performSemanticSearch(ctx *schemas.BifrostContext, req *sc strictFilters = append(strictFilters, vectorstore.Query{Field: "model", Operator: vectorstore.QueryOperatorEqual, Value: model}) } - plugin.logger.Debug(fmt.Sprintf("%s Performing semantic search with %d metadata filters", PluginLoggerPrefix, len(strictFilters))) - - // Make a full copy so we don't mutate the original backing array - selectFields := append([]string(nil), 
SelectFields...) - if bifrost.IsStreamRequestType(req.RequestType) { - selectFields = removeField(selectFields, "response") - } else { - selectFields = removeField(selectFields, "stream_chunks") - } - - // For semantic search, we want semantic similarity in content but exact parameter matching + selectFields := selectFieldsForRequest(req.RequestType) results, err := plugin.store.GetNearest(ctx, plugin.config.VectorStoreNamespace, embedding, strictFilters, selectFields, cacheThreshold, 1) if err != nil { return nil, fmt.Errorf("failed to search semantic cache: %w", err) } - if len(results) == 0 { - plugin.logger.Debug(PluginLoggerPrefix + " No semantic match found") return nil, nil } + return plugin.buildResponseFromResult(ctx, state, req, results[0], CacheTypeSemantic, &cacheThreshold, &inputTokens) +} - // Found a semantically similar entry - result := results[0] - plugin.logger.Debug(fmt.Sprintf("%s Found semantic match with ID: %s, Score: %f", PluginLoggerPrefix, result.ID, *result.Score)) +// selectFieldsStream / selectFieldsNonStream are precomputed at package init +// because selectFieldsForRequest is called on every cache lookup. +var ( + selectFieldsStream = filterSelectFields("response") + selectFieldsNonStream = filterSelectFields("stream_chunks") +) - // Build response from cached result - return plugin.buildResponseFromResult(ctx, req, result, CacheTypeSemantic, cacheThreshold, inputTokens) +// filterSelectFields returns SelectFields with the named field removed. Used +// at package init to precompute the per-request projection lists. 
+func filterSelectFields(skip string) []string { + out := make([]string, 0, len(SelectFields)) + for _, f := range SelectFields { + if f != skip { + out = append(out, f) + } + } + return out } -// buildResponseFromResult constructs a LLMPluginShortCircuit response from a cached VectorEntry result -func (plugin *Plugin) buildResponseFromResult(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, result vectorstore.SearchResult, cacheType CacheType, threshold float64, inputTokens int) (*schemas.LLMPluginShortCircuit, error) { - // Extract response data from the result properties - properties := result.Properties - if properties == nil { - return nil, fmt.Errorf("no properties found in cached result") +// selectFieldsForRequest returns the projection list trimmed to the response +// shape we actually need (single response vs stream chunks). +func selectFieldsForRequest(requestType schemas.RequestType) []string { + if bifrost.IsStreamRequestType(requestType) { + return selectFieldsStream } + return selectFieldsNonStream +} - // Check TTL - if entry has expired, delete it and return cache miss - if expiresAtRaw, exists := properties["expires_at"]; exists && expiresAtRaw != nil { - var expiresAt int64 - var validType bool - switch v := expiresAtRaw.(type) { - case string: - var err error - expiresAt, err = strconv.ParseInt(v, 10, 64) - if err != nil { - validType = false - } else { - validType = true - } - case float64: - expiresAt = int64(v) - validType = true - case int64: - expiresAt = v - validType = true - case int: - expiresAt = int64(v) - validType = true - } - if validType { - currentTime := time.Now().Unix() - if expiresAt < currentTime { - // Entry has expired, delete it asynchronously - go func() { - deleteCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - err := plugin.store.Delete(deleteCtx, plugin.config.VectorStoreNamespace, result.ID) - if err != nil { - plugin.logger.Warn("%s Failed to delete expired entry %s: 
%v", PluginLoggerPrefix, result.ID, err) - } - }() - // Return nil to indicate cache miss - return nil, nil - } +// generateEmbedding generates an embedding for the given text using the configured provider. +func (plugin *Plugin) generateEmbedding(ctx *schemas.BifrostContext, text string) ([]float32, int, error) { + embeddingReq := &schemas.BifrostEmbeddingRequest{ + Provider: plugin.config.Provider, + Model: plugin.config.EmbeddingModel, + Input: &schemas.EmbeddingInput{ + Text: &text, + }, + } + + embeddingCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) + // Cancel the derived context once we're done. NewBifrostContext starts a + // watchCancellation goroutine that holds a reference to ctx (the scoped + // plugin context). Without this, that goroutine outlives the plugin call + // and may dereference fields on a parent context that has already been + // released back to its sync.Pool — see core/schemas.ReleasePluginScope. + defer embeddingCtx.Cancel() + embeddingCtx.SetValue(schemas.BifrostContextKeySkipPluginPipeline, true) + if plugin.embeddingRequestExecutor == nil { + return nil, 0, fmt.Errorf("embedding request executor is not configured") + } + response, err := plugin.embeddingRequestExecutor(embeddingCtx, embeddingReq) + if err != nil { + return nil, 0, fmt.Errorf("failed to generate embedding: %v", err) + } + + if len(response.Data) == 0 { + return nil, 0, fmt.Errorf("no embeddings returned from provider") + } + + embedding := response.Data[0].Embedding + inputTokens := 0 + if response.Usage != nil { + inputTokens = response.Usage.TotalTokens + } + + switch { + case embedding.EmbeddingStr != nil: + var vals []float32 + if err := json.Unmarshal([]byte(*embedding.EmbeddingStr), &vals); err != nil { + return nil, 0, fmt.Errorf("failed to parse string embedding: %w", err) } + return vals, inputTokens, nil + case embedding.EmbeddingArray != nil: + return float64ToFloat32Embedding(embedding.EmbeddingArray), inputTokens, nil + case 
len(embedding.Embedding2DArray) > 0: + return flattenToFloat32Embedding(embedding.Embedding2DArray), inputTokens, nil + case embedding.EmbeddingInt8Array != nil: + // Quantized int8/binary embedding format. Promote to float32 so the + // cosine-similarity path treats it uniformly. + return int8ToFloat32Embedding(embedding.EmbeddingInt8Array), inputTokens, nil + case embedding.EmbeddingInt32Array != nil: + return int32ToFloat32Embedding(embedding.EmbeddingInt32Array), inputTokens, nil + } + return nil, 0, fmt.Errorf("embedding data is not in expected format") +} + +// generateRequestHash creates an xxhash of the (normalized input, params). +// Fallbacks are excluded since they only affect error handling. +func (plugin *Plugin) generateRequestHash(req *schemas.BifrostRequest, params map[string]interface{}) (string, error) { + hashInput := map[string]interface{}{ + "input": plugin.getNormalizedInputForCaching(req), + "params": params, } + jsonData, err := schemas.MarshalDeeplySorted(hashInput) + if err != nil { + return "", fmt.Errorf("failed to marshal request for hashing: %w", err) + } + return fmt.Sprintf("%x", xxhash.Sum64(jsonData)), nil +} - // Check if this is a streaming response - need to check for non-null values - streamResponses, hasStreamingResponse := properties["stream_chunks"] - singleResponse, hasSingleResponse := properties["response"] +// generateDirectCacheID returns a deterministic UUIDv5 derived from the cache +// key, request hash, params hash, and (optionally) provider/model. The same +// inputs always produce the same ID, which is what makes the direct path an +// O(1) point fetch. 
+func (plugin *Plugin) generateDirectCacheID(provider schemas.ModelProvider, model string, cacheKey string, requestHash string, paramsHash string) (string, error) { + idInput := struct { + CacheKey string `json:"cache_key"` + RequestHash string `json:"request_hash"` + ParamsHash string `json:"params_hash"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + }{ + CacheKey: cacheKey, + RequestHash: requestHash, + ParamsHash: paramsHash, + } + if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider { + idInput.Provider = string(provider) + } + if plugin.config.CacheByModel != nil && *plugin.config.CacheByModel { + idInput.Model = model + } + data, err := schemas.MarshalDeeplySorted(idInput) + if err != nil { + return "", err + } + return uuid.NewSHA1(directCacheNamespace, data).String(), nil +} - // Consider fields present only if they're not null - hasValidSingleResponse := hasSingleResponse && singleResponse != nil - hasValidStreamingResponse := hasStreamingResponse && streamResponses != nil +// buildResponseFromResult constructs a LLMPluginShortCircuit response from a cached VectorEntry result. +// +// Return contract: +// - (shortCircuit, nil): cache hit — caller should return shortCircuit to short-circuit upstream. +// - (nil, nil): treat as a miss. Used for both genuine misses and "soft" misses +// (expired entry, unparseable expires_at, format mismatch). Caller proceeds to upstream. +// - (nil, err): hard error worth logging; caller logs and proceeds to upstream. 
+func (plugin *Plugin) buildResponseFromResult(ctx *schemas.BifrostContext, state *cacheState, req *schemas.BifrostRequest, result vectorstore.SearchResult, cacheType CacheType, threshold *float64, inputTokens *int) (*schemas.LLMPluginShortCircuit, error) { + properties := result.Properties + if properties == nil { + return nil, fmt.Errorf("no properties found in cached result") + } - // Parse stream_chunks - streamChunks, err := plugin.parseStreamChunks(streamResponses) - if err != nil || len(streamChunks) == 0 { - hasValidStreamingResponse = false + if expired, miss := isExpiredEntry(properties); expired { + // Async best-effort cleanup of the stale entry. Tracked on writersWg + // so WaitForPendingOperations + Cleanup block until it finishes, + // avoiding a delete racing with namespace teardown. + plugin.writersWg.Add(1) + go func() { + defer plugin.writersWg.Done() + deleteCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := plugin.store.Delete(deleteCtx, plugin.config.VectorStoreNamespace, result.ID); err != nil { + plugin.logger.Warn("Failed to delete expired entry %s: %v", result.ID, err) + } + }() + return nil, nil + } else if miss { + // Unparseable expires_at — treat as miss to be safe. 
+ return nil, nil } similarity := 0.0 @@ -290,134 +265,118 @@ func (plugin *Plugin) buildResponseFromResult(ctx *schemas.BifrostContext, req * similarity = *result.Score } - isStreamRequest := bifrost.IsStreamRequestType(req.RequestType) - - if isStreamRequest && hasValidStreamingResponse { - return plugin.buildStreamingResponseFromResult(ctx, req, result, streamChunks, cacheType, threshold, similarity, inputTokens) - } else if !isStreamRequest && hasValidSingleResponse { - return plugin.buildSingleResponseFromResult(ctx, req, result, singleResponse, cacheType, threshold, similarity, inputTokens) + isStream := bifrost.IsStreamRequestType(req.RequestType) + if isStream { + streamResponses, ok := properties["stream_chunks"] + if ok && streamResponses != nil { + streamChunks, err := plugin.parseStreamChunks(streamResponses) + if err == nil && len(streamChunks) > 0 { + return plugin.buildStreamingResponseFromResult(ctx, state, req, result, streamChunks, cacheType, threshold, &similarity, inputTokens) + } + } } else { - plugin.logger.Warn("%s Cache entry format mismatch for request %s (isStream=%t, hasSingle=%t, hasStream=%t), treating as miss", - PluginLoggerPrefix, result.ID, isStreamRequest, hasValidSingleResponse, hasValidStreamingResponse) - return nil, nil + singleResponse, ok := properties["response"] + if ok && singleResponse != nil { + return plugin.buildNonStreamingResponseFromResult(ctx, state, req, result, singleResponse, cacheType, threshold, &similarity, inputTokens) + } + } + + msg := fmt.Sprintf("cache entry %s format mismatch (isStream=%t), treating as miss — entry may be corrupt", result.ID, isStream) + plugin.logger.Warn(msg) + ctx.Log(schemas.LogLevelWarn, msg) + return nil, nil +} + +// isExpiredEntry returns (expired, parseFailed). A nil/missing expires_at is +// treated as never-expires. 
+func isExpiredEntry(properties map[string]interface{}) (bool, bool) { + expiresAtRaw, exists := properties["expires_at"] + if !exists || expiresAtRaw == nil { + return false, false } + var expiresAt int64 + switch v := expiresAtRaw.(type) { + case string: + parsed, err := strconv.ParseInt(v, 10, 64) + if err != nil { + return false, true + } + expiresAt = parsed + case float64: + expiresAt = int64(v) + case int64: + expiresAt = v + case int: + expiresAt = int64(v) + default: + return false, true + } + return expiresAt < time.Now().Unix(), false } -// buildSingleResponseFromResult constructs a single response from cached data -func (plugin *Plugin) buildSingleResponseFromResult(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, result vectorstore.SearchResult, responseData interface{}, cacheType CacheType, threshold float64, similarity float64, inputTokens int) (*schemas.LLMPluginShortCircuit, error) { +// buildNonStreamingResponseFromResult constructs a single response from cached data. 
+func (plugin *Plugin) buildNonStreamingResponseFromResult(ctx *schemas.BifrostContext, state *cacheState, req *schemas.BifrostRequest, result vectorstore.SearchResult, responseData interface{}, cacheType CacheType, threshold *float64, similarity *float64, inputTokens *int) (*schemas.LLMPluginShortCircuit, error) { requestedProvider, requestedModel, _ := req.GetRequestFields() responseStr, ok := responseData.(string) if !ok { return nil, fmt.Errorf("cached response is not a string") } - - // Unmarshal the cached response var cachedResponse schemas.BifrostResponse if err := json.Unmarshal([]byte(responseStr), &cachedResponse); err != nil { return nil, fmt.Errorf("failed to unmarshal cached response: %w", err) } - extraFields := cachedResponse.GetExtraFields() - - if extraFields.CacheDebug == nil { - extraFields.CacheDebug = &schemas.BifrostCacheDebug{} - } - extraFields.CacheDebug.CacheHit = true - extraFields.CacheDebug.HitType = bifrost.Ptr(string(cacheType)) - extraFields.CacheDebug.CacheID = bifrost.Ptr(result.ID) - extraFields.CacheDebug.RequestedProvider = bifrost.Ptr(string(requestedProvider)) - extraFields.CacheDebug.RequestedModel = bifrost.Ptr(requestedModel) - if cacheType == CacheTypeSemantic { - extraFields.CacheDebug.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider)) - extraFields.CacheDebug.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel) - extraFields.CacheDebug.Threshold = &threshold - extraFields.CacheDebug.Similarity = &similarity - extraFields.CacheDebug.InputTokens = &inputTokens - } else { - extraFields.CacheDebug.ProviderUsed = nil - extraFields.CacheDebug.ModelUsed = nil - extraFields.CacheDebug.Threshold = nil - extraFields.CacheDebug.Similarity = nil - extraFields.CacheDebug.InputTokens = nil - } - - ctx.SetValue(isCacheHitKey, true) - ctx.SetValue(cacheHitTypeKey, cacheType) - - return &schemas.LLMPluginShortCircuit{ - Response: &cachedResponse, - }, nil + plugin.stampCacheDebugForHit(state, cachedResponse.GetExtraFields(), 
result.ID, requestedProvider, requestedModel, cacheType, threshold, similarity, inputTokens) + state.ShortCircuited = true + return &schemas.LLMPluginShortCircuit{Response: &cachedResponse}, nil } -// buildStreamingResponseFromResult constructs a streaming response from cached data -func (plugin *Plugin) buildStreamingResponseFromResult(ctx *schemas.BifrostContext, req *schemas.BifrostRequest, result vectorstore.SearchResult, streamArray []interface{}, cacheType CacheType, threshold float64, similarity float64, inputTokens int) (*schemas.LLMPluginShortCircuit, error) { +// buildStreamingResponseFromResult constructs a streaming response from cached data. +// The replay goroutine guards every send with ctx.Done() so a dropped consumer +// can't leak the goroutine (and its captured chunks) for the lifetime of the +// process. +func (plugin *Plugin) buildStreamingResponseFromResult(ctx *schemas.BifrostContext, state *cacheState, req *schemas.BifrostRequest, result vectorstore.SearchResult, streamArray []string, cacheType CacheType, threshold *float64, similarity *float64, inputTokens *int) (*schemas.LLMPluginShortCircuit, error) { requestedProvider, requestedModel, _ := req.GetRequestFields() - - // Mark cache-hit once to avoid concurrent ctx writes - ctx.SetValue(isCacheHitKey, true) - ctx.SetValue(cacheHitTypeKey, cacheType) - - // Create stream channel streamChan := make(chan *schemas.BifrostStreamChunk) + done := ctx.Done() + // We deliberately do NOT pre-decode all chunks up front — that would + // add O(N) latency before the first chunk is delivered, defeating the + // purpose of streaming for long responses. A malformed chunk is + // extremely unlikely (we wrote it as JSON ourselves), and on the rare + // occasion it happens we log+skip rather than truncate the user's view. 
go func() { defer close(streamChan) - - // Set cache-hit markers inside the streaming goroutine to avoid races - ctx.SetValue(isCacheHitKey, true) - ctx.SetValue(cacheHitTypeKey, cacheType) - - // Process each stream chunk - for i, chunkData := range streamArray { - chunkStr, ok := chunkData.(string) - if !ok { - plugin.logger.Warn("%s Stream chunk %d is not a string, skipping", PluginLoggerPrefix, i) - continue - } - - // Unmarshal the chunk as BifrostResponse + for i, chunkStr := range streamArray { var cachedResponse schemas.BifrostResponse if err := json.Unmarshal([]byte(chunkStr), &cachedResponse); err != nil { - plugin.logger.Warn("%s Failed to unmarshal stream chunk %d, skipping: %v", PluginLoggerPrefix, i, err) + plugin.logger.Warn("Failed to unmarshal stream chunk %d, skipping: %v", i, err) continue } // Ensure RequestType is set on every chunk so downstream consumers - // (logging, telemetry, etc.) correctly identify this as a streaming response. + // (logging, telemetry) correctly identify this as a streaming response. 
if ef := cachedResponse.GetExtraFields(); ef != nil && ef.RequestType == "" { ef.RequestType = req.RequestType } - // Add cache debug to only the last chunk if i == len(streamArray)-1 { - ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true) - extraFields := cachedResponse.GetExtraFields() - cacheDebug := schemas.BifrostCacheDebug{ - CacheHit: true, - HitType: bifrost.Ptr(string(cacheType)), - CacheID: bifrost.Ptr(result.ID), - RequestedProvider: bifrost.Ptr(string(requestedProvider)), - RequestedModel: bifrost.Ptr(requestedModel), - } - if cacheType == CacheTypeSemantic { - cacheDebug.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider)) - cacheDebug.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel) - cacheDebug.Threshold = &threshold - cacheDebug.Similarity = &similarity - cacheDebug.InputTokens = &inputTokens - } else { - cacheDebug.ProviderUsed = nil - cacheDebug.ModelUsed = nil - cacheDebug.Threshold = nil - cacheDebug.Similarity = nil - cacheDebug.InputTokens = nil - } - extraFields.CacheDebug = &cacheDebug + // stampCacheDebugForHit marks this chunk as the cache-hit final + // chunk; cache.PostLLMHook keys off CacheDebug.CacheHit=true to + // set BifrostContextKeyStreamEndIndicator on the root ctx + // synchronously (same goroutine as logging.PostLLMHook). + // + // We deliberately do NOT call ctx.Root().SetValue here. Doing + // so races against the receiver's PostLLMHook for the previous + // chunk: the cache replay can advance to iteration N (and + // write the indicator) while the receiver is still running + // PostLLMHooks for chunk N-1, poisoning that chunk's + // IsFinalChunk read and causing duplicate "final" events. 
+ plugin.stampCacheDebugForHit(state, cachedResponse.GetExtraFields(), result.ID, requestedProvider, requestedModel, cacheType, threshold, similarity, inputTokens) } - // Send chunk to stream - streamChan <- &schemas.BifrostStreamChunk{ + chunk := &schemas.BifrostStreamChunk{ BifrostTextCompletionResponse: cachedResponse.TextCompletionResponse, BifrostChatResponse: cachedResponse.ChatResponse, BifrostResponsesStreamResponse: cachedResponse.ResponsesStreamResponse, @@ -425,44 +384,63 @@ func (plugin *Plugin) buildStreamingResponseFromResult(ctx *schemas.BifrostConte BifrostTranscriptionStreamResponse: cachedResponse.TranscriptionStreamResponse, BifrostImageGenerationStreamResponse: cachedResponse.ImageGenerationStreamResponse, } + + select { + case streamChan <- chunk: + case <-done: + return + } } }() - return &schemas.LLMPluginShortCircuit{ - Stream: streamChan, - }, nil + state.ShortCircuited = true + return &schemas.LLMPluginShortCircuit{Stream: streamChan}, nil } -// parseStreamChunks parses stream_chunks data from various formats into []interface{} -// Handles []interface{}, []string, and JSON string formats -func (plugin *Plugin) parseStreamChunks(streamData interface{}) ([]interface{}, error) { - if streamData == nil { - return nil, fmt.Errorf("stream data is nil") - } - - switch v := streamData.(type) { - case []interface{}: - return v, nil - case []string: - // Convert []string to []interface{} - result := make([]interface{}, len(v)) - for i, s := range v { - result[i] = s - } - return result, nil - case string: - // Parse JSON string from Redis - var stringArray []string - if err := json.Unmarshal([]byte(v), &stringArray); err != nil { - return nil, fmt.Errorf("failed to parse JSON string: %w", err) - } - // Convert to []interface{} - result := make([]interface{}, len(stringArray)) - for i, s := range stringArray { - result[i] = s - } - return result, nil - default: - return nil, fmt.Errorf("unsupported stream data type: %T", streamData) +// 
stampCacheDebugForHit stamps the cache-hit telemetry on the response. For +// CacheTypeDirect, the embedding-related fields are explicitly cleared so +// stale carry-over from semantic hits never leaks through. CacheHitLatency +// is computed from state.CreatedAt (set at PreLLMHook entry) so consumers +// can distinguish cache-serve time from the original provider latency +// preserved in the cached response. +func (plugin *Plugin) stampCacheDebugForHit( + state *cacheState, + extraFields *schemas.BifrostResponseExtraFields, + cacheID string, + requestedProvider schemas.ModelProvider, + requestedModel string, + cacheType CacheType, + threshold *float64, + similarity *float64, + inputTokens *int, +) { + // GetExtraFields() can return nil for older/corrupted cache entries that + // were written without ExtraFields populated. Bail rather than panic — + // the chunk will still be delivered, just without CacheDebug telemetry. + if extraFields == nil { + return + } + if extraFields.CacheDebug == nil { + extraFields.CacheDebug = &schemas.BifrostCacheDebug{} + } + cd := extraFields.CacheDebug + cd.CacheHit = true + cd.HitType = bifrost.Ptr(string(cacheType)) + cd.CacheID = bifrost.Ptr(cacheID) + cd.RequestedProvider = bifrost.Ptr(string(requestedProvider)) + cd.RequestedModel = bifrost.Ptr(requestedModel) + cd.CacheHitLatency = bifrost.Ptr(time.Since(state.CreatedAt).Milliseconds()) + if cacheType == CacheTypeSemantic { + cd.ProviderUsed = bifrost.Ptr(string(plugin.config.Provider)) + cd.ModelUsed = bifrost.Ptr(plugin.config.EmbeddingModel) + cd.Threshold = threshold + cd.Similarity = similarity + cd.InputTokens = inputTokens + } else { + cd.ProviderUsed = nil + cd.ModelUsed = nil + cd.Threshold = nil + cd.Similarity = nil + cd.InputTokens = nil } } diff --git a/plugins/semanticcache/state.go b/plugins/semanticcache/state.go new file mode 100644 index 0000000000..489c329076 --- /dev/null +++ b/plugins/semanticcache/state.go @@ -0,0 +1,110 @@ +package semanticcache + 
+import ( + "time" +) + +// cacheState holds per-request state for the semantic cache plugin. It's +// keyed by the request ID and lives between PreLLMHook (where it's populated) +// and PostLLMHook (where it's consumed and cleared). +// +// Centralizes what used to be a set of stringly-typed BifrostContext keys +// (directCacheID, paramsHash, embeddings, embedding input tokens) into one +// struct so the lifecycle is explicit and consumers don't have to chase +// ctx.Value/SetValue calls across files. +// +// No mutex is needed: per-request access is serialized — PreLLMHook runs once, +// PostLLMHook runs once per chunk in order, and the only async path +// (PostLLMHook's storage goroutine) snapshots the values it needs into locals +// before launching. +type cacheState struct { + DirectCacheID string + ParamsHash string + Embeddings []float32 + EmbeddingsInputTokens int + + // FilteredInput caches getInputForCaching(req) so attachment extraction, + // embedding text extraction, and history-threshold checks reuse the same + // filtered slice instead of re-filtering on each call. + FilteredInput interface{} + + // ShortCircuited is set when PreLLMHook served the response from cache + // (returned a non-nil LLMPluginShortCircuit). PostLLMHook uses this to + // skip the entire cache-write path: only the FINAL replay chunk carries + // CacheDebug.CacheHit=true, so shouldSkipCaching() can't catch the + // non-final chunks on its own — without this flag they'd flow into + // addStreamingResponse and trigger a duplicate write at the same + // directCacheID (Weaviate 422 "id already exists"). + ShortCircuited bool + + CreatedAt time.Time +} + +// cacheStateMaxAge bounds how long an orphaned cacheState may live in memory +// before being reaped. +const cacheStateMaxAge = 60 * time.Minute + +// cacheStateCleanupInterval bounds the worst-case staleness of an orphaned +// state to ~maxAge + interval. 
+const cacheStateCleanupInterval = 5 * time.Minute + +// createCacheState writes a fresh state for requestID, overwriting any prior. +// PreLLMHook calls this at the top so retries / reused requestIDs don't +// inherit stale fields. +func (p *Plugin) createCacheState(requestID string) *cacheState { + state := &cacheState{CreatedAt: time.Now()} + p.cacheStates.Store(requestID, state) + return state +} + +// getCacheState returns the cacheState for requestID, or nil if none exists. +func (p *Plugin) getCacheState(requestID string) *cacheState { + if v, ok := p.cacheStates.Load(requestID); ok { + return v.(*cacheState) + } + return nil +} + +// clearCacheState drops the cacheState entry for requestID. It's safe to call +// when no entry exists. +func (p *Plugin) clearCacheState(requestID string) { + p.cacheStates.Delete(requestID) +} + +// runCacheStateCleanupLoop reaps stale cacheStates on a ticker until stopCh +// is closed. Started by Init, stopped by Cleanup. +func (p *Plugin) runCacheStateCleanupLoop() { + defer p.cleanupWg.Done() + ticker := time.NewTicker(cacheStateCleanupInterval) + defer ticker.Stop() + for { + select { + case <-p.stopCh: + return + case <-ticker.C: + p.cleanupOldCacheStates() + } + } +} + +// cleanupOldCacheStates deletes every cacheState whose CreatedAt is older +// than cacheStateMaxAge. Entries this old indicate a request that never +// reached PostLLMHook (client disconnect, framework bug); reaping them +// bounds memory under abnormal traffic. 
+func (p *Plugin) cleanupOldCacheStates() { + cutoff := time.Now().Add(-cacheStateMaxAge) + var toDelete []string + p.cacheStates.Range(func(key, value interface{}) bool { + state := value.(*cacheState) + if state.CreatedAt.Before(cutoff) { + toDelete = append(toDelete, key.(string)) + } + return true + }) + for _, k := range toDelete { + p.cacheStates.Delete(k) + } + if len(toDelete) > 0 { + p.logger.Debug("Reaped %d stale cache states", len(toDelete)) + } +} diff --git a/plugins/semanticcache/stream.go b/plugins/semanticcache/stream.go index e2d3c02526..f8c3fd7b3a 100644 --- a/plugins/semanticcache/stream.go +++ b/plugins/semanticcache/stream.go @@ -5,65 +5,81 @@ import ( "encoding/json" "fmt" "sort" - "sync" "time" ) -// Streaming State Management Methods +// chunkSortKey returns the (Index, ChunkIndex) tuple used to order +// accumulated stream chunks before flush. Image-generation responses use +// both fields; every other response shape uses ChunkIndex with Index=0. +// Nil chunks/responses sort to the end via a max-int sentinel so they're +// dropped deterministically by the consumer. 
+func chunkSortKey(c *StreamChunk) (int, int) { + const sentinel = int(^uint(0) >> 1) // math.MaxInt without the import + if c == nil || c.Response == nil { + return sentinel, sentinel + } + r := c.Response + switch { + case r.TextCompletionResponse != nil: + return 0, r.TextCompletionResponse.ExtraFields.ChunkIndex + case r.ChatResponse != nil: + return 0, r.ChatResponse.ExtraFields.ChunkIndex + case r.ResponsesResponse != nil: + return 0, r.ResponsesResponse.ExtraFields.ChunkIndex + case r.ResponsesStreamResponse != nil: + return 0, r.ResponsesStreamResponse.ExtraFields.ChunkIndex + case r.SpeechResponse != nil: + return 0, r.SpeechResponse.ExtraFields.ChunkIndex + case r.SpeechStreamResponse != nil: + return 0, r.SpeechStreamResponse.ExtraFields.ChunkIndex + case r.TranscriptionResponse != nil: + return 0, r.TranscriptionResponse.ExtraFields.ChunkIndex + case r.TranscriptionStreamResponse != nil: + return 0, r.TranscriptionStreamResponse.ExtraFields.ChunkIndex + case r.ImageGenerationStreamResponse != nil: + return r.ImageGenerationStreamResponse.Index, r.ImageGenerationStreamResponse.ChunkIndex + } + return sentinel, sentinel +} -// createStreamAccumulator creates a new stream accumulator for a request -func (plugin *Plugin) createStreamAccumulator(requestID string, storageID string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) *StreamAccumulator { - return &StreamAccumulator{ +// getOrCreateStreamAccumulator returns the StreamAccumulator for requestID, +// creating one if none exists. Concurrency-safe: the underlying sync.Map's +// LoadOrStore guarantees a single accumulator per request even under racing +// PostLLMHook invocations. 
+func (plugin *Plugin) getOrCreateStreamAccumulator(requestID string, storageID string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) *StreamAccumulator { + if existing, ok := plugin.streamAccumulators.Load(requestID); ok { + return existing.(*StreamAccumulator) + } + newAccumulator := &StreamAccumulator{ RequestID: requestID, StorageID: storageID, Chunks: make([]*StreamChunk, 0), - IsComplete: false, + LastSeenAt: time.Now(), Embedding: embedding, Metadata: metadata, TTL: ttl, - mu: sync.Mutex{}, } -} - -// getOrCreateStreamAccumulator gets or creates a stream accumulator for a request -func (plugin *Plugin) getOrCreateStreamAccumulator(requestID string, storageID string, embedding []float32, metadata map[string]interface{}, ttl time.Duration) *StreamAccumulator { - if existing, ok := plugin.streamAccumulators.Load(requestID); ok { - return existing.(*StreamAccumulator) - } - - newAccumulator := plugin.createStreamAccumulator(requestID, storageID, embedding, metadata, ttl) actual, _ := plugin.streamAccumulators.LoadOrStore(requestID, newAccumulator) return actual.(*StreamAccumulator) } -// addStreamChunk adds a chunk to the stream accumulator -func (plugin *Plugin) addStreamChunk(requestID string, chunk *StreamChunk, isFinalChunk bool) error { - // Get accumulator (should exist if properly initialized) +// addStreamChunk appends a chunk to the request's accumulator and refreshes +// LastSeenAt so the reaper treats the stream as still active. 
+func (plugin *Plugin) addStreamChunk(requestID string, chunk *StreamChunk) error { accumulatorInterface, exists := plugin.streamAccumulators.Load(requestID) if !exists { return fmt.Errorf("stream accumulator not found for request %s", requestID) } - accumulator := accumulatorInterface.(*StreamAccumulator) accumulator.mu.Lock() defer accumulator.mu.Unlock() - - // Add chunk to the list (chunks arrive in order) accumulator.Chunks = append(accumulator.Chunks, chunk) - - // Set FinalTimestamp when FinishReason is present - // This handles both normal completion chunks and usage-only last chunks - if isFinalChunk { - accumulator.FinalTimestamp = chunk.Timestamp - } - - plugin.logger.Debug(fmt.Sprintf("%s Added chunk to stream accumulator for request %s", PluginLoggerPrefix, requestID)) - + accumulator.LastSeenAt = chunk.Timestamp return nil } -// processAccumulatedStream processes all accumulated chunks and caches the complete stream -// Flow: Collect everything → Check for ANY errors → If no errors, order and send to .Add() → If any errors, drop operation +// processAccumulatedStream serializes and stores the accumulated chunks as a +// single cache entry. Called once per stream when the final chunk arrives. 
func (plugin *Plugin) processAccumulatedStream(ctx context.Context, requestID string) error { accumulatorInterface, exists := plugin.streamAccumulators.Load(requestID) if !exists { @@ -72,130 +88,106 @@ func (plugin *Plugin) processAccumulatedStream(ctx context.Context, requestID st accumulator := accumulatorInterface.(*StreamAccumulator) accumulator.mu.Lock() - - // Ensure unlock happens after cleanup defer accumulator.mu.Unlock() - // Ensure cleanup happens defer plugin.cleanupStreamAccumulator(requestID) - // STEP 1: Check if any chunk in the entire stream had an error - if accumulator.HasError { - plugin.logger.Debug(fmt.Sprintf("%s Stream for request %s had errors, dropping entire operation (not caching)", PluginLoggerPrefix, requestID)) - return nil - } - - // STEP 2: All chunks are clean, now sort and build ordered stream for caching - plugin.logger.Debug(fmt.Sprintf("%s Stream for request %s completed successfully, processing %d chunks for caching", PluginLoggerPrefix, requestID, len(accumulator.Chunks))) - - // Sort chunks by their ChunkIndex to ensure proper order (stable + nil-safe) sort.SliceStable(accumulator.Chunks, func(i, j int) bool { - if accumulator.Chunks[i].Response == nil || accumulator.Chunks[j].Response == nil { - // Push nils to the end deterministically - return accumulator.Chunks[j].Response != nil - } - if accumulator.Chunks[i].Response.TextCompletionResponse != nil { - return accumulator.Chunks[i].Response.TextCompletionResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.TextCompletionResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.ChatResponse != nil { - return accumulator.Chunks[i].Response.ChatResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.ChatResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.ResponsesResponse != nil { - return accumulator.Chunks[i].Response.ResponsesResponse.ExtraFields.ChunkIndex < 
accumulator.Chunks[j].Response.ResponsesResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.ResponsesStreamResponse != nil { - return accumulator.Chunks[i].Response.ResponsesStreamResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.ResponsesStreamResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.SpeechResponse != nil { - return accumulator.Chunks[i].Response.SpeechResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.SpeechResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.SpeechStreamResponse != nil { - return accumulator.Chunks[i].Response.SpeechStreamResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.SpeechStreamResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.TranscriptionResponse != nil { - return accumulator.Chunks[i].Response.TranscriptionResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.TranscriptionResponse.ExtraFields.ChunkIndex + ai, bi := chunkSortKey(accumulator.Chunks[i]) + aj, bj := chunkSortKey(accumulator.Chunks[j]) + if ai != aj { + return ai < aj } - if accumulator.Chunks[i].Response.TranscriptionStreamResponse != nil { - return accumulator.Chunks[i].Response.TranscriptionStreamResponse.ExtraFields.ChunkIndex < accumulator.Chunks[j].Response.TranscriptionStreamResponse.ExtraFields.ChunkIndex - } - if accumulator.Chunks[i].Response.ImageGenerationStreamResponse != nil { - // For image generation, sort by Index first, then ChunkIndex - if accumulator.Chunks[i].Response.ImageGenerationStreamResponse.Index != accumulator.Chunks[j].Response.ImageGenerationStreamResponse.Index { - return accumulator.Chunks[i].Response.ImageGenerationStreamResponse.Index < accumulator.Chunks[j].Response.ImageGenerationStreamResponse.Index - } - return accumulator.Chunks[i].Response.ImageGenerationStreamResponse.ChunkIndex < accumulator.Chunks[j].Response.ImageGenerationStreamResponse.ChunkIndex - } - return false + return bi < bj }) 
- var streamResponses []string + streamResponses := make([]string, 0, len(accumulator.Chunks)) for i, chunk := range accumulator.Chunks { - if chunk.Response != nil { - chunkData, err := json.Marshal(chunk.Response) - if err != nil { - plugin.logger.Warn("%s Failed to marshal stream chunk %d: %v", PluginLoggerPrefix, i, err) - continue - } - streamResponses = append(streamResponses, string(chunkData)) + if chunk.Response == nil { + continue + } + chunkData, err := json.Marshal(chunk.Response) + if err != nil { + plugin.logger.Warn("Failed to marshal stream chunk %d: %v", i, err) + continue } + streamResponses = append(streamResponses, string(chunkData)) } - // STEP 3: Validate we have valid chunks to cache if len(streamResponses) == 0 { - plugin.logger.Warn("%s Stream for request %s has no valid response chunks, skipping cache storage", PluginLoggerPrefix, requestID) + plugin.logger.Warn("Stream for request %s has no valid response chunks, skipping cache storage", requestID) return nil } - // STEP 4: Build final metadata and submit to .Add() method - finalMetadata := make(map[string]interface{}) + finalMetadata := make(map[string]interface{}, len(accumulator.Metadata)+1) for k, v := range accumulator.Metadata { finalMetadata[k] = v } finalMetadata["stream_chunks"] = streamResponses - // Store complete unified entry using the final cache storage ID. 
if err := plugin.store.Add(ctx, plugin.config.VectorStoreNamespace, accumulator.StorageID, accumulator.Embedding, finalMetadata); err != nil { return fmt.Errorf("failed to store complete streaming cache entry: %w", err) } - plugin.logger.Debug(fmt.Sprintf("%s Successfully cached complete stream with %d ordered chunks, ID: %s", PluginLoggerPrefix, len(streamResponses), accumulator.StorageID)) + plugin.logger.Debug("Cached stream with %d chunks, storageID=%s", len(streamResponses), accumulator.StorageID) return nil } -// cleanupStreamAccumulator removes the stream accumulator for a request +// cleanupStreamAccumulator drops the accumulator for requestID. Safe to call +// when no entry exists. func (plugin *Plugin) cleanupStreamAccumulator(requestID string) { plugin.streamAccumulators.Delete(requestID) } -// cleanupOldStreamAccumulators removes stream accumulators older than 5 minutes +// streamAccumulatorMaxAge is how long a stream accumulator may live without +// reaching its final chunk before it's reaped by the periodic cleanup. +const streamAccumulatorMaxAge = 5 * time.Minute + +// streamCleanupInterval bounds the worst-case staleness of an abandoned +// accumulator to ~maxAge + interval. +const streamCleanupInterval = 1 * time.Minute + +// cleanupOldStreamAccumulators reaps accumulators whose most recent chunk is +// older than streamAccumulatorMaxAge. Called both periodically and at +// shutdown to prevent abandoned streams (client disconnect, mid-stream +// error) from accumulating in memory; reaping by LastSeenAt rather than +// first-chunk time keeps long-running streams alive while they're still +// receiving chunks. 
func (plugin *Plugin) cleanupOldStreamAccumulators() { - fiveMinutesAgo := time.Now().Add(-5 * time.Minute) - cleanedCount := 0 - toDelete := make([]string, 0) + cutoff := time.Now().Add(-streamAccumulatorMaxAge) + var toDelete []string plugin.streamAccumulators.Range(func(key, value interface{}) bool { requestID := key.(string) accumulator := value.(*StreamAccumulator) - - // Check if this accumulator is old (no activity for 5 minutes) accumulator.mu.Lock() - if len(accumulator.Chunks) > 0 { - firstChunkTime := accumulator.Chunks[0].Timestamp - if firstChunkTime.Before(fiveMinutesAgo) { - toDelete = append(toDelete, requestID) - plugin.logger.Debug(fmt.Sprintf("%s Cleaned up old stream accumulator for request %s", PluginLoggerPrefix, requestID)) - } + if accumulator.LastSeenAt.Before(cutoff) { + toDelete = append(toDelete, requestID) } accumulator.mu.Unlock() return true }) - // Delete outside the Range loop to avoid concurrent modification for _, requestID := range toDelete { plugin.streamAccumulators.Delete(requestID) - cleanedCount++ } - if cleanedCount > 0 { - plugin.logger.Debug(fmt.Sprintf("%s Cleaned up %d old stream accumulators", PluginLoggerPrefix, cleanedCount)) + if len(toDelete) > 0 { + plugin.logger.Debug("Reaped %d stale stream accumulators", len(toDelete)) + } +} + +// runStreamCleanupLoop runs cleanupOldStreamAccumulators on a ticker until +// stopCh is closed. Started by Init, stopped by Cleanup. 
+func (plugin *Plugin) runStreamCleanupLoop() { + defer plugin.cleanupWg.Done() + ticker := time.NewTicker(streamCleanupInterval) + defer ticker.Stop() + for { + select { + case <-plugin.stopCh: + return + case <-ticker.C: + plugin.cleanupOldStreamAccumulators() + } } } diff --git a/plugins/semanticcache/test_utils.go b/plugins/semanticcache/test_utils.go index e9b847c6dc..6a506f51a4 100644 --- a/plugins/semanticcache/test_utils.go +++ b/plugins/semanticcache/test_utils.go @@ -4,15 +4,64 @@ import ( "context" "os" "strconv" + "sync" "testing" "time" + "github.com/google/uuid" bifrost "github.com/maximhq/bifrost/core" "github.com/maximhq/bifrost/core/schemas" "github.com/maximhq/bifrost/framework/vectorstore" mocker "github.com/maximhq/bifrost/plugins/mocker" ) +// isTransientUpstreamError reports whether a BifrostError reflects a +// transient upstream condition (timeout, rate-limit, 5xx) where skipping +// the test is reasonable. All other errors — including missing API keys, +// client-side issues, or non-HTTP failures — should fail the test rather +// than mask regressions behind a green skip. +func isTransientUpstreamError(err *schemas.BifrostError) bool { + if err == nil || err.StatusCode == nil { + return false + } + code := *err.StatusCode + return code == 408 || code == 425 || code == 429 || code >= 500 +} + +// withTestRequestID stamps a fresh BifrostContextKeyRequestID on the context. +// Unit tests that call PreLLMHook/PostLLMHook directly need this so the plugin +// can anchor per-request state. In integration tests the framework overwrites +// it, so setting it here is safe in either path. +func withTestRequestID(ctx *schemas.BifrostContext) *schemas.BifrostContext { + ctx.SetValue(schemas.BifrostContextKeyRequestID, uuid.NewString()) + return ctx +} + +// keyForTest returns a cache key namespaced by t.Name(). 
All tests should +// derive their cache keys via this helper so two tests running in parallel +// (t.Parallel) cannot see each other's entries through the shared Weaviate +// namespace — direct lookups encode cache_key into the storage ID and +// semantic search filters by it. +// +// Pass suffix="" for the most common single-key-per-test case. For tests +// that exercise multiple distinct cache keys (e.g. cross-key isolation +// tests), pass suffixes to disambiguate within the test. +func keyForTest(t testing.TB, suffix string) string { + t.Helper() + if suffix == "" { + return t.Name() + } + return t.Name() + "/" + suffix +} + +// newBaseTestContext returns a BifrostContext with a fresh request ID stamped. +// Replaces bare schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) +// in tests that call plugin.PreLLMHook / PostLLMHook directly — the plugin +// requires a request ID to anchor per-request state. +func newBaseTestContext() *schemas.BifrostContext { + return withTestRequestID(schemas.NewBifrostContext(context.Background(), schemas.NoDeadline)) +} + // getWeaviateConfigFromEnv retrieves Weaviate configuration from environment variables func getWeaviateConfigFromEnv() vectorstore.WeaviateConfig { scheme := os.Getenv("WEAVIATE_SCHEME") @@ -379,11 +428,44 @@ func NewTestSetupWithConfig(t *testing.T, config *Config) *TestSetup { return NewTestSetupWithVectorStore(t, config, vectorstore.VectorStoreTypeWeaviate) } +// SharedTestNamespace is the single Weaviate class all parallel tests share. +// Mirrors production: many concurrent requests hit one namespace, isolated +// by per-test cache_keys (see keyForTest). Distinct from the plugin's +// production default so test runs can't collide with a real cache. 
+const SharedTestNamespace = "BifrostSemanticCachePluginTest" + +var ( + sharedTestNamespaceOnce sync.Once + sharedTestNamespaceErr error +) + +// ensureSharedTestNamespace creates the shared test class exactly once per +// test process — sync.Once gates the TOCTOU race between concurrent +// Plugin.Init calls (each of which would otherwise check-then-create against +// the shared store and one would lose the race). +// +// Subsequent Plugin.Init calls in tests still invoke CreateNamespace, but the +// vectorstore implementations short-circuit when the class already exists. +func ensureSharedTestNamespace(ctx context.Context, store vectorstore.VectorStore, dim int) error { + sharedTestNamespaceOnce.Do(func() { + sharedTestNamespaceErr = store.CreateNamespace(ctx, SharedTestNamespace, dim, VectorStoreProperties) + }) + return sharedTestNamespaceErr +} + // NewTestSetupWithVectorStore creates a new test setup with custom configuration and vector store type func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectorstore.VectorStoreType) *TestSetup { ctx := schemas.NewBifrostContext(context.Background(), schemas.NoDeadline) logger := bifrost.NewDefaultLogger(schemas.LogLevelDebug) + // All tests share one namespace; isolation comes from per-test cache_keys. + if config.VectorStoreNamespace == "" { + config.VectorStoreNamespace = SharedTestNamespace + } + // Tests must NOT delete the shared namespace at cleanup — other parallel + // tests are still using it. Override any caller default. 
+ config.CleanUpOnShutdown = false + // Get the appropriate config for the vector store type var storeConfig interface{} switch storeType { @@ -408,6 +490,15 @@ func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectors t.Skipf("Vector store %s not available or failed to connect: %v", storeType, err) } + // Pre-create the shared namespace exactly once across the test process so + // concurrent Plugin.Init calls don't lose the TOCTOU race inside the + // vector store driver (check-then-create). + preCreateCtx, preCreateCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer preCreateCancel() + if err := ensureSharedTestNamespace(preCreateCtx, store, config.Dimension); err != nil { + t.Fatalf("Failed to create shared test namespace: %v", err) + } + plugin, err := Init(schemas.NewBifrostContext(context.Background(), schemas.NoDeadline), config, logger, store) if err != nil { t.Fatalf("Failed to initialize plugin: %v", err) @@ -534,13 +625,29 @@ func AssertNoCacheHit(t *testing.T, response *schemas.BifrostResponse) { t.Log("✅ Response correctly not served from cache (cache_debug present but CacheHit=false)") } -// WaitForCache waits for async cache operations to complete +// WaitForCache waits for async cache operations to complete. +// +// WaitForPendingOperations now drains the writersWg accurately (every +// PostLLMHook goroutine + the expired-entry async delete is tracked), so the +// stored entries are guaranteed durable when this returns. The small sleep +// below is a buffer for vector store index visibility on stores with eventual +// consistency (Weaviate is usually immediate on single-node, but cloud or +// multi-shard setups may need a tick to make the entry queryable). +// +// Override via SEMCACHE_TEST_INDEX_DELAY_MS for slower stores / CI. 
func WaitForCache(plugin schemas.LLMPlugin) { if p, ok := plugin.(*Plugin); ok { p.WaitForPendingOperations() } - // Small buffer for Weaviate index consistency - time.Sleep(500 * time.Millisecond) + delayMs := 100 + if v := os.Getenv("SEMCACHE_TEST_INDEX_DELAY_MS"); v != "" { + if parsed, err := strconv.Atoi(v); err == nil && parsed >= 0 { + delayMs = parsed + } + } + if delayMs > 0 { + time.Sleep(time.Duration(delayMs) * time.Millisecond) + } } // CreateEmbeddingRequest creates an embedding request for testing @@ -611,28 +718,30 @@ func CreateImageGenerationRequest(prompt string, size string, quality string) *s } // CreateContextWithCacheKey creates a context with the test cache key -func CreateContextWithCacheKey(value string) *schemas.BifrostContext { - return schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, value) +// CreateContextWithCacheKey creates a context with a per-test cache key. +// suffix may be "" for tests using only one cache key. +func CreateContextWithCacheKey(t testing.TB, suffix string) *schemas.BifrostContext { + return withTestRequestID(schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, keyForTest(t, suffix))) } // CreateContextWithCacheKeyAndType creates a context with cache key and cache type -func CreateContextWithCacheKeyAndType(value string, cacheType CacheType) *schemas.BifrostContext { - return schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, value).WithValue(CacheTypeKey, cacheType) +func CreateContextWithCacheKeyAndType(t testing.TB, suffix string, cacheType CacheType) *schemas.BifrostContext { + return withTestRequestID(schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, keyForTest(t, suffix)).WithValue(CacheTypeKey, cacheType)) } // CreateContextWithCacheKeyAndTTL creates a context with cache key and custom TTL -func CreateContextWithCacheKeyAndTTL(value string, ttl time.Duration) 
*schemas.BifrostContext { - return schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, value).WithValue(CacheTTLKey, ttl) +func CreateContextWithCacheKeyAndTTL(t testing.TB, suffix string, ttl time.Duration) *schemas.BifrostContext { + return withTestRequestID(schemas.NewBifrostContextWithValue(context.Background(), schemas.NoDeadline, CacheKey, keyForTest(t, suffix)).WithValue(CacheTTLKey, ttl)) } // CreateContextWithCacheKeyAndThreshold creates a context with cache key and custom threshold -func CreateContextWithCacheKeyAndThreshold(value string, threshold float64) *schemas.BifrostContext { - return schemas.NewBifrostContext(context.Background(), schemas.NoDeadline).WithValue(CacheKey, value).WithValue(CacheThresholdKey, threshold) +func CreateContextWithCacheKeyAndThreshold(t testing.TB, suffix string, threshold float64) *schemas.BifrostContext { + return withTestRequestID(schemas.NewBifrostContext(context.Background(), schemas.NoDeadline).WithValue(CacheKey, keyForTest(t, suffix)).WithValue(CacheThresholdKey, threshold)) } // CreateContextWithCacheKeyAndNoStore creates a context with cache key and no-store flag -func CreateContextWithCacheKeyAndNoStore(value string, noStore bool) *schemas.BifrostContext { - return schemas.NewBifrostContext(context.Background(), schemas.NoDeadline).WithValue(CacheKey, value).WithValue(CacheNoStoreKey, noStore) +func CreateContextWithCacheKeyAndNoStore(t testing.TB, suffix string, noStore bool) *schemas.BifrostContext { + return withTestRequestID(schemas.NewBifrostContext(context.Background(), schemas.NoDeadline).WithValue(CacheKey, keyForTest(t, suffix)).WithValue(CacheNoStoreKey, noStore)) } // CreateTestSetupWithConversationThreshold creates a test setup with custom conversation history threshold diff --git a/plugins/semanticcache/utils.go b/plugins/semanticcache/utils.go index 125ae4670e..29f15fc825 100644 --- a/plugins/semanticcache/utils.go +++ b/plugins/semanticcache/utils.go @@ -5,6 +5,7 
@@ import ( "encoding/json" "fmt" "maps" + "sort" "strings" "time" @@ -14,19 +15,119 @@ import ( "github.com/maximhq/bifrost/core/schemas" ) -// directCacheNamespace is a fixed UUID v5 namespace used for deterministic direct cache ID generation. -// Using a fixed namespace ensures IDs are reproducible across restarts and store types. +// directCacheNamespace is a fixed namespace UUID for generating deterministic +// UUID v5 cache IDs via uuid.NewSHA1, used by generateDirectCacheID. The +// bytes are arbitrary — they only need to be stable across restarts so the +// same (cache_key, request_hash, params_hash) tuple maps to the same ID. var directCacheNamespace = uuid.MustParse("b1f3c2d4-e5a6-7890-abcd-ef1234567890") +// isSemanticCacheSupportedRequestType reports whether semantic cache supports +// this request type for cache lookup and storage. Unsupported types are skipped. +// +// IMPORTANT: this list must stay in sync with the switch in buildRequestMetadataForCaching. +// When adding a new case there, add it here too. +func isSemanticCacheSupportedRequestType(requestType schemas.RequestType) bool { + switch requestType { + case schemas.TextCompletionRequest, + schemas.TextCompletionStreamRequest, + schemas.ChatCompletionRequest, + schemas.ChatCompletionStreamRequest, + schemas.ResponsesRequest, + schemas.ResponsesStreamRequest, + schemas.WebSocketResponsesRequest, + schemas.SpeechRequest, + schemas.SpeechStreamRequest, + schemas.EmbeddingRequest, + schemas.TranscriptionRequest, + schemas.TranscriptionStreamRequest, + schemas.ImageGenerationRequest, + schemas.ImageGenerationStreamRequest: + return true + default: + return false + } +} + +// hashSortedSet returns a deterministic hex hash for an order-insensitive +// list of items. Some request fields are semantically sets but JSON-encoded +// as lists (most notably Tools, where MCP's randomized map iteration would +// otherwise perturb the request hash). 
The caller supplies a key extractor +// because shapes differ across fields (e.g. ChatTool.Function.Name vs +// ResponsesTool.Name). Use this for set-shaped fields large enough to be +// worth compressing; for short []string sets, prefer sortedStringSet which +// keeps the metadata human-debuggable. +func hashSortedSet[T any](items []T, key func(T) string) (string, error) { + if len(items) == 0 { + return "", nil + } + sorted := make([]T, len(items)) + copy(sorted, items) + sort.SliceStable(sorted, func(i, j int) bool { + return key(sorted[i]) < key(sorted[j]) + }) + payload := make([]any, len(sorted)) + for i, t := range sorted { + payload[i] = t + } + itemsJSON, err := schemas.MarshalDeeplySorted(payload) + if err != nil { + return "", err + } + return fmt.Sprintf("%x", xxhash.Sum64(itemsJSON)), nil +} + +// hashMap returns a deterministic xxhash hex digest of the map. Uses +// MarshalDeeplySorted because plain json.Marshal doesn't guarantee key +// ordering on Go maps. +func hashMap(m map[string]interface{}) (string, error) { + jsonData, err := schemas.MarshalDeeplySorted(m) + if err != nil { + return "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) + } + return fmt.Sprintf("%x", xxhash.Sum64(jsonData)), nil +} + +// sortedStringSet returns a sorted copy of a string slice that is semantically +// a set (e.g. modalities, stop sequences, include flags). Sorting in place +// would mutate the caller's parameters, so a copy is returned. +func sortedStringSet(values []string) []string { + if len(values) == 0 { + return nil + } + sorted := make([]string, len(values)) + copy(sorted, values) + sort.Strings(sorted) + return sorted +} + +// putIfSet writes m[key] = *v when v is non-nil. Used by extract*ParametersToMetadata +// to collapse the if-nil-set boilerplate that dominates those functions. 
+func putIfSet[T any](m map[string]any, key string, v *T) { + if v != nil { + m[key] = *v + } +} + +// putSortedSetIfNonEmpty writes m[key] = sortedStringSet(values) when values +// has any entries — otherwise leaves the key absent so the resulting metadata +// hash treats "unset" and "empty" identically. +func putSortedSetIfNonEmpty(m map[string]any, key string, values []string) { + if len(values) > 0 { + m[key] = sortedStringSet(values) + } +} + // normalizeText applies consistent normalization to text inputs for better cache hit rates. // It converts text to lowercase and trims whitespace to reduce cache misses due to minor variations. func normalizeText(text string) string { return strings.ToLower(strings.TrimSpace(text)) } -// Semantic cache keeps vector-store/search payloads as float32 even though -// normalized embedding API responses now preserve provider precision as float64. -func toFloat32Embedding(values []float64) []float32 { +// float64ToFloat32Embedding converts a []float64 to a []float32. The semantic cache +// keeps vector payloads as float32 even though the embedding APIs now +// preserve full float64 precision — the cosine similarity used at query +// time is well within float32 range. +func float64ToFloat32Embedding(values []float64) []float32 { if len(values) == 0 { return nil } @@ -39,355 +140,264 @@ func toFloat32Embedding(values []float64) []float32 { return embedding } -func flattenToFloat32Embedding(values [][]float64) []float32 { - total := 0 - for _, arr := range values { - total += len(arr) - } - if total == 0 { +// int8ToFloat32Embedding promotes a quantized int8 embedding (used for +// binary/quantized formats by some providers) to float32 so the cache can +// store and compare it uniformly against float32 entries. 
+func int8ToFloat32Embedding(values []int8) []float32 { + if len(values) == 0 { return nil } - - embedding := make([]float32, 0, total) - for _, arr := range values { - embedding = append(embedding, toFloat32Embedding(arr)...) + embedding := make([]float32, len(values)) + for i, value := range values { + embedding[i] = float32(value) } - return embedding } -// generateEmbedding generates an embedding for the given text using the configured provider. -func (plugin *Plugin) generateEmbedding(ctx *schemas.BifrostContext, text string) ([]float32, int, error) { - // Create embedding request - embeddingReq := &schemas.BifrostEmbeddingRequest{ - Provider: plugin.config.Provider, - Model: plugin.config.EmbeddingModel, - Input: &schemas.EmbeddingInput{ - Text: &text, - }, - } - - // Create a new context from incoming context. Parent ctx will be used for cancellation. - embeddingCtx := schemas.NewBifrostContext(ctx, schemas.NoDeadline) - defer embeddingCtx.ReleasePluginScope() - - embeddingCtx.SetValue(schemas.BifrostContextKeySkipPluginPipeline, true) - - if plugin.embeddingRequestExecutor == nil { - return nil, 0, fmt.Errorf("embedding request executor is not configured") - } - response, err := plugin.embeddingRequestExecutor(embeddingCtx, embeddingReq) - if err != nil { - return nil, 0, fmt.Errorf("failed to generate embedding: %v", err) - } - - // Extract the first embedding from response - if len(response.Data) == 0 { - return nil, 0, fmt.Errorf("no embeddings returned from provider") - } - - // Get the embedding from the first data item - embedding := response.Data[0].Embedding - inputTokens := 0 - if response.Usage != nil { - inputTokens = response.Usage.TotalTokens +// int32ToFloat32Embedding promotes a uint8/ubinary-style int32 embedding to +// float32 for the same reason as int8ToFloat32Embedding. 
+func int32ToFloat32Embedding(values []int32) []float32 { + if len(values) == 0 { + return nil } - - if embedding.EmbeddingStr != nil { - // decode embedding.EmbeddingStr to []float32 - var vals []float32 - if err := json.Unmarshal([]byte(*embedding.EmbeddingStr), &vals); err != nil { - return nil, 0, fmt.Errorf("failed to parse string embedding: %w", err) - } - return vals, inputTokens, nil - } else if embedding.EmbeddingArray != nil { - return toFloat32Embedding(embedding.EmbeddingArray), inputTokens, nil - } else if len(embedding.Embedding2DArray) > 0 { - return flattenToFloat32Embedding(embedding.Embedding2DArray), inputTokens, nil + embedding := make([]float32, len(values)) + for i, value := range values { + embedding[i] = float32(value) } - - return nil, 0, fmt.Errorf("embedding data is not in expected format") + return embedding } -// generateRequestHash creates an xxhash of the request for semantic cache key generation. -// It normalizes the request by including all relevant fields that affect the response: -// - Input (chat completion, text completion, etc.) -// - Parameters (temperature, max_tokens, tools, etc.) -// - Provider (if CacheByProvider is true) -// - Model (if CacheByModel is true) -// -// Note: Fallbacks are excluded as they only affect error handling, not the actual response. 
-// -// Parameters: -// - req: The Bifrost request to hash for semantic cache key generation -// -// Returns: -// - string: Hexadecimal representation of the xxhash -// - error: Any error that occurred during request normalization or hashing -func (plugin *Plugin) generateRequestHash(req *schemas.BifrostRequest) (string, error) { - // Build canonical metadata first to ensure deterministic hashing - metadata, err := plugin.buildRequestMetadataForCaching(req) - if err != nil { - return "", fmt.Errorf("failed to build metadata for request hash: %w", err) +// flattenToFloat32Embedding concatenates a 2D embedding (one inner slice per +// input chunk) into a single flat []float32. Used when the provider returns +// per-chunk embeddings that we want to store as a single vector. +func flattenToFloat32Embedding(values [][]float64) []float32 { + total := 0 + for _, arr := range values { + total += len(arr) } - - // Create a hash input structure that includes both input and canonical parameters - hashInput := struct { - Input interface{} `json:"input"` - Params map[string]interface{} `json:"params,omitempty"` - }{ - Input: plugin.getNormalizedInputForCaching(req), - Params: metadata, + if total == 0 { + return nil } - // Marshal to JSON with deeply sorted keys for deterministic hashing - // MarshalDeeplySorted handles OrderedMap and nested map[string]interface{} correctly - jsonData, err := schemas.MarshalDeeplySorted(hashInput) - if err != nil { - return "", fmt.Errorf("failed to marshal request for hashing: %w", err) + embedding := make([]float32, 0, total) + for _, arr := range values { + embedding = append(embedding, float64ToFloat32Embedding(arr)...) 
} - // Generate hash based on configured algorithm - hash := xxhash.Sum64(jsonData) - return fmt.Sprintf("%x", hash), nil + return embedding } -func (plugin *Plugin) buildRequestMetadataForCaching(req *schemas.BifrostRequest) (map[string]interface{}, error) { +// buildRequestMetadataForCaching extracts the canonical, hashable parameter +// set for the request: anything that should change the cache key when it +// changes. The returned map is fed to hashMap to derive params_hash, which +// then anchors both direct and semantic lookups. +func (plugin *Plugin) buildRequestMetadataForCaching(state *cacheState, req *schemas.BifrostRequest) (map[string]interface{}, error) { metadata := map[string]interface{}{ "stream": bifrost.IsStreamRequestType(req.RequestType), } + if attachments := plugin.extractAttachmentsForCaching(state, req); len(attachments) > 0 { + metadata["attachments"] = attachments + } + switch req.RequestType { case schemas.TextCompletionRequest, schemas.TextCompletionStreamRequest: if req.TextCompletionRequest == nil { - return nil, fmt.Errorf("text completion payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("text completion payload is nil") } if req.TextCompletionRequest != nil && req.TextCompletionRequest.Params != nil { plugin.extractTextCompletionParametersToMetadata(req.TextCompletionRequest.Params, metadata) } case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest: if req.ChatRequest == nil { - return nil, fmt.Errorf("chat payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("chat payload is nil") } if req.ChatRequest != nil && req.ChatRequest.Params != nil { plugin.extractChatParametersToMetadata(req.ChatRequest.Params, metadata) } case schemas.ResponsesRequest, schemas.ResponsesStreamRequest, schemas.WebSocketResponsesRequest: if req.ResponsesRequest == nil { - return nil, fmt.Errorf("responses payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("responses payload 
is nil") } if req.ResponsesRequest != nil && req.ResponsesRequest.Params != nil { plugin.extractResponsesParametersToMetadata(req.ResponsesRequest.Params, metadata) } case schemas.SpeechRequest, schemas.SpeechStreamRequest: if req.SpeechRequest == nil { - return nil, fmt.Errorf("speech payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("speech payload is nil") } if req.SpeechRequest != nil && req.SpeechRequest.Params != nil { plugin.extractSpeechParametersToMetadata(req.SpeechRequest.Params, metadata) } case schemas.EmbeddingRequest: if req.EmbeddingRequest == nil { - return nil, fmt.Errorf("embedding payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("embedding payload is nil") } if req.EmbeddingRequest != nil && req.EmbeddingRequest.Params != nil { plugin.extractEmbeddingParametersToMetadata(req.EmbeddingRequest.Params, metadata) } case schemas.TranscriptionRequest, schemas.TranscriptionStreamRequest: if req.TranscriptionRequest == nil { - return nil, fmt.Errorf("transcription payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("transcription payload is nil") } if req.TranscriptionRequest != nil && req.TranscriptionRequest.Params != nil { plugin.extractTranscriptionParametersToMetadata(req.TranscriptionRequest.Params, metadata) } case schemas.ImageGenerationRequest, schemas.ImageGenerationStreamRequest: if req.ImageGenerationRequest == nil { - return nil, fmt.Errorf("image generation payload is nil (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("image generation payload is nil") } if req.ImageGenerationRequest != nil && req.ImageGenerationRequest.Params != nil { plugin.extractImageGenerationParametersToMetadata(req.ImageGenerationRequest.Params, metadata) } default: - return nil, fmt.Errorf("unsupported request type for semantic caching (%s)", describeRequestShape(req)) + return nil, fmt.Errorf("unsupported request type for semantic caching") } return metadata, nil } -// 
isSemanticCacheSupportedRequestType reports whether semantic cache supports -// this request type for cache lookup and storage. Unsupported types are skipped. -// -// IMPORTANT: this list must stay in sync with the switch in buildRequestMetadataForCaching. -// When adding a new case there, add it here too. -func isSemanticCacheSupportedRequestType(requestType schemas.RequestType) bool { - switch requestType { - case schemas.TextCompletionRequest, - schemas.TextCompletionStreamRequest, - schemas.ChatCompletionRequest, - schemas.ChatCompletionStreamRequest, - schemas.ResponsesRequest, - schemas.ResponsesStreamRequest, - schemas.WebSocketResponsesRequest, - schemas.SpeechRequest, - schemas.SpeechStreamRequest, - schemas.EmbeddingRequest, - schemas.TranscriptionRequest, - schemas.TranscriptionStreamRequest, - schemas.ImageGenerationRequest, - schemas.ImageGenerationStreamRequest: - return true - default: - return false +// extractAttachmentsForCaching collects image/file URLs referenced by the +// request input in document order. Attachments are part of the cache key — +// two messages with identical text but different images must not collide. +// Honors ExcludeSystemPrompt via getInputForCaching. Returns nil for +// request types without attachment-bearing content blocks. 
+func (plugin *Plugin) extractAttachmentsForCaching(state *cacheState, req *schemas.BifrostRequest) []string { + switch req.RequestType { + case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest: + messages, ok := plugin.getInputForCaching(state, req).([]schemas.ChatMessage) + if !ok { + return nil + } + var attachments []string + for _, msg := range messages { + if msg.Content == nil || msg.Content.ContentBlocks == nil { + continue + } + for _, block := range msg.Content.ContentBlocks { + if block.ImageURLStruct != nil && block.ImageURLStruct.URL != "" { + attachments = append(attachments, block.ImageURLStruct.URL) + } + } + } + return attachments + case schemas.ResponsesRequest, schemas.ResponsesStreamRequest, schemas.WebSocketResponsesRequest: + messages, ok := plugin.getInputForCaching(state, req).([]schemas.ResponsesMessage) + if !ok { + return nil + } + var attachments []string + for _, msg := range messages { + if msg.Content == nil || msg.Content.ContentBlocks == nil { + continue + } + for _, block := range msg.Content.ContentBlocks { + if block.ResponsesInputMessageContentBlockImage != nil && block.ResponsesInputMessageContentBlockImage.ImageURL != nil { + attachments = append(attachments, *block.ResponsesInputMessageContentBlockImage.ImageURL) + } + if block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileURL != nil { + attachments = append(attachments, *block.ResponsesInputMessageContentBlockFile.FileURL) + } + } + } + return attachments } + return nil } -func (plugin *Plugin) computeRequestParamsHash(req *schemas.BifrostRequest) (string, error) { - metadata, err := plugin.buildRequestMetadataForCaching(req) - if err != nil { - return "", err - } - - hash, err := getMetadataHash(metadata) - if err != nil { - return "", fmt.Errorf("failed to compute params hash (%s): %w", describeRequestShape(req), err) +// extractChatMessageContent flattens a ChatMessage's content (string or +// blocks) 
into a single space-joined string. Returns "" when the message +// carries no text (e.g. assistant tool-call messages with nil content). +func extractChatMessageContent(msg schemas.ChatMessage) string { + if msg.Content == nil { + return "" + } + if msg.Content.ContentStr != nil { + return *msg.Content.ContentStr + } + if msg.Content.ContentBlocks != nil { + var parts []string + for _, block := range msg.Content.ContentBlocks { + if block.Text != nil { + parts = append(parts, *block.Text) + } + } + return strings.Join(parts, " ") } - return hash, nil + return "" } -// describeRequestShape summarizes the request families relevant to semantic -// cache lookups and diagnostics. It is intentionally scoped to request types -// that can participate in semantic cache behavior. -func describeRequestShape(req *schemas.BifrostRequest) string { - if req == nil { - return "request=nil" +// extractResponsesMessageContent flattens a ResponsesMessage's content into a +// single string, mirroring extractChatMessageContent but for the Responses API. +func extractResponsesMessageContent(msg schemas.ResponsesMessage) string { + if msg.Content == nil { + return "" } - - return fmt.Sprintf( - "request_type=%s text=%t chat=%t responses=%t embedding=%t speech=%t transcription=%t image=%t", - req.RequestType, - req.TextCompletionRequest != nil, - req.ChatRequest != nil, - req.ResponsesRequest != nil, - req.EmbeddingRequest != nil, - req.SpeechRequest != nil, - req.TranscriptionRequest != nil, - req.ImageGenerationRequest != nil, - ) + if msg.Content.ContentStr != nil { + return *msg.Content.ContentStr + } + if msg.Content.ContentBlocks != nil { + var parts []string + for _, block := range msg.Content.ContentBlocks { + if block.Text != nil { + parts = append(parts, *block.Text) + } + } + return strings.Join(parts, " ") + } + return "" } -// extractTextForEmbedding extracts meaningful text from different input types for embedding generation. 
-// Returns the text to embed and metadata for storage. +// extractTextForEmbedding flattens the request input into a single string +// suitable for embedding generation. PreLLMHook short-circuits embedding and +// transcription requests before this is called (their inputs aren't +// themselves embeddable), so this function only handles request types that +// reach performSemanticSearch. // // Text serialization format (for cache consistency): // - Chat API: "role: content" // - Responses API: "role: msgType: content" (when msgType is present), "role: content" (when msgType is empty) -// -// Note: Format updated to conditionally include msgType to avoid double colons and maintain consistency. -func (plugin *Plugin) extractTextForEmbedding(req *schemas.BifrostRequest) (string, string, error) { - metadata, err := plugin.buildRequestMetadataForCaching(req) - if err != nil { - return "", "", err - } - attachments := []string{} - +func (plugin *Plugin) extractTextForEmbedding(state *cacheState, req *schemas.BifrostRequest) (string, error) { switch { case req.TextCompletionRequest != nil: - metadataHash, err := getMetadataHash(metadata) - if err != nil { - return "", "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) - } - - var textContent string if req.TextCompletionRequest.Input.PromptStr != nil { - textContent = normalizeText(*req.TextCompletionRequest.Input.PromptStr) - } else if len(req.TextCompletionRequest.Input.PromptArray) > 0 { - textContent = normalizeText(strings.Join(req.TextCompletionRequest.Input.PromptArray, " ")) + return normalizeText(*req.TextCompletionRequest.Input.PromptStr), nil } - return textContent, metadataHash, nil + if len(req.TextCompletionRequest.Input.PromptArray) > 0 { + return normalizeText(strings.Join(req.TextCompletionRequest.Input.PromptArray, " ")), nil + } + return "", fmt.Errorf("no prompt found in text completion request") case req.ChatRequest != nil: - reqInput, ok := 
plugin.getInputForCaching(req).([]schemas.ChatMessage) + reqInput, ok := plugin.getInputForCaching(state, req).([]schemas.ChatMessage) if !ok { - return "", "", fmt.Errorf("failed to cast request input to chat messages") + return "", fmt.Errorf("failed to cast request input to chat messages") } - - // Serialize chat messages for embedding var textParts []string for _, msg := range reqInput { - // Extract content as string - // Content can be nil for messages like assistant tool-call messages - var content string - if msg.Content != nil { - if msg.Content.ContentStr != nil { - content = *msg.Content.ContentStr - } else if msg.Content.ContentBlocks != nil { - // For content blocks, extract text parts - var blockTexts []string - for _, block := range msg.Content.ContentBlocks { - if block.Text != nil { - blockTexts = append(blockTexts, *block.Text) - } - if block.ImageURLStruct != nil && block.ImageURLStruct.URL != "" { - attachments = append(attachments, block.ImageURLStruct.URL) - } - } - content = strings.Join(blockTexts, " ") - } - } - - if content != "" { - textParts = append(textParts, fmt.Sprintf("%s: %s", msg.Role, normalizeText(content))) + content := extractChatMessageContent(msg) + if content == "" { + continue } + textParts = append(textParts, fmt.Sprintf("%s: %s", msg.Role, normalizeText(content))) } - if len(textParts) == 0 { - return "", "", fmt.Errorf("no text content found in chat messages") - } - - if len(attachments) > 0 { - metadata["attachments"] = attachments + return "", fmt.Errorf("no text content found in chat messages") } - - metadataHash, err := getMetadataHash(metadata) - if err != nil { - return "", "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) - } - - return strings.Join(textParts, "\n"), metadataHash, nil + return strings.Join(textParts, "\n"), nil case req.ResponsesRequest != nil: - reqInput, ok := plugin.getInputForCaching(req).([]schemas.ResponsesMessage) + reqInput, ok := plugin.getInputForCaching(state, 
req).([]schemas.ResponsesMessage) if !ok { - return "", "", fmt.Errorf("failed to cast request input to responses messages") + return "", fmt.Errorf("failed to cast request input to responses messages") } - - // Serialize chat messages for embedding var textParts []string for _, msg := range reqInput { - // Extract content as string - // Content can be nil for messages like assistant tool-call messages - var content string - if msg.Content != nil { - if msg.Content.ContentStr != nil { - content = normalizeText(*msg.Content.ContentStr) - } else if msg.Content.ContentBlocks != nil { - // For content blocks, extract text parts - var blockTexts []string - for _, block := range msg.Content.ContentBlocks { - if block.Text != nil { - blockTexts = append(blockTexts, normalizeText(*block.Text)) - } - if block.ResponsesInputMessageContentBlockImage != nil && block.ResponsesInputMessageContentBlockImage.ImageURL != nil { - attachments = append(attachments, *block.ResponsesInputMessageContentBlockImage.ImageURL) - } - if block.ResponsesInputMessageContentBlockFile != nil && block.ResponsesInputMessageContentBlockFile.FileURL != nil { - attachments = append(attachments, *block.ResponsesInputMessageContentBlockFile.FileURL) - } - } - content = strings.Join(blockTexts, " ") - } + content := extractResponsesMessageContent(msg) + if content == "" { + continue } - + content = normalizeText(content) role := "" msgType := "" if msg.Role != nil { @@ -396,396 +406,291 @@ func (plugin *Plugin) extractTextForEmbedding(req *schemas.BifrostRequest) (stri if msg.Type != nil { msgType = string(*msg.Type) } - - if content != "" { - if msgType != "" { - textParts = append(textParts, fmt.Sprintf("%s: %s: %s", role, msgType, content)) - } else { - textParts = append(textParts, fmt.Sprintf("%s: %s", role, content)) - } + if msgType != "" { + textParts = append(textParts, fmt.Sprintf("%s: %s: %s", role, msgType, content)) + } else { + textParts = append(textParts, fmt.Sprintf("%s: %s", role, 
content)) } } - if len(textParts) == 0 { - return "", "", fmt.Errorf("no text content found in chat messages") - } - - if len(attachments) > 0 { - metadata["attachments"] = attachments + return "", fmt.Errorf("no text content found in responses messages") } - - metadataHash, err := getMetadataHash(metadata) - if err != nil { - return "", "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) - } - - return strings.Join(textParts, "\n"), metadataHash, nil + return strings.Join(textParts, "\n"), nil case req.SpeechRequest != nil: - if req.SpeechRequest.Input.Input != "" { - metadataHash, err := getMetadataHash(metadata) - if err != nil { - return "", "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) - } - - return req.SpeechRequest.Input.Input, metadataHash, nil - } - return "", "", fmt.Errorf("no input text found in speech request") - - case req.EmbeddingRequest != nil: - metadataHash, err := getMetadataHash(metadata) - if err != nil { - return "", "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) - } - - texts := req.EmbeddingRequest.Input.Texts - - if len(texts) == 0 && req.EmbeddingRequest.Input.Text != nil { - texts = []string{*req.EmbeddingRequest.Input.Text} - } - - var text string - for _, t := range texts { - text += t + " " + if req.SpeechRequest.Input.Input == "" { + return "", fmt.Errorf("no input text found in speech request") } - - return strings.TrimSpace(text), metadataHash, nil - - case req.TranscriptionRequest != nil: - // Skip semantic caching for transcription requests - return "", "", fmt.Errorf("transcription requests are not supported for semantic caching") + return normalizeText(req.SpeechRequest.Input.Input), nil case req.ImageGenerationRequest != nil: if req.ImageGenerationRequest.Input == nil || req.ImageGenerationRequest.Input.Prompt == "" { - return "", "", fmt.Errorf("no prompt found in image generation request") + return "", fmt.Errorf("no prompt found in image generation 
request") } - metadataHash, err := getMetadataHash(metadata) - if err != nil { - return "", "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) - } - return normalizeText(req.ImageGenerationRequest.Input.Prompt), metadataHash, nil + return normalizeText(req.ImageGenerationRequest.Input.Prompt), nil default: - return "", "", fmt.Errorf("unsupported input type for semantic caching (%s)", describeRequestShape(req)) - } -} - -func getMetadataHash(metadata map[string]interface{}) (string, error) { - // Use MarshalDeeplySorted for deterministic hashing - plain json.Marshal - // doesn't guarantee key ordering since Go maps have random iteration order - metadataJSON, err := schemas.MarshalDeeplySorted(metadata) - if err != nil { - return "", fmt.Errorf("failed to marshal metadata for metadata hash: %w", err) + return "", fmt.Errorf("unsupported input type for semantic caching") } - return fmt.Sprintf("%x", xxhash.Sum64(metadataJSON)), nil -} - -func (plugin *Plugin) generateDirectCacheID(provider schemas.ModelProvider, model string, cacheKey string, requestHash string, paramsHash string) string { - idInput := struct { - CacheKey string `json:"cache_key"` - RequestHash string `json:"request_hash"` - ParamsHash string `json:"params_hash"` - Provider string `json:"provider,omitempty"` - Model string `json:"model,omitempty"` - }{ - CacheKey: cacheKey, - RequestHash: requestHash, - ParamsHash: paramsHash, - } - - if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider { - idInput.Provider = string(provider) - } - if plugin.config.CacheByModel != nil && *plugin.config.CacheByModel { - idInput.Model = model - } - - idJSON, err := schemas.MarshalDeeplySorted(idInput) - if err != nil { - // Fallback: derive deterministic UUID from concatenated inputs - fallbackStr := cacheKey + requestHash + paramsHash - if plugin.config.CacheByProvider != nil && *plugin.config.CacheByProvider { - fallbackStr += string(provider) - } - if 
plugin.config.CacheByModel != nil && *plugin.config.CacheByModel { - fallbackStr += model - } - return uuid.NewSHA1(directCacheNamespace, []byte(fallbackStr)).String() - } - - return uuid.NewSHA1(directCacheNamespace, idJSON).String() } -// buildUnifiedMetadata constructs the unified metadata structure for VectorEntry -func (plugin *Plugin) buildUnifiedMetadata(provider schemas.ModelProvider, model string, paramsHash string, requestHash string, cacheKey string, ttl time.Duration) map[string]interface{} { +// buildUnifiedMetadata builds the property map written alongside the cache +// entry: the columns the vector store indexes for filtering (cache_key, +// provider, model, params_hash, expires_at) plus the from_bifrost marker +// used by Cleanup and ClearCacheForKey to scope deletes. Caller still adds +// the response payload (response or stream_chunks) before Add. +func (plugin *Plugin) buildUnifiedMetadata(provider schemas.ModelProvider, model string, paramsHash string, cacheKey string, ttl time.Duration) map[string]interface{} { unifiedMetadata := make(map[string]interface{}) - - // Top-level fields (outside params) unifiedMetadata["provider"] = string(provider) unifiedMetadata["model"] = model - unifiedMetadata["request_hash"] = requestHash unifiedMetadata["cache_key"] = cacheKey unifiedMetadata["from_bifrost_semantic_cache_plugin"] = true - - // Calculate expiration timestamp (current time + TTL) - expiresAt := time.Now().Add(ttl).Unix() - unifiedMetadata["expires_at"] = expiresAt - - // Individual param fields will be stored as params_* by the vectorstore - // We pass the params map to the vectorstore, and it handles the individual field storage + unifiedMetadata["expires_at"] = time.Now().Add(ttl).Unix() if paramsHash != "" { unifiedMetadata["params_hash"] = paramsHash } - return unifiedMetadata } -// addSingleResponse stores a single (non-streaming) response in unified VectorEntry format. 
-// responseData is the pre-marshaled JSON of the response; the caller must marshal -// synchronously before spawning the cache goroutine so the marshal cannot race -// with downstream mutation of the response struct. -func (plugin *Plugin) addSingleResponse(ctx context.Context, responseID string, responseData []byte, embedding []float32, metadata map[string]interface{}, ttl time.Duration) error { - // Add response field to metadata +// addNonStreamingResponse marshals the response and writes it as a single +// cache entry. The metadata map is mutated (response + stream_chunks added) +// — safe because the calling goroutine owns it. The ttl parameter is +// retained for symmetry with addStreamingResponse; the actual expiry is +// already encoded in metadata["expires_at"] by buildUnifiedMetadata. +func (plugin *Plugin) addNonStreamingResponse(ctx context.Context, responseID string, res *schemas.BifrostResponse, embedding []float32, metadata map[string]interface{}, ttl time.Duration) error { + responseData, err := json.Marshal(res) + if err != nil { + return fmt.Errorf("failed to marshal response: %w", err) + } metadata["response"] = string(responseData) metadata["stream_chunks"] = []string{} - // Store unified entry using new VectorStore interface if err := plugin.store.Add(ctx, plugin.config.VectorStoreNamespace, responseID, embedding, metadata); err != nil { return fmt.Errorf("failed to store unified cache entry: %w", err) } - plugin.logger.Debug(fmt.Sprintf("%s Successfully cached single response with ID: %s", PluginLoggerPrefix, responseID)) + plugin.logger.Debug("Successfully cached single response with ID: %s", responseID) return nil } -// addStreamingResponse handles streaming response storage by accumulating chunks -func (plugin *Plugin) addStreamingResponse(ctx context.Context, requestID string, storageID string, res *schemas.BifrostResponse, bifrostErr *schemas.BifrostError, embedding []float32, metadata map[string]interface{}, ttl time.Duration, 
isFinalChunk bool) error { - // Create accumulator if it doesn't exist +// addStreamingResponse appends one chunk to the per-request accumulator and, +// when the final chunk arrives, flushes the accumulated stream to the cache. +// Errors never reach this function: PostLLMHook returns early on bifrostErr +// (errors are always delivered as the final chunk), so an errored stream +// simply leaves its accumulator behind for the periodic reaper. +func (plugin *Plugin) addStreamingResponse(ctx context.Context, requestID string, storageID string, res *schemas.BifrostResponse, embedding []float32, metadata map[string]interface{}, ttl time.Duration, isFinalChunk bool) error { accumulator := plugin.getOrCreateStreamAccumulator(requestID, storageID, embedding, metadata, ttl) - // Create chunk from current response chunk := &StreamChunk{ Timestamp: time.Now(), Response: res, } - - // Check for finish reason or set error finish reason - if bifrostErr != nil { - // Error case - mark as final chunk with error - chunk.FinishReason = bifrost.Ptr("error") - } else if res != nil && res.ChatResponse != nil && len(res.ChatResponse.Choices) > 0 { - choice := res.ChatResponse.Choices[0] - if choice.ChatStreamResponseChoice != nil { - chunk.FinishReason = choice.FinishReason - } + if err := plugin.addStreamChunk(requestID, chunk); err != nil { + return fmt.Errorf("failed to add stream chunk: %w", err) } - // Add chunk to accumulator synchronously to maintain order - if err := plugin.addStreamChunk(requestID, chunk, isFinalChunk); err != nil { - return fmt.Errorf("failed to add stream chunk: %w", err) + if !isFinalChunk { + return nil } - // Check if this is the final chunk and gate final processing to ensure single invocation + // Gate final processing so it runs exactly once even if multiple chunks + // race here (shouldn't happen in practice but cheap insurance). 
accumulator.mu.Lock() - // Check for completion: either FinishReason is present, there's an error, or token usage exists alreadyComplete := accumulator.IsComplete - - // Track if any chunk has an error - if bifrostErr != nil { - accumulator.HasError = true - } - - if isFinalChunk && !alreadyComplete { + if !alreadyComplete { accumulator.IsComplete = true - accumulator.FinalTimestamp = chunk.Timestamp } accumulator.mu.Unlock() - // If this is the final chunk and hasn't been processed yet, process accumulated chunks - // Note: processAccumulatedStream will check for errors and skip caching if any errors occurred - if isFinalChunk && !alreadyComplete { - if processErr := plugin.processAccumulatedStream(ctx, requestID); processErr != nil { - plugin.logger.Warn("%s Failed to process accumulated stream for request %s: %v", PluginLoggerPrefix, requestID, processErr) - } + if alreadyComplete { + return nil + } + if err := plugin.processAccumulatedStream(ctx, requestID); err != nil { + plugin.logger.Warn("Failed to process accumulated stream for request %s: %v", requestID, err) } - return nil } -// getInputForCaching extracts request input for hashing/embedding without normalization. -// For Chat/Responses requests, it filters out system messages if configured but returns shallow copies. -// For other request types, it returns direct references to the input. 
-func (plugin *Plugin) getInputForCaching(req *schemas.BifrostRequest) interface{} { - switch req.RequestType { - case schemas.TextCompletionRequest, schemas.TextCompletionStreamRequest: - return req.TextCompletionRequest.Input - case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest: - originalMessages := req.ChatRequest.Input - filteredMessages := make([]schemas.ChatMessage, 0, len(originalMessages)) - for _, msg := range originalMessages { - // Skip system messages if configured to exclude them - if plugin.config.ExcludeSystemPrompt != nil && *plugin.config.ExcludeSystemPrompt && msg.Role == schemas.ChatMessageRoleSystem { +// parseStreamChunks parses stream_chunks data from the various shapes +// different vector store drivers hand back (Weaviate's JSON-decoded +// []interface{}, typed []string, or Redis's JSON-encoded string) into a +// flat []string of per-chunk JSON payloads. +// +// Non-string elements in the []interface{} case are dropped with a warning +// rather than failing the whole replay — partial cache hits are better than +// no hit at all. 
+func (plugin *Plugin) parseStreamChunks(streamData interface{}) ([]string, error) { + if streamData == nil { + return nil, fmt.Errorf("stream data is nil") + } + + switch v := streamData.(type) { + case []string: + return v, nil + case []interface{}: + result := make([]string, 0, len(v)) + for i, item := range v { + s, ok := item.(string) + if !ok { + plugin.logger.Warn("Stream chunk %d is not a string (got %T), skipping", i, item) continue } - filteredMessages = append(filteredMessages, msg) + result = append(result, s) } - return filteredMessages - case schemas.ResponsesRequest, schemas.ResponsesStreamRequest, schemas.WebSocketResponsesRequest: - originalMessages := req.ResponsesRequest.Input - filteredMessages := make([]schemas.ResponsesMessage, 0, len(originalMessages)) - for _, msg := range originalMessages { - // Skip system messages if configured to exclude them - if plugin.config.ExcludeSystemPrompt != nil && *plugin.config.ExcludeSystemPrompt && msg.Role != nil && *msg.Role == schemas.ResponsesInputMessageRoleSystem { - continue - } - filteredMessages = append(filteredMessages, msg) + return result, nil + case string: + // Redis: stream_chunks stored as a JSON-encoded array of strings. + var stringArray []string + if err := json.Unmarshal([]byte(v), &stringArray); err != nil { + return nil, fmt.Errorf("failed to parse JSON string: %w", err) } - return filteredMessages + return stringArray, nil + default: + return nil, fmt.Errorf("unsupported stream data type: %T", streamData) + } +} + +// getInputForCaching extracts request input for hashing/embedding without +// normalization. For Chat/Responses requests, system messages are filtered +// out when ExcludeSystemPrompt is enabled — that path returns a fresh slice; +// otherwise the original slice is returned by reference (no allocation). +// Other request types always return the underlying input directly. 
+// +// The slice for Chat/Responses is memoized on state so attachment extraction, +// embedding text extraction, and the history-threshold check reuse the same +// slice instead of re-walking on each call. State may be nil (tests / +// pre-state callers), in which case nothing is cached. +func (plugin *Plugin) getInputForCaching(state *cacheState, req *schemas.BifrostRequest) interface{} { + if state != nil && state.FilteredInput != nil { + return state.FilteredInput + } + excludeSystem := plugin.config.ExcludeSystemPrompt != nil && *plugin.config.ExcludeSystemPrompt + var out interface{} + switch req.RequestType { + case schemas.TextCompletionRequest, schemas.TextCompletionStreamRequest: + out = req.TextCompletionRequest.Input + case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest: + out = filterChatMessages(req.ChatRequest.Input, excludeSystem) + case schemas.ResponsesRequest, schemas.ResponsesStreamRequest, schemas.WebSocketResponsesRequest: + out = filterResponsesMessages(req.ResponsesRequest.Input, excludeSystem) case schemas.SpeechRequest, schemas.SpeechStreamRequest: - return req.SpeechRequest.Input.Input + out = req.SpeechRequest.Input.Input case schemas.EmbeddingRequest: - return req.EmbeddingRequest.Input + out = req.EmbeddingRequest.Input case schemas.TranscriptionRequest, schemas.TranscriptionStreamRequest: - return req.TranscriptionRequest.Input + out = req.TranscriptionRequest.Input case schemas.ImageGenerationRequest, schemas.ImageGenerationStreamRequest: - return req.ImageGenerationRequest.Input + out = req.ImageGenerationRequest.Input default: return nil } + if state != nil { + state.FilteredInput = out + } + return out +} + +// filterChatMessages returns msgs unchanged when excludeSystem is false. +// Otherwise, returns a copy with system messages dropped. 
+func filterChatMessages(msgs []schemas.ChatMessage, excludeSystem bool) []schemas.ChatMessage { + if !excludeSystem { + return msgs + } + out := make([]schemas.ChatMessage, 0, len(msgs)) + for _, m := range msgs { + if m.Role == schemas.ChatMessageRoleSystem { + continue + } + out = append(out, m) + } + return out } -// getNormalizedInputForCaching returns a copy of req.Input for hashing/embedding. The input is normalized. -// It applies text normalization (lowercase + trim) and optionally removes system messages. +// filterResponsesMessages returns msgs unchanged when excludeSystem is false. +// Otherwise, returns a copy with system messages dropped. +func filterResponsesMessages(msgs []schemas.ResponsesMessage, excludeSystem bool) []schemas.ResponsesMessage { + if !excludeSystem { + return msgs + } + out := make([]schemas.ResponsesMessage, 0, len(msgs)) + for _, m := range msgs { + if m.Role != nil && *m.Role == schemas.ResponsesInputMessageRoleSystem { + continue + } + out = append(out, m) + } + return out +} + +// getNormalizedInputForCaching returns a copy of req.Input with text fields +// lowercased + trimmed, suitable for hashing/embedding. System messages are +// dropped when ExcludeSystemPrompt is enabled. +// +// Allocation strategy: the original request must never be mutated, but the +// returned value only needs to round-trip through json.Marshal — it's hashed, +// not stored. So we shallow-copy each message struct and rewrite Content +// (the only field we normalize), sharing all other pointer fields with the +// original. This avoids the per-call message-graph deep copy that +// schemas.DeepCopy*Message would otherwise do. 
func (plugin *Plugin) getNormalizedInputForCaching(req *schemas.BifrostRequest) interface{} { + excludeSystem := plugin.config.ExcludeSystemPrompt != nil && *plugin.config.ExcludeSystemPrompt switch req.RequestType { case schemas.TextCompletionRequest, schemas.TextCompletionStreamRequest: - // Create a deep copy of the input to avoid mutating the original request - copiedInput := schemas.TextCompletionInput{} - if req.TextCompletionRequest.Input.PromptStr != nil { - copiedPromptStr := *req.TextCompletionRequest.Input.PromptStr - copiedInput.PromptStr = &copiedPromptStr - } else if len(req.TextCompletionRequest.Input.PromptArray) > 0 { - copiedPromptArray := make([]string, len(req.TextCompletionRequest.Input.PromptArray)) - copy(copiedPromptArray, req.TextCompletionRequest.Input.PromptArray) - copiedInput.PromptArray = copiedPromptArray - } - - if copiedInput.PromptStr != nil { - normalizedText := normalizeText(*copiedInput.PromptStr) - copiedInput.PromptStr = &normalizedText - } else if len(copiedInput.PromptArray) > 0 { - // Create a copy of the PromptArray and normalize each element - normalizedPromptArray := make([]string, len(copiedInput.PromptArray)) - copy(normalizedPromptArray, copiedInput.PromptArray) - for i, prompt := range normalizedPromptArray { - normalizedPromptArray[i] = normalizeText(prompt) + input := req.TextCompletionRequest.Input + out := schemas.TextCompletionInput{} + if input.PromptStr != nil { + ns := normalizeText(*input.PromptStr) + out.PromptStr = &ns + } else if len(input.PromptArray) > 0 { + arr := make([]string, len(input.PromptArray)) + for i, p := range input.PromptArray { + arr[i] = normalizeText(p) } - copiedInput.PromptArray = normalizedPromptArray + out.PromptArray = arr } - return copiedInput + return out case schemas.ChatCompletionRequest, schemas.ChatCompletionStreamRequest: originalMessages := req.ChatRequest.Input normalizedMessages := make([]schemas.ChatMessage, 0, len(originalMessages)) - for _, msg := range 
originalMessages { - // Skip system messages if configured to exclude them - if plugin.config.ExcludeSystemPrompt != nil && *plugin.config.ExcludeSystemPrompt && msg.Role == schemas.ChatMessageRoleSystem { + if excludeSystem && msg.Role == schemas.ChatMessageRoleSystem { continue } - - // Create a deep copy of the message with normalized content - normalizedMsg := schemas.DeepCopyChatMessage(msg) - - // Normalize message content - // Content can be nil for messages like assistant tool-call messages - if msg.Content != nil { - if msg.Content.ContentStr != nil { - normalizedContent := normalizeText(*msg.Content.ContentStr) - normalizedMsg.Content.ContentStr = &normalizedContent - } else if msg.Content.ContentBlocks != nil { - // Create a copy of content blocks with normalized text - normalizedBlocks := make([]schemas.ChatContentBlock, len(msg.Content.ContentBlocks)) - for i, block := range msg.Content.ContentBlocks { - normalizedBlocks[i] = block - if block.Text != nil { - normalizedText := normalizeText(*block.Text) - normalizedBlocks[i].Text = &normalizedText - } - } - normalizedMsg.Content.ContentBlocks = normalizedBlocks - } - } - - normalizedMessages = append(normalizedMessages, normalizedMsg) + normalizedMessages = append(normalizedMessages, normalizeChatMessage(msg)) } return normalizedMessages case schemas.ResponsesRequest, schemas.ResponsesStreamRequest, schemas.WebSocketResponsesRequest: originalMessages := req.ResponsesRequest.Input normalizedMessages := make([]schemas.ResponsesMessage, 0, len(originalMessages)) - for _, msg := range originalMessages { - // Skip system messages if configured to exclude them - if plugin.config.ExcludeSystemPrompt != nil && *plugin.config.ExcludeSystemPrompt && msg.Role != nil && *msg.Role == schemas.ResponsesInputMessageRoleSystem { + if excludeSystem && msg.Role != nil && *msg.Role == schemas.ResponsesInputMessageRoleSystem { continue } - - // Create a deep copy of the message with normalized content - normalizedMsg := 
schemas.DeepCopyResponsesMessage(msg) - - // Create a deep copy of the Content to avoid modifying the original - if msg.Content != nil { - if msg.Content.ContentStr != nil { - normalizedText := normalizeText(*msg.Content.ContentStr) - normalizedMsg.Content.ContentStr = &normalizedText - } else if msg.Content.ContentBlocks != nil { - // Create a copy of content blocks with normalized text - normalizedBlocks := make([]schemas.ResponsesMessageContentBlock, len(msg.Content.ContentBlocks)) - for i, block := range msg.Content.ContentBlocks { - normalizedBlocks[i] = block - if block.Text != nil { - normalizedText := normalizeText(*block.Text) - normalizedBlocks[i].Text = &normalizedText - } - } - normalizedMsg.Content.ContentBlocks = normalizedBlocks - } - } - - normalizedMessages = append(normalizedMessages, normalizedMsg) + normalizedMessages = append(normalizedMessages, normalizeResponsesMessage(msg)) } return normalizedMessages case schemas.SpeechRequest, schemas.SpeechStreamRequest: return normalizeText(req.SpeechRequest.Input.Input) case schemas.EmbeddingRequest: - // Create a deep copy of the input to avoid mutating the original request - copiedInput := schemas.EmbeddingInput{} - if req.EmbeddingRequest.Input.Text != nil { - copiedText := *req.EmbeddingRequest.Input.Text - copiedInput.Text = &copiedText - } else if len(req.EmbeddingRequest.Input.Texts) > 0 { - copiedTexts := make([]string, len(req.EmbeddingRequest.Input.Texts)) - copy(copiedTexts, req.EmbeddingRequest.Input.Texts) - copiedInput.Texts = copiedTexts - } else if req.EmbeddingRequest.Input.Embedding != nil { - copiedEmbedding := make([]int, len(req.EmbeddingRequest.Input.Embedding)) - copy(copiedEmbedding, req.EmbeddingRequest.Input.Embedding) - copiedInput.Embedding = copiedEmbedding - } else if req.EmbeddingRequest.Input.Embeddings != nil { - copiedEmbeddings := make([][]int, len(req.EmbeddingRequest.Input.Embeddings)) - copy(copiedEmbeddings, req.EmbeddingRequest.Input.Embeddings) - 
copiedInput.Embeddings = copiedEmbeddings - } - if copiedInput.Text != nil { - normalizedText := normalizeText(*copiedInput.Text) - copiedInput.Text = &normalizedText - } else if len(copiedInput.Texts) > 0 { - normalizedTexts := make([]string, len(copiedInput.Texts)) - for i, text := range copiedInput.Texts { - normalizedTexts[i] = normalizeText(text) + input := req.EmbeddingRequest.Input + out := schemas.EmbeddingInput{} + if input.Text != nil { + ns := normalizeText(*input.Text) + out.Text = &ns + } else if len(input.Texts) > 0 { + arr := make([]string, len(input.Texts)) + for i, t := range input.Texts { + arr[i] = normalizeText(t) } - copiedInput.Texts = normalizedTexts - } - return copiedInput + out.Texts = arr + } else if input.Embedding != nil { + // Numeric embeddings aren't text-normalizable but must still appear + // in the hash payload, so copy the slice to avoid aliasing. + out.Embedding = append([]int(nil), input.Embedding...) + } else if input.Embeddings != nil { + out.Embeddings = append([][]int(nil), input.Embeddings...) + } + return out case schemas.TranscriptionRequest, schemas.TranscriptionStreamRequest: return req.TranscriptionRequest.Input case schemas.ImageGenerationRequest, schemas.ImageGenerationStreamRequest: @@ -800,18 +705,60 @@ func (plugin *Plugin) getNormalizedInputForCaching(req *schemas.BifrostRequest) } } -// removeField removes the first occurrence of target from the slice. -func removeField(arr []string, target string) []string { - for i, v := range arr { - if v == target { - // remove element at index i - return append(arr[:i], arr[i+1:]...) +// normalizeChatMessage returns a shallow copy of msg with its Content +// rewritten so text fields are lowercased + trimmed. Other pointer fields +// (ToolCalls, Annotations, ChatToolMessage, ChatAssistantMessage) are +// aliased — safe because we don't mutate them. 
+func normalizeChatMessage(msg schemas.ChatMessage) schemas.ChatMessage { + out := msg + if msg.Content == nil { + return out + } + nc := *msg.Content + if msg.Content.ContentStr != nil { + ns := normalizeText(*msg.Content.ContentStr) + nc.ContentStr = &ns + } else if msg.Content.ContentBlocks != nil { + blocks := make([]schemas.ChatContentBlock, len(msg.Content.ContentBlocks)) + for i, b := range msg.Content.ContentBlocks { + blocks[i] = b + if b.Text != nil { + nt := normalizeText(*b.Text) + blocks[i].Text = &nt + } + } + nc.ContentBlocks = blocks + } + out.Content = &nc + return out +} + +// normalizeResponsesMessage mirrors normalizeChatMessage for the Responses API. +func normalizeResponsesMessage(msg schemas.ResponsesMessage) schemas.ResponsesMessage { + out := msg + if msg.Content == nil { + return out + } + nc := *msg.Content + if msg.Content.ContentStr != nil { + ns := normalizeText(*msg.Content.ContentStr) + nc.ContentStr = &ns + } else if msg.Content.ContentBlocks != nil { + blocks := make([]schemas.ResponsesMessageContentBlock, len(msg.Content.ContentBlocks)) + for i, b := range msg.Content.ContentBlocks { + blocks[i] = b + if b.Text != nil { + nt := normalizeText(*b.Text) + blocks[i].Text = &nt + } } + nc.ContentBlocks = blocks } - return arr // unchanged if target not found + out.Content = &nc + return out } -// extractChatParametersToMetadata extracts Chat API parameters into metadata map +// extractChatParametersToMetadata extracts Chat API parameters into metadata map. 
func (plugin *Plugin) extractChatParametersToMetadata(params *schemas.ChatParameters, metadata map[string]interface{}) { if params.ToolChoice != nil { if params.ToolChoice.ChatToolChoiceStr != nil { @@ -820,87 +767,53 @@ func (plugin *Plugin) extractChatParametersToMetadata(params *schemas.ChatParame metadata["tool_choice"] = params.ToolChoice.ChatToolChoiceStruct.Function.Name } } - if params.Temperature != nil { - metadata["temperature"] = *params.Temperature - } - if params.TopP != nil { - metadata["top_p"] = *params.TopP - } - if params.MaxCompletionTokens != nil { - metadata["max_tokens"] = *params.MaxCompletionTokens - } - if params.Stop != nil { - metadata["stop_sequences"] = params.Stop - } - if params.PresencePenalty != nil { - metadata["presence_penalty"] = *params.PresencePenalty - } - if params.FrequencyPenalty != nil { - metadata["frequency_penalty"] = *params.FrequencyPenalty - } - if params.ParallelToolCalls != nil { - metadata["parallel_tool_calls"] = *params.ParallelToolCalls - } - if params.User != nil { - metadata["user"] = *params.User - } - if params.LogitBias != nil { - metadata["logit_bias"] = *params.LogitBias - } - if params.LogProbs != nil { - metadata["logprobs"] = *params.LogProbs - } - if params.Modalities != nil { - metadata["modalities"] = params.Modalities - } - if params.PromptCacheKey != nil { - metadata["prompt_cache_key"] = *params.PromptCacheKey - } - if params.Reasoning != nil && params.Reasoning.Enabled != nil { - metadata["reasoning_enabled"] = *params.Reasoning.Enabled - } - if params.Reasoning != nil && params.Reasoning.Effort != nil { - metadata["reasoning_effort"] = *params.Reasoning.Effort + putIfSet(metadata, "temperature", params.Temperature) + putIfSet(metadata, "top_p", params.TopP) + putIfSet(metadata, "max_tokens", params.MaxCompletionTokens) + putSortedSetIfNonEmpty(metadata, "stop_sequences", params.Stop) + putIfSet(metadata, "presence_penalty", params.PresencePenalty) + putIfSet(metadata, "frequency_penalty", 
params.FrequencyPenalty) + putIfSet(metadata, "parallel_tool_calls", params.ParallelToolCalls) + putIfSet(metadata, "user", params.User) + putIfSet(metadata, "logit_bias", params.LogitBias) + putIfSet(metadata, "logprobs", params.LogProbs) + putSortedSetIfNonEmpty(metadata, "modalities", params.Modalities) + putIfSet(metadata, "prompt_cache_key", params.PromptCacheKey) + if params.Reasoning != nil { + putIfSet(metadata, "reasoning_enabled", params.Reasoning.Enabled) + putIfSet(metadata, "reasoning_effort", params.Reasoning.Effort) } if params.ResponseFormat != nil { + // ResponseFormat is a struct pointer that callers expect to round-trip + // through JSON; store the pointer directly so MarshalDeeplySorted walks it. metadata["response_format"] = params.ResponseFormat } - if params.SafetyIdentifier != nil { - metadata["safety_identifier"] = *params.SafetyIdentifier - } - if params.Seed != nil { - metadata["seed"] = *params.Seed - } - if params.ServiceTier != nil { - metadata["service_tier"] = *params.ServiceTier - } - if params.Store != nil { - metadata["store"] = *params.Store - } - if params.TopLogProbs != nil { - metadata["top_logprobs"] = *params.TopLogProbs - } - if params.Verbosity != nil { - metadata["verbosity"] = *params.Verbosity - } + putIfSet(metadata, "safety_identifier", params.SafetyIdentifier) + putIfSet(metadata, "seed", params.Seed) + putIfSet(metadata, "service_tier", params.ServiceTier) + putIfSet(metadata, "store", params.Store) + putIfSet(metadata, "top_logprobs", params.TopLogProbs) + putIfSet(metadata, "verbosity", params.Verbosity) if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } if len(params.Tools) > 0 { - tools := make([]interface{}, len(params.Tools)) - for i, t := range params.Tools { - tools[i] = t - } - if toolsJSON, err := schemas.MarshalDeeplySorted(tools); err != nil { - plugin.logger.Warn("%s Failed to marshal tools for metadata: %v", PluginLoggerPrefix, err) - } else { - toolHash := 
xxhash.Sum64(toolsJSON) - metadata["tools_hash"] = fmt.Sprintf("%x", toolHash) + // Tools are an order-insensitive set; producer-side ordering (notably + // MCP's randomized map iteration) must not perturb the request hash. + if toolsHash, err := hashSortedSet(params.Tools, func(t schemas.ChatTool) string { + if t.Function == nil { + return "" + } + return t.Function.Name + }); err != nil { + plugin.logger.Warn("Failed to marshal tools for metadata: %v", err) + } else if toolsHash != "" { + metadata["tools_hash"] = toolsHash } } } -// extractResponsesParametersToMetadata extracts Responses API parameters into metadata map +// extractResponsesParametersToMetadata extracts Responses API parameters into metadata map. func (plugin *Plugin) extractResponsesParametersToMetadata(params *schemas.ResponsesParameters, metadata map[string]interface{}) { if params.ToolChoice != nil { if params.ToolChoice.ResponsesToolChoiceStr != nil { @@ -909,158 +822,86 @@ func (plugin *Plugin) extractResponsesParametersToMetadata(params *schemas.Respo metadata["tool_choice"] = *params.ToolChoice.ResponsesToolChoiceStruct.Name } } - if params.Temperature != nil { - metadata["temperature"] = *params.Temperature - } - if params.TopP != nil { - metadata["top_p"] = *params.TopP - } - if params.MaxOutputTokens != nil { - metadata["max_tokens"] = *params.MaxOutputTokens - } - if params.ParallelToolCalls != nil { - metadata["parallel_tool_calls"] = *params.ParallelToolCalls - } - if params.Background != nil { - metadata["background"] = *params.Background - } - if params.Conversation != nil { - metadata["conversation"] = *params.Conversation - } - if params.Include != nil { - metadata["include"] = params.Include - } - if params.Instructions != nil { - metadata["instructions"] = *params.Instructions - } - if params.MaxToolCalls != nil { - metadata["max_tool_calls"] = *params.MaxToolCalls - } - if params.PreviousResponseID != nil { - metadata["previous_response_id"] = *params.PreviousResponseID - } - 
if params.PromptCacheKey != nil { - metadata["prompt_cache_key"] = *params.PromptCacheKey - } + putIfSet(metadata, "temperature", params.Temperature) + putIfSet(metadata, "top_p", params.TopP) + putIfSet(metadata, "max_tokens", params.MaxOutputTokens) + putIfSet(metadata, "parallel_tool_calls", params.ParallelToolCalls) + putIfSet(metadata, "background", params.Background) + putIfSet(metadata, "conversation", params.Conversation) + putSortedSetIfNonEmpty(metadata, "include", params.Include) + putIfSet(metadata, "instructions", params.Instructions) + putIfSet(metadata, "max_tool_calls", params.MaxToolCalls) + putIfSet(metadata, "previous_response_id", params.PreviousResponseID) + putIfSet(metadata, "prompt_cache_key", params.PromptCacheKey) if params.Reasoning != nil { - if params.Reasoning.Effort != nil { - metadata["reasoning_effort"] = *params.Reasoning.Effort - } - if params.Reasoning.MaxTokens != nil { - metadata["reasoning_max_tokens"] = *params.Reasoning.MaxTokens - } - if params.Reasoning.Summary != nil { - metadata["reasoning_summary"] = *params.Reasoning.Summary - } - } - if params.SafetyIdentifier != nil { - metadata["safety_identifier"] = *params.SafetyIdentifier - } - if params.ServiceTier != nil { - metadata["service_tier"] = *params.ServiceTier - } - if params.Store != nil { - metadata["store"] = *params.Store + putIfSet(metadata, "reasoning_effort", params.Reasoning.Effort) + putIfSet(metadata, "reasoning_max_tokens", params.Reasoning.MaxTokens) + putIfSet(metadata, "reasoning_summary", params.Reasoning.Summary) } + putIfSet(metadata, "safety_identifier", params.SafetyIdentifier) + putIfSet(metadata, "service_tier", params.ServiceTier) + putIfSet(metadata, "store", params.Store) if params.Text != nil { - if params.Text.Verbosity != nil { - metadata["text_verbosity"] = *params.Text.Verbosity - } + putIfSet(metadata, "text_verbosity", params.Text.Verbosity) if params.Text.Format != nil { metadata["text_format_type"] = params.Text.Format.Type } } - if 
params.TopLogProbs != nil { - metadata["top_logprobs"] = *params.TopLogProbs - } - if params.Truncation != nil { - metadata["truncation"] = *params.Truncation - } + putIfSet(metadata, "top_logprobs", params.TopLogProbs) + putIfSet(metadata, "truncation", params.Truncation) if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } if len(params.Tools) > 0 { - tools := make([]interface{}, len(params.Tools)) - for i, t := range params.Tools { - tools[i] = t - } - if toolsJSON, err := schemas.MarshalDeeplySorted(tools); err != nil { - plugin.logger.Warn("%s Failed to marshal tools for metadata: %v", PluginLoggerPrefix, err) - } else { - toolHash := xxhash.Sum64(toolsJSON) - metadata["tools_hash"] = fmt.Sprintf("%x", toolHash) + // Tools are an order-insensitive set; producer-side ordering (notably + // MCP's randomized map iteration) must not perturb the request hash. + if toolsHash, err := hashSortedSet(params.Tools, func(t schemas.ResponsesTool) string { + if t.Name == nil { + return "" + } + return *t.Name + }); err != nil { + plugin.logger.Warn("Failed to marshal tools for metadata: %v", err) + } else if toolsHash != "" { + metadata["tools_hash"] = toolsHash } } } -// extractTextCompletionParametersToMetadata extracts Text Completion parameters into metadata map +// extractTextCompletionParametersToMetadata extracts Text Completion parameters into metadata map. 
func (plugin *Plugin) extractTextCompletionParametersToMetadata(params *schemas.TextCompletionParameters, metadata map[string]interface{}) { - if params.Temperature != nil { - metadata["temperature"] = *params.Temperature - } - if params.TopP != nil { - metadata["top_p"] = *params.TopP - } - if params.MaxTokens != nil { - metadata["max_tokens"] = *params.MaxTokens - } - if params.Stop != nil { - metadata["stop_sequences"] = params.Stop - } - if params.PresencePenalty != nil { - metadata["presence_penalty"] = *params.PresencePenalty - } - if params.FrequencyPenalty != nil { - metadata["frequency_penalty"] = *params.FrequencyPenalty - } - if params.User != nil { - metadata["user"] = *params.User - } - if params.BestOf != nil { - metadata["best_of"] = *params.BestOf - } - if params.Echo != nil { - metadata["echo"] = *params.Echo - } - if params.LogitBias != nil { - metadata["logit_bias"] = *params.LogitBias - } - if params.LogProbs != nil { - metadata["logprobs"] = *params.LogProbs - } - if params.N != nil { - metadata["n"] = *params.N - } - if params.Seed != nil { - metadata["seed"] = *params.Seed - } - if params.Suffix != nil { - metadata["suffix"] = *params.Suffix - } + putIfSet(metadata, "temperature", params.Temperature) + putIfSet(metadata, "top_p", params.TopP) + putIfSet(metadata, "max_tokens", params.MaxTokens) + putSortedSetIfNonEmpty(metadata, "stop_sequences", params.Stop) + putIfSet(metadata, "presence_penalty", params.PresencePenalty) + putIfSet(metadata, "frequency_penalty", params.FrequencyPenalty) + putIfSet(metadata, "user", params.User) + putIfSet(metadata, "best_of", params.BestOf) + putIfSet(metadata, "echo", params.Echo) + putIfSet(metadata, "logit_bias", params.LogitBias) + putIfSet(metadata, "logprobs", params.LogProbs) + putIfSet(metadata, "n", params.N) + putIfSet(metadata, "seed", params.Seed) + putIfSet(metadata, "suffix", params.Suffix) if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } } -// 
extractSpeechParametersToMetadata extracts Speech parameters into metadata map +// extractSpeechParametersToMetadata extracts Speech parameters into metadata map. func (plugin *Plugin) extractSpeechParametersToMetadata(params *schemas.SpeechParameters, metadata map[string]interface{}) { if params == nil { return } - - if params.Speed != nil { - metadata["speed"] = *params.Speed - } + putIfSet(metadata, "speed", params.Speed) if params.ResponseFormat != "" { metadata["response_format"] = params.ResponseFormat } if params.Instructions != "" { metadata["instructions"] = params.Instructions } - // Check if VoiceConfig.Voice is non-nil before accessing it - if params.VoiceConfig.Voice != nil { - metadata["voice"] = *params.VoiceConfig.Voice - } + putIfSet(metadata, "voice", params.VoiceConfig.Voice) if len(params.VoiceConfig.MultiVoiceConfig) > 0 { flattenedVC := make([]string, len(params.VoiceConfig.MultiVoiceConfig)) for i, vc := range params.VoiceConfig.MultiVoiceConfig { @@ -1068,117 +909,97 @@ func (plugin *Plugin) extractSpeechParametersToMetadata(params *schemas.SpeechPa } metadata["multi_voice_count"] = flattenedVC } + if len(params.PronunciationDictionaryLocators) > 0 { + if hash, err := hashSortedSet(params.PronunciationDictionaryLocators, func(l schemas.SpeechPronunciationDictionaryLocator) string { + return l.PronunciationDictionaryID + }); err != nil { + plugin.logger.Warn("Failed to marshal pronunciation_dictionary_locators for metadata: %v", err) + } else if hash != "" { + metadata["pronunciation_dictionary_locators_hash"] = hash + } + } if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } } -// extractEmbeddingParametersToMetadata extracts Embedding parameters into metadata map +// extractEmbeddingParametersToMetadata extracts Embedding parameters into metadata map. 
func (plugin *Plugin) extractEmbeddingParametersToMetadata(params *schemas.EmbeddingParameters, metadata map[string]interface{}) { - if params.EncodingFormat != nil { - metadata["encoding_format"] = *params.EncodingFormat - } - if params.Dimensions != nil { - metadata["dimensions"] = *params.Dimensions - } + putIfSet(metadata, "encoding_format", params.EncodingFormat) + putIfSet(metadata, "dimensions", params.Dimensions) if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } } -// extractTranscriptionParametersToMetadata extracts Transcription parameters into metadata map +// extractTranscriptionParametersToMetadata extracts Transcription parameters into metadata map. func (plugin *Plugin) extractTranscriptionParametersToMetadata(params *schemas.TranscriptionParameters, metadata map[string]interface{}) { - if params.Language != nil { - metadata["language"] = *params.Language - } - if params.ResponseFormat != nil { - metadata["response_format"] = *params.ResponseFormat - } - if params.Prompt != nil { - metadata["prompt"] = *params.Prompt - } - if params.Format != nil { - metadata["file_format"] = *params.Format + putIfSet(metadata, "language", params.Language) + putIfSet(metadata, "response_format", params.ResponseFormat) + putIfSet(metadata, "prompt", params.Prompt) + putIfSet(metadata, "file_format", params.Format) + putSortedSetIfNonEmpty(metadata, "timestamp_granularities", params.TimestampGranularities) + putSortedSetIfNonEmpty(metadata, "include", params.Include) + if len(params.AdditionalFormats) > 0 { + if hash, err := hashSortedSet(params.AdditionalFormats, func(f schemas.TranscriptionAdditionalFormat) string { + return string(f.Format) + }); err != nil { + plugin.logger.Warn("Failed to marshal additional_formats for metadata: %v", err) + } else if hash != "" { + metadata["additional_formats_hash"] = hash + } } if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } } -// extractImageGenerationParametersToMetadata 
extracts Image Generation parameters into metadata map +// extractImageGenerationParametersToMetadata extracts Image Generation parameters into metadata map. func (plugin *Plugin) extractImageGenerationParametersToMetadata(params *schemas.ImageGenerationParameters, metadata map[string]interface{}) { if params == nil { return } - - if params.N != nil { - metadata["n"] = *params.N - } - if params.Background != nil { - metadata["background"] = *params.Background - } - if params.Moderation != nil { - metadata["moderation"] = *params.Moderation - } - if params.PartialImages != nil { - metadata["partial_images"] = *params.PartialImages - } - if params.Size != nil { - metadata["size"] = *params.Size - } - if params.Quality != nil { - metadata["quality"] = *params.Quality - } - if params.OutputCompression != nil { - metadata["output_compression"] = *params.OutputCompression - } - if params.OutputFormat != nil { - metadata["output_format"] = *params.OutputFormat - } - if params.Style != nil { - metadata["style"] = *params.Style + putIfSet(metadata, "n", params.N) + putIfSet(metadata, "background", params.Background) + putIfSet(metadata, "moderation", params.Moderation) + putIfSet(metadata, "partial_images", params.PartialImages) + putIfSet(metadata, "size", params.Size) + putIfSet(metadata, "quality", params.Quality) + putIfSet(metadata, "output_compression", params.OutputCompression) + putIfSet(metadata, "output_format", params.OutputFormat) + putIfSet(metadata, "style", params.Style) + putIfSet(metadata, "response_format", params.ResponseFormat) + putIfSet(metadata, "seed", params.Seed) + putIfSet(metadata, "negative_prompt", params.NegativePrompt) + putIfSet(metadata, "num_inference_steps", params.NumInferenceSteps) + putIfSet(metadata, "user", params.User) + if len(params.InputImages) > 0 { + metadata["input_images"] = params.InputImages } - if params.ResponseFormat != nil { - metadata["response_format"] = *params.ResponseFormat - } - if params.Seed != nil { - 
metadata["seed"] = *params.Seed - } - if params.NegativePrompt != nil { - metadata["negative_prompt"] = *params.NegativePrompt - } - if params.NumInferenceSteps != nil { - metadata["num_inference_steps"] = *params.NumInferenceSteps - } - if params.User != nil { - metadata["user"] = *params.User - } - if len(params.ExtraParams) > 0 { maps.Copy(metadata, params.ExtraParams) } } -func (plugin *Plugin) isConversationHistoryThresholdExceeded(req *schemas.BifrostRequest) bool { +// isConversationHistoryThresholdExceeded returns true when the request's +// conversation history is longer than ConversationHistoryThreshold. Long +// histories are unlikely to repeat and unlikely to be semantically similar +// to other requests, so caching them mostly bloats the store; PreLLMHook +// uses this to skip caching such requests entirely. +func (plugin *Plugin) isConversationHistoryThresholdExceeded(state *cacheState, req *schemas.BifrostRequest) bool { switch { case req.ChatRequest != nil: - input, ok := plugin.getInputForCaching(req).([]schemas.ChatMessage) + input, ok := plugin.getInputForCaching(state, req).([]schemas.ChatMessage) if !ok { return false } - if len(input) > plugin.config.ConversationHistoryThreshold { - return true - } - return false + return len(input) > plugin.config.ConversationHistoryThreshold case req.ResponsesRequest != nil: - input, ok := plugin.getInputForCaching(req).([]schemas.ResponsesMessage) + input, ok := plugin.getInputForCaching(state, req).([]schemas.ResponsesMessage) if !ok { return false } - if len(input) > plugin.config.ConversationHistoryThreshold { - return true - } - return false + return len(input) > plugin.config.ConversationHistoryThreshold default: return false } diff --git a/transports/bifrost-http/handlers/cache.go b/transports/bifrost-http/handlers/cache.go index c46515dc60..1f173f9679 100644 --- a/transports/bifrost-http/handlers/cache.go +++ b/transports/bifrost-http/handlers/cache.go @@ -8,8 +8,16 @@ import ( 
"github.com/valyala/fasthttp" ) +// cacheClearer is the minimal contract the handler needs from the semantic +// cache plugin. Defined here (rather than imported) so tests can substitute +// a fake without spinning up a real vector store. +type cacheClearer interface { + ClearCacheForCacheID(cacheID string) error + ClearCacheForKey(cacheKey string) error +} + type CacheHandler struct { - plugin *semanticcache.Plugin + plugin cacheClearer } func NewCacheHandler(plugin schemas.LLMPlugin) *CacheHandler { @@ -24,17 +32,17 @@ func NewCacheHandler(plugin schemas.LLMPlugin) *CacheHandler { } func (h *CacheHandler) RegisterRoutes(r *router.Router, middlewares ...schemas.BifrostHTTPMiddleware) { - r.DELETE("/api/cache/clear/{requestId}", lib.ChainMiddlewares(h.clearCache, middlewares...)) + r.DELETE("/api/cache/clear/{cacheId}", lib.ChainMiddlewares(h.clearCache, middlewares...)) r.DELETE("/api/cache/clear-by-key/{cacheKey}", lib.ChainMiddlewares(h.clearCacheByKey, middlewares...)) } func (h *CacheHandler) clearCache(ctx *fasthttp.RequestCtx) { - requestID, ok := ctx.UserValue("requestId").(string) - if !ok { - SendError(ctx, fasthttp.StatusBadRequest, "Invalid request ID") + cacheID, ok := ctx.UserValue("cacheId").(string) + if !ok || cacheID == "" { + SendError(ctx, fasthttp.StatusBadRequest, "Invalid cache ID") return } - if err := h.plugin.ClearCacheForRequestID(requestID); err != nil { + if err := h.plugin.ClearCacheForCacheID(cacheID); err != nil { SendError(ctx, fasthttp.StatusInternalServerError, "Failed to clear cache") return } diff --git a/transports/bifrost-http/handlers/cache_test.go b/transports/bifrost-http/handlers/cache_test.go new file mode 100644 index 0000000000..a27e763c9b --- /dev/null +++ b/transports/bifrost-http/handlers/cache_test.go @@ -0,0 +1,139 @@ +package handlers + +import ( + "errors" + "strings" + "testing" + + "github.com/valyala/fasthttp" +) + +// fakeCacheClearer records calls and returns configured errors so the handler +// branches can 
be exercised without a real semantic cache plugin. +type fakeCacheClearer struct { + clearByID func(string) error + clearByKey func(string) error + idCalls []string + keyCalls []string +} + +func (f *fakeCacheClearer) ClearCacheForCacheID(id string) error { + f.idCalls = append(f.idCalls, id) + if f.clearByID != nil { + return f.clearByID(id) + } + return nil +} + +func (f *fakeCacheClearer) ClearCacheForKey(key string) error { + f.keyCalls = append(f.keyCalls, key) + if f.clearByKey != nil { + return f.clearByKey(key) + } + return nil +} + +func newCacheCtx(userKey, userVal string) *fasthttp.RequestCtx { + ctx := &fasthttp.RequestCtx{} + if userKey != "" { + ctx.SetUserValue(userKey, userVal) + } + return ctx +} + +// ----------------------------------------------------------------------------- +// clearCache (DELETE /api/cache/clear/{cacheId}) +// ----------------------------------------------------------------------------- + +func TestClearCache_OK(t *testing.T) { + clearer := &fakeCacheClearer{} + h := &CacheHandler{plugin: clearer} + + ctx := newCacheCtx("cacheId", "abc-123") + h.clearCache(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusOK { + t.Fatalf("expected 200, got %d body=%s", got, ctx.Response.Body()) + } + if len(clearer.idCalls) != 1 || clearer.idCalls[0] != "abc-123" { + t.Fatalf("expected ClearCacheForCacheID('abc-123'), got %v", clearer.idCalls) + } +} + +func TestClearCache_RejectsEmptyID(t *testing.T) { + clearer := &fakeCacheClearer{} + h := &CacheHandler{plugin: clearer} + + ctx := newCacheCtx("cacheId", "") + h.clearCache(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusBadRequest { + t.Fatalf("expected 400 for empty id, got %d", got) + } + if len(clearer.idCalls) != 0 { + t.Fatalf("expected no Clear calls on bad id, got %v", clearer.idCalls) + } +} + +func TestClearCache_MissingUserValue(t *testing.T) { + clearer := &fakeCacheClearer{} + h := &CacheHandler{plugin: clearer} + + // No user value set at 
all (simulates a routing misconfiguration). + ctx := &fasthttp.RequestCtx{} + h.clearCache(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusBadRequest { + t.Fatalf("expected 400 when cacheId user value missing, got %d", got) + } +} + +func TestClearCache_PluginErrorReturns500(t *testing.T) { + clearer := &fakeCacheClearer{ + clearByID: func(string) error { return errors.New("store unavailable") }, + } + h := &CacheHandler{plugin: clearer} + + ctx := newCacheCtx("cacheId", "abc-123") + h.clearCache(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusInternalServerError { + t.Fatalf("expected 500 on plugin error, got %d", got) + } + if !strings.Contains(string(ctx.Response.Body()), "Failed to clear cache") { + t.Fatalf("expected 'Failed to clear cache' in body, got %s", ctx.Response.Body()) + } +} + +// ----------------------------------------------------------------------------- +// clearCacheByKey (DELETE /api/cache/clear-by-key/{cacheKey}) +// ----------------------------------------------------------------------------- + +func TestClearCacheByKey_OK(t *testing.T) { + clearer := &fakeCacheClearer{} + h := &CacheHandler{plugin: clearer} + + ctx := newCacheCtx("cacheKey", "session-42") + h.clearCacheByKey(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusOK { + t.Fatalf("expected 200, got %d body=%s", got, ctx.Response.Body()) + } + if len(clearer.keyCalls) != 1 || clearer.keyCalls[0] != "session-42" { + t.Fatalf("expected ClearCacheForKey('session-42'), got %v", clearer.keyCalls) + } +} + +func TestClearCacheByKey_PluginErrorReturns500(t *testing.T) { + clearer := &fakeCacheClearer{ + clearByKey: func(string) error { return errors.New("vector store down") }, + } + h := &CacheHandler{plugin: clearer} + + ctx := newCacheCtx("cacheKey", "session-42") + h.clearCacheByKey(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusInternalServerError { + t.Fatalf("expected 500 on plugin error, got %d", got) + 
} +} diff --git a/transports/bifrost-http/handlers/logging.go b/transports/bifrost-http/handlers/logging.go index 56282e40b4..8521191964 100644 --- a/transports/bifrost-http/handlers/logging.go +++ b/transports/bifrost-http/handlers/logging.go @@ -475,6 +475,9 @@ func (h *LoggingHandler) getLogs(ctx *fasthttp.RequestCtx) { filters.MissingCostOnly = val } } + if cacheHitTypes := string(ctx.QueryArgs().Peek("cache_hit_types")); cacheHitTypes != "" { + filters.CacheHitTypes = parseCommaSeparated(cacheHitTypes) + } if contentSearch := string(ctx.QueryArgs().Peek("content_search")); contentSearch != "" { filters.ContentSearch = contentSearch } @@ -713,6 +716,9 @@ func (h *LoggingHandler) getLogsStats(ctx *fasthttp.RequestCtx) { filters.MissingCostOnly = val } } + if cacheHitTypes := string(ctx.QueryArgs().Peek("cache_hit_types")); cacheHitTypes != "" { + filters.CacheHitTypes = parseCommaSeparated(cacheHitTypes) + } if contentSearch := string(ctx.QueryArgs().Peek("content_search")); contentSearch != "" { filters.ContentSearch = contentSearch } @@ -869,6 +875,9 @@ func parseHistogramFilters(ctx *fasthttp.RequestCtx) *logstore.SearchFilters { filters.MissingCostOnly = val } } + if cacheHitTypes := string(ctx.QueryArgs().Peek("cache_hit_types")); cacheHitTypes != "" { + filters.CacheHitTypes = parseCommaSeparated(cacheHitTypes) + } if contentSearch := string(ctx.QueryArgs().Peek("content_search")); contentSearch != "" { filters.ContentSearch = contentSearch } diff --git a/transports/bifrost-http/handlers/middlewares.go b/transports/bifrost-http/handlers/middlewares.go index bf72a7ed7a..58c2b86777 100644 --- a/transports/bifrost-http/handlers/middlewares.go +++ b/transports/bifrost-http/handlers/middlewares.go @@ -49,33 +49,33 @@ func SecurityHeadersMiddleware() schemas.BifrostHTTPMiddleware { func CorsMiddleware(config *lib.Config) schemas.BifrostHTTPMiddleware { return func(next fasthttp.RequestHandler) fasthttp.RequestHandler { return func(ctx *fasthttp.RequestCtx) { - 
startTime := time.Now() + // startTime := time.Now() // skip logging if it's a /health check request if slices.IndexFunc(loggingSkipPaths, func(path string) bool { return strings.HasPrefix(string(ctx.RequestURI()), path) }) != -1 { goto corsFlow } - defer func() { - statusCode := ctx.Response.Header.StatusCode() - level := schemas.LogLevelInfo - if statusCode >= 500 { - level = schemas.LogLevelError - } else if statusCode >= 400 { - level = schemas.LogLevelWarn - } - logBuilder := logger.LogHTTPRequest(level, "request completed"). - Str("http.method", string(ctx.Method())). - Str("http.target", string(ctx.RequestURI())). - Int("http.status_code", statusCode). - Int64("http.request_duration_ms", time.Since(startTime).Milliseconds()). - Str("http.remote_addr", ctx.RemoteAddr().String()). - Str("http.user_agent", string(ctx.Request.Header.UserAgent())) - if traceID, ok := ctx.UserValue(schemas.BifrostContextKeyTraceID).(string); ok && traceID != "" { - logBuilder = logBuilder.Str("trace_id", traceID) - } - logBuilder.Send() - }() + // defer func() { + // statusCode := ctx.Response.Header.StatusCode() + // level := schemas.LogLevelInfo + // if statusCode >= 500 { + // level = schemas.LogLevelError + // } else if statusCode >= 400 { + // level = schemas.LogLevelWarn + // } + // logBuilder := logger.LogHTTPRequest(level, "request completed"). + // Str("http.method", string(ctx.Method())). + // Str("http.target", string(ctx.RequestURI())). + // Int("http.status_code", statusCode). + // Int64("http.request_duration_ms", time.Since(startTime).Milliseconds()). + // Str("http.remote_addr", ctx.RemoteAddr().String()). 
+ // Str("http.user_agent", string(ctx.Request.Header.UserAgent())) + // if traceID, ok := ctx.UserValue(schemas.BifrostContextKeyTraceID).(string); ok && traceID != "" { + // logBuilder = logBuilder.Str("trace_id", traceID) + // } + // logBuilder.Send() + // }() corsFlow: origin := string(ctx.Request.Header.Peek("Origin")) allowed := IsOriginAllowed(origin, config.ClientConfig.AllowedOrigins) @@ -808,7 +808,7 @@ func (m *AuthMiddleware) middleware(shouldSkip func(*configstore.AuthConfig, str } authConfig := m.authConfig.Load() if authConfig == nil || !authConfig.IsEnabled { - logger.Debug("auth middleware is disabled because auth config is not present or not enabled") + // logger.Debug("auth middleware is disabled because auth config is not present or not enabled") ctx.SetUserValue(schemas.BifrostContextKeySessionToken, "") // Mark as local admin so downstream RBAC bypasses cleanly when // auth is fully disabled; otherwise RBAC 401s and the UI enters diff --git a/ui/app/workspace/config/views/pluginsForm.tsx b/ui/app/workspace/config/views/pluginsForm.tsx index dcd459de4c..fc4ddae7da 100644 --- a/ui/app/workspace/config/views/pluginsForm.tsx +++ b/ui/app/workspace/config/views/pluginsForm.tsx @@ -2,20 +2,32 @@ import { Button } from "@/components/ui/button"; import { Card, CardContent } from "@/components/ui/card"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; +import { ModelMultiselect } from "@/components/ui/modelMultiselect"; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; import { Separator } from "@/components/ui/separator"; import { Switch } from "@/components/ui/switch"; -import { getProviderLabel } from "@/lib/constants/logs"; +import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons"; +import { EmbeddingSupportedProviders, getProviderLabel } from "@/lib/constants/logs"; import { getErrorMessage, useCreatePluginMutation, 
useGetPluginsQuery, useGetProvidersQuery, useUpdatePluginMutation } from "@/lib/store"; -import { CacheConfig, EditorCacheConfig, ModelProviderName } from "@/lib/types/config"; +import { CacheConfig, EditorCacheConfig, ModelProvider, ModelProviderName } from "@/lib/types/config"; import { SEMANTIC_CACHE_PLUGIN } from "@/lib/types/plugins"; import { cacheConfigSchema } from "@/lib/types/schemas"; import { Loader2 } from "lucide-react"; import { useEffect, useMemo, useState } from "react"; import { toast } from "sonner"; +// Semantic caching needs an embedding-capable provider. Built-in providers are +// gated by EmbeddingSupportedProviders; custom providers expose support via +// custom_provider_config.allowed_requests.embedding. +const supportsEmbedding = (provider: ModelProvider): boolean => { + if (provider.custom_provider_config) { + return provider.custom_provider_config.allowed_requests?.embedding === true; + } + return (EmbeddingSupportedProviders as readonly string[]).includes(provider.name); +}; + const defaultCacheConfig: EditorCacheConfig = { - ttl_seconds: 300, + ttl: 300, threshold: 0.8, conversation_history_threshold: 3, exclude_system_prompt: false, @@ -23,14 +35,20 @@ const defaultCacheConfig: EditorCacheConfig = { cache_by_provider: true, }; -const toEditorCacheConfig = (config?: Partial): EditorCacheConfig => ({ - ...defaultCacheConfig, - ...config, -}); +const toEditorCacheConfig = (config?: Partial & { ttl_seconds?: number }): EditorCacheConfig => { + const { ttl_seconds, ...rest } = config ?? {}; + const merged: EditorCacheConfig = { ...defaultCacheConfig, ...rest }; + // Migration: older saves stored TTL under `ttl_seconds`; the Go plugin only + // reads `ttl`, so adopt the legacy value if the new field isn't present. 
+ if (rest.ttl === undefined && typeof ttl_seconds === "number") { + merged.ttl = ttl_seconds; + } + return merged; +}; const normalizeCacheConfigForSave = (config: EditorCacheConfig) => { const normalized: Record = { - ttl_seconds: config.ttl_seconds, + ttl: config.ttl, threshold: config.threshold, cache_by_model: config.cache_by_model, cache_by_provider: config.cache_by_provider, @@ -51,6 +69,8 @@ const normalizeCacheConfigForSave = (config: EditorCacheConfig) => { const provider = config.provider?.trim(); const embeddingModel = config.embedding_model?.trim(); + const namespace = config.vector_store_namespace?.trim(); + const defaultKey = config.default_cache_key?.trim(); if (provider) { normalized.provider = provider; @@ -61,6 +81,12 @@ const normalizeCacheConfigForSave = (config: EditorCacheConfig) => { if (config.dimension !== undefined) { normalized.dimension = config.dimension; } + if (namespace) { + normalized.vector_store_namespace = namespace; + } + if (defaultKey) { + normalized.default_cache_key = defaultKey; + } return normalized; }; @@ -78,6 +104,7 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) const { data: providersData, error: providersError, isLoading: providersLoading } = useGetProvidersQuery(); const providers = useMemo(() => providersData || [], [providersData]); + const embeddingProviders = useMemo(() => providers.filter(supportsEmbedding), [providers]); useEffect(() => { if (providersError) { @@ -108,17 +135,23 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) } }, [semanticCachePlugin]); - // Update default provider when providers are loaded (only for new configs) + // Seed default provider/model/dimension when the providers list loads, but + // only for new configs that haven't picked a provider yet — re-running this + // effect on subsequent embeddingProviders changes would otherwise clobber + // an in-progress user selection. 
useEffect(() => { - if (providers.length > 0 && !semanticCachePlugin?.config) { - setCacheConfig((prev) => ({ - ...prev, - provider: providers[0].name as ModelProviderName, - embedding_model: prev.embedding_model ?? "text-embedding-3-small", - dimension: prev.dimension ?? 1536, - })); + if (embeddingProviders.length > 0 && !semanticCachePlugin?.config) { + setCacheConfig((prev) => { + if (prev.provider) return prev; + return { + ...prev, + provider: embeddingProviders[0].name as ModelProviderName, + embedding_model: prev.embedding_model ?? "text-embedding-3-small", + dimension: prev.dimension ?? 1536, + }; + }); } - }, [providers, semanticCachePlugin?.config]); + }, [embeddingProviders, semanticCachePlugin?.config]); const hasChanges = useMemo(() => { if (originalCacheEnabled !== serverCacheEnabled) return true; @@ -127,12 +160,14 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) cacheConfig.provider !== serverCacheConfig.provider || cacheConfig.embedding_model !== serverCacheConfig.embedding_model || cacheConfig.dimension !== serverCacheConfig.dimension || - cacheConfig.ttl_seconds !== serverCacheConfig.ttl_seconds || + cacheConfig.ttl !== serverCacheConfig.ttl || cacheConfig.threshold !== serverCacheConfig.threshold || cacheConfig.conversation_history_threshold !== serverCacheConfig.conversation_history_threshold || cacheConfig.exclude_system_prompt !== serverCacheConfig.exclude_system_prompt || cacheConfig.cache_by_model !== serverCacheConfig.cache_by_model || - cacheConfig.cache_by_provider !== serverCacheConfig.cache_by_provider + cacheConfig.cache_by_provider !== serverCacheConfig.cache_by_provider || + (cacheConfig.vector_store_namespace ?? "") !== (serverCacheConfig.vector_store_namespace ?? "") || + (cacheConfig.default_cache_key ?? "") !== (serverCacheConfig.default_cache_key ?? 
"") ); }, [cacheConfig, serverCacheConfig, originalCacheEnabled, serverCacheEnabled]); @@ -219,6 +254,13 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) {!providersLoading && providers?.length === 0 && ( Requires at least one provider to be configured. )} + {!providersLoading && providers.length > 0 && embeddingProviders.length === 0 && ( + + {" "} + Requires at least one provider that supports embedding requests. Configure a built-in embedding provider, or enable the + embeddingrequest type on a custom provider. + + )}

@@ -226,22 +268,13 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) id="enable-caching" size="md" checked={originalCacheEnabled && isVectorStoreEnabled} - disabled={!isVectorStoreEnabled || providersLoading || providers.length === 0} + disabled={!isVectorStoreEnabled || providersLoading || embeddingProviders.length === 0} onCheckedChange={(checked) => { if (isVectorStoreEnabled) { handleSemanticCacheToggle(checked); } }} /> - {(isSemanticCacheEnabled || originalCacheEnabled) && ( - - )}
@@ -267,6 +300,12 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) embedding model's real dimension before saving, or remove the provider to stay in direct-only mode. )} +
+ Heads up: a vector store namespace can only hold vectors of one dimension. Whenever you + change the embedding provider, model, or dimension, make sure the dimension still matches what the model produces - otherwise writes to the existing namespace will + fail and reads will silently miss. The namespace is not recreated automatically; either use a fresh namespace or drop the existing class/index in your vector store + before saving. +
{/* Provider and Model Settings */}

Provider and Model Settings

@@ -275,17 +314,25 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) updateCacheConfigLocal({ embedding_model: e.target.value })} + onChange={(model) => updateCacheConfigLocal({ embedding_model: model })} + placeholder={cacheConfig.provider ? "Search or type an embedding model..." : "Select a provider first"} + disabled={!cacheConfig.provider} />
+

+ API keys for the embedding provider will be inherited from the main provider configuration. The semantic cache will use + the configured provider's keys automatically. +

{/* Cache Settings */} @@ -313,16 +367,16 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) id="ttl" type="number" min="1" - value={cacheConfig.ttl_seconds === undefined || Number.isNaN(cacheConfig.ttl_seconds) ? "" : cacheConfig.ttl_seconds} + value={cacheConfig.ttl === undefined || Number.isNaN(cacheConfig.ttl) ? "" : cacheConfig.ttl} onChange={(e) => { const value = e.target.value; if (value === "") { - updateCacheConfigLocal({ ttl_seconds: undefined }); + updateCacheConfigLocal({ ttl: undefined }); return; } const parsed = parseInt(value); if (!Number.isNaN(parsed)) { - updateCacheConfigLocal({ ttl_seconds: parsed }); + updateCacheConfigLocal({ ttl: parsed }); } }} /> @@ -368,12 +422,51 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) } }} /> +

+ Vector size produced by the embedding model - must match the model exactly (e.g. 1536 for + OpenAI text-embedding-3-small, 3072 for text-embedding-3-large, + 768 for many Cohere/Voyage models). Use 1 only in direct-only mode (no provider). +

+ + + + + {/* Storage & Cache Key */} +
+

Storage & Cache Key

+
+
+ + updateCacheConfigLocal({ vector_store_namespace: e.target.value })} + /> +

+ Bucket/index name where cache entries are stored in the vector store. Leave blank to use the default + (BifrostSemanticCachePlugin). Changing the namespace points the plugin at a different (possibly empty) bucket. All previously + cached entries become inaccessible - every request will miss until the new namespace is repopulated. +

+
+
+ + updateCacheConfigLocal({ default_cache_key: e.target.value })} + /> +

+ Fallback value used as the cache partition when a request doesn't set the x-bf-cache-key header. + Cache keys isolate entries: requests that share a key can hit each other's cached responses, while requests + with different keys can't. Leaving this blank means caching is disabled for any request that doesn't + send the header. +

-

- API keys for the embedding provider will be inherited from the main provider configuration. The semantic cache will use - the configured provider's keys automatically. -

{/* Conversation Settings */} @@ -456,6 +549,15 @@ export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) + +
+ +
))} diff --git a/ui/app/workspace/logs/page.tsx b/ui/app/workspace/logs/page.tsx index 8306464540..17c3404b02 100644 --- a/ui/app/workspace/logs/page.tsx +++ b/ui/app/workspace/logs/page.tsx @@ -95,6 +95,7 @@ export default function LogsPage() { polling: parseAsBoolean.withDefault(true).withOptions({ clearOnDefault: false }), period: parseAsString.withDefault(hasExplicitTimeRange ? "" : "1h").withOptions({ clearOnDefault: false }), missing_cost_only: parseAsBoolean.withDefault(false), + cache_hit_types: parseAsArrayOf(parseAsString).withDefault([]), metadata_filters: parseAsString.withDefault(""), selected_log: parseAsString.withDefault(""), }, @@ -129,6 +130,7 @@ export default function LogsPage() { business_unit_ids: urlState.business_unit_ids, content_search: urlState.content_search, missing_cost_only: urlState.missing_cost_only, + cache_hit_types: urlState.cache_hit_types, metadata_filters: urlState.metadata_filters ? (() => { try { @@ -163,6 +165,7 @@ export default function LogsPage() { urlState.content_search, urlState.parent_request_id, urlState.missing_cost_only, + urlState.cache_hit_types, urlState.metadata_filters, urlState.start_time, urlState.end_time, @@ -213,6 +216,7 @@ export default function LogsPage() { start_time: newFilters.start_time ? dateUtils.toUnixTimestamp(new Date(newFilters.start_time)) : undefined, end_time: newFilters.end_time ? dateUtils.toUnixTimestamp(new Date(newFilters.end_time)) : undefined, missing_cost_only: newFilters.missing_cost_only ?? false, + cache_hit_types: newFilters.cache_hit_types || [], metadata_filters: newFilters.metadata_filters ? 
JSON.stringify(newFilters.metadata_filters) : "", offset: 0, }); diff --git a/ui/app/workspace/logs/sheets/logDetailView.tsx b/ui/app/workspace/logs/sheets/logDetailView.tsx index cf3ab9e902..67c83a8430 100644 --- a/ui/app/workspace/logs/sheets/logDetailView.tsx +++ b/ui/app/workspace/logs/sheets/logDetailView.tsx @@ -398,7 +398,7 @@ function HeroStat({ ); } -function CopyInlineButton({ text }: { text: string }) { +function CopyInlineButton({ text, testId }: { text: string; testId?: string }) { const { copy } = useCopyToClipboard({ successMessage: "Copied" }); return ( @@ -813,6 +814,22 @@ export function LogDetailView({ Async ) : null} + {log.cache_debug?.hit_type === "direct" ? ( + + Direct Cache + + ) : null} + {log.cache_debug?.hit_type === "semantic" ? ( + + Semantic Cache + + ) : null} {(log.is_large_payload_request || log.is_large_payload_response) && (
-
+
Request
{log.id || "—"} - {log.id ? : null} + {log.id ? : null}
- {(log.routing_rule || log.selected_key) && ( -
- {log.routing_rule ? ( - <> - matched rule{" "} - - “{log.routing_rule.name}” - - - ) : null} - {log.routing_rule && log.selected_key ? " · " : ""} - {log.selected_key ? ( - <> - key{" "} - - {log.selected_key.name} - - - ) : null} + {log.cache_debug?.cache_id && ( +
+
+ Cache {log.cache_debug.cache_hit ? "(hit)" : "(miss)"} +
+ + {log.cache_debug.cache_id} + + +
+ )} + {log.routing_rule && ( +
+
+ Rule +
+ + “{log.routing_rule.name}” + +
+ )} + {log.selected_key && ( +
+
+ Key +
+ + {log.selected_key.name} +
)}
diff --git a/ui/components/filters/logsFilterSidebar.tsx b/ui/components/filters/logsFilterSidebar.tsx index a235a563ce..7127ec2822 100644 --- a/ui/components/filters/logsFilterSidebar.tsx +++ b/ui/components/filters/logsFilterSidebar.tsx @@ -115,6 +115,7 @@ export function LogsFilterSidebar({ filters, onFiltersChange }: LogsSidebarProps + @@ -821,6 +822,38 @@ function CostFilter({ filters, onFiltersChange, defaultOpen }: FilterComponentPr ); } +// --------------------------------------------------------------------------- +// LocalCachingFilter – filter by semantic-cache hit type (direct / semantic) +// --------------------------------------------------------------------------- + +const LocalCachingOptions: { key: string; label: string }[] = [ + { key: "direct", label: "Direct cache" }, + { key: "semantic", label: "Semantic cache" }, +]; + +function LocalCachingFilter({ filters, onFiltersChange, defaultOpen }: FilterComponentProps) { + const hasActive = (filters.cache_hit_types || []).length > 0; + return ( + + {LocalCachingOptions.map((option) => ( + { + const current = filters.cache_hit_types || []; + const next = current.includes(option.key) + ? current.filter((t) => t !== option.key) + : [...current, option.key]; + onFiltersChange({ ...filters, cache_hit_types: next }); + }} + testId={`local-caching-filter-checkbox-${option.key}`} + /> + ))} + + ); +} + // --------------------------------------------------------------------------- // MetadataFilters – fetches metadata keys internally // --------------------------------------------------------------------------- diff --git a/ui/lib/constants/logs.ts b/ui/lib/constants/logs.ts index a4259b4d0b..32b42ec405 100644 --- a/ui/lib/constants/logs.ts +++ b/ui/lib/constants/logs.ts @@ -30,6 +30,25 @@ export type ProviderName = (typeof KnownProvidersNames)[number]; export const ProviderNames: readonly ProviderName[] = KnownProvidersNames; +// Built-in providers whose Bifrost implementation supports embedding requests. 
+// Custom providers must instead be checked via custom_provider_config.allowed_requests.embedding. +export const EmbeddingSupportedProviders: readonly ProviderName[] = [ + "azure", + "bedrock", + "cohere", + "fireworks", + "gemini", + "huggingface", + "mistral", + "nebius", + "ollama", + "openai", + "openrouter", + "sgl", + "vertex", + "vllm", +] as const; + export const Statuses = ["success", "error", "processing", "cancelled"] as const; export const RequestTypes = [ diff --git a/ui/lib/store/apis/logsApi.ts b/ui/lib/store/apis/logsApi.ts index c10fc91cd2..aa8b5d6630 100644 --- a/ui/lib/store/apis/logsApi.ts +++ b/ui/lib/store/apis/logsApi.ts @@ -68,6 +68,9 @@ function buildFilterParams(filters: LogFilters): Record if (filters.min_tokens !== undefined) params.min_tokens = filters.min_tokens; if (filters.max_tokens !== undefined) params.max_tokens = filters.max_tokens; if (filters.missing_cost_only) params.missing_cost_only = "true"; + if (filters.cache_hit_types && filters.cache_hit_types.length > 0) { + params.cache_hit_types = filters.cache_hit_types.join(","); + } if (filters.content_search) params.content_search = filters.content_search; if (filters.user_ids && filters.user_ids.length > 0) { params.user_ids = filters.user_ids.join(","); diff --git a/ui/lib/types/config.ts b/ui/lib/types/config.ts index ce4333e31c..ed72934ea9 100644 --- a/ui/lib/types/config.ts +++ b/ui/lib/types/config.ts @@ -527,12 +527,14 @@ export const DefaultCoreConfig: CoreConfig = { // Semantic cache configuration types interface BaseCacheConfig { - ttl_seconds: number; + ttl: number; threshold: number; conversation_history_threshold?: number; exclude_system_prompt?: boolean; cache_by_model: boolean; cache_by_provider: boolean; + vector_store_namespace?: string; + default_cache_key?: string; created_at?: string; updated_at?: string; } diff --git a/ui/lib/types/logs.ts b/ui/lib/types/logs.ts index 6b8059ca0e..68ec412578 100644 --- a/ui/lib/types/logs.ts +++ b/ui/lib/types/logs.ts @@ 
-585,6 +585,7 @@ export interface LogFilters { min_tokens?: number; max_tokens?: number; missing_cost_only?: boolean; + cache_hit_types?: string[]; // For filtering by local-cache hit type ("direct", "semantic") content_search?: string; metadata_filters?: Record; // key=metadataKey, value=metadataValue for filtering by metadata user_ids?: string[]; diff --git a/ui/lib/types/schemas.ts b/ui/lib/types/schemas.ts index 0393c8bae6..5fe0e5dc3b 100644 --- a/ui/lib/types/schemas.ts +++ b/ui/lib/types/schemas.ts @@ -701,12 +701,14 @@ export const updateProviderRequestSchema = z.object({ // Cache config schema const baseCacheConfigSchema = z.object({ - ttl_seconds: z.number().int().min(1).default(3600), + ttl: z.number().int().min(1).default(3600), threshold: z.number().min(0).max(1).default(0.8), conversation_history_threshold: z.number().int().min(0).optional(), exclude_system_prompt: z.boolean().optional(), cache_by_model: z.boolean().default(false), cache_by_provider: z.boolean().default(false), + vector_store_namespace: z.string().min(1).optional(), + default_cache_key: z.string().min(1).optional(), created_at: z.string().optional(), updated_at: z.string().optional(), }); From fe6eea8346bcbb308487da8acf7c1b2ecf295a5f Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:03:40 +0530 Subject: [PATCH 10/81] feat: remove `cleanup_on_shutdown` from semantic cache plugin config (#3330) ## Summary Removes the `cleanup_on_shutdown` option from the semantic cache plugin. Cache data now always persists between Bifrost restarts. The previous behavior of deleting all cache entries and the vector store namespace on shutdown is no longer supported. 
## Changes - Removed `CleanUpOnShutdown` field from `Config` struct in `plugins/semanticcache/main.go` and stripped the corresponding shutdown deletion logic from `Cleanup()` - Removed `cleanup_on_shutdown` from the JSON config schema (`transports/config.schema.json`), Helm values schema (`helm-charts/bifrost/values.schema.json`), Helm template helper (`_helpers.tpl`), and default `values.yaml` - Removed `cleanup_on_shutdown` from all example Kubernetes values files and documentation code samples - Added migration guide entry (Breaking Change 16) in `docs/migration-guides/v1.5.0.mdx` describing the removal, how to clear cache data using the existing invalidation endpoints, and how to handle dimension/provider/model rotation without the old escape hatch - Updated the semantic caching feature docs to remove references to `cleanup_on_shutdown` and the associated warning block - Removed `TestCleanup_DeletesEntriesAndNamespaceWhenEnabled` test and simplified `newTestPlugin` helper to drop the `cleanupOnShutdown` parameter across all test files ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [x] Docs ## How to test ```sh go test ./plugins/semanticcache/... ``` Verify that passing `cleanup_on_shutdown` in a semantic cache plugin config is rejected by schema validation. Confirm that restarting Bifrost with a semantic cache configured leaves existing vector store entries intact. ## Breaking changes - [x] Yes - [ ] No The `cleanup_on_shutdown` field is removed from the semantic cache plugin config schema and will be rejected by validation. Remove it from `config.json`, Helm values, and any `PUT /api/config` payloads. To clear cache data, use `DELETE /api/cache/clear/{cacheId}`, `DELETE /api/cache/clear-by-key/{cacheKey}`, or rotate `vector_store_namespace` to a fresh name. 
## Related issues See Breaking Change 16 in the v1.5.0 migration guide. ## Security considerations None. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [x] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../scripts/validate-helm-config-fields.sh | 2 - core/providers/anthropic/types.go | 18 +++--- docs/deployment-guides/helm/plugins.mdx | 1 - docs/features/semantic-caching.mdx | 8 --- docs/migration-guides/v1.5.0.mdx | 54 ++++++++++++++++ .../values-semantic-search-redis.yaml | 1 - .../values-semantic-search-weaviate.yaml | 1 - helm-charts/bifrost/templates/_helpers.tpl | 3 - helm-charts/bifrost/values.schema.json | 3 - helm-charts/bifrost/values.yaml | 1 - .../semanticcache/config_unmarshal_test.go | 44 ++++++------- plugins/semanticcache/main.go | 38 ------------ plugins/semanticcache/plugin_api_test.go | 61 +++++++------------ plugins/semanticcache/plugin_core_test.go | 18 +++--- .../semanticcache/plugin_no_mutation_test.go | 2 - plugins/semanticcache/plugin_paths_test.go | 30 ++++----- .../semanticcache/plugin_vectorstore_test.go | 1 - plugins/semanticcache/test_utils.go | 15 ++--- transports/bifrost-http/handlers/plugins.go | 2 +- transports/config.schema.json | 4 -- 20 files changed, 132 insertions(+), 175 deletions(-) diff --git a/.github/workflows/scripts/validate-helm-config-fields.sh b/.github/workflows/scripts/validate-helm-config-fields.sh index 352ef88cde..11de5c7356 100755 --- a/.github/workflows/scripts/validate-helm-config-fields.sh +++ b/.github/workflows/scripts/validate-helm-config-fields.sh @@ -637,7 +637,6 @@ bifrost: cache_by_model: true cache_by_provider: false exclude_system_prompt: true - cleanup_on_shutdown: true vector_store_namespace: "bifrost-cache" otel: enabled: true @@ -710,7 +709,6 @@ assert_field_value 'plugins: semantic_cache 
conversation_history_threshold' '.pl assert_field_value 'plugins: semantic_cache cache_by_model' '.plugins.[4].config.cache_by_model' 'true' assert_field_value 'plugins: semantic_cache cache_by_provider' '.plugins.[4].config.cache_by_provider' 'false' assert_field_value 'plugins: semantic_cache exclude_system_prompt' '.plugins.[4].config.exclude_system_prompt' 'true' -assert_field_value 'plugins: semantic_cache cleanup_on_shutdown' '.plugins.[4].config.cleanup_on_shutdown' 'true' assert_field_value 'plugins: semantic_cache vector_store_namespace' '.plugins.[4].config.vector_store_namespace' '"bifrost-cache"' # OTEL plugin diff --git a/core/providers/anthropic/types.go b/core/providers/anthropic/types.go index 35874f658a..08eb9a7111 100644 --- a/core/providers/anthropic/types.go +++ b/core/providers/anthropic/types.go @@ -1200,18 +1200,18 @@ const ( type AnthropicToolName string const ( - AnthropicToolNameComputer AnthropicToolName = "computer" - AnthropicToolNameWebSearch AnthropicToolName = "web_search" - AnthropicToolNameWebFetch AnthropicToolName = "web_fetch" - AnthropicToolNameBash AnthropicToolName = "bash" - AnthropicToolNameTextEditor AnthropicToolName = "str_replace_based_edit_tool" + AnthropicToolNameComputer AnthropicToolName = "computer" + AnthropicToolNameWebSearch AnthropicToolName = "web_search" + AnthropicToolNameWebFetch AnthropicToolName = "web_fetch" + AnthropicToolNameBash AnthropicToolName = "bash" + AnthropicToolNameTextEditor AnthropicToolName = "str_replace_based_edit_tool" // AnthropicToolNameTextEditorLegacy is the name required for text_editor_20250124 // and text_editor_20250429. Newer text_editor_20250728+ use AnthropicToolNameTextEditor. 
AnthropicToolNameTextEditorLegacy AnthropicToolName = "str_replace_editor" - AnthropicToolNameCodeExecution AnthropicToolName = "code_execution" - AnthropicToolNameMemory AnthropicToolName = "memory" - AnthropicToolNameToolSearchBM25 AnthropicToolName = "tool_search_tool_bm25" - AnthropicToolNameToolSearchRegex AnthropicToolName = "tool_search_tool_regex" + AnthropicToolNameCodeExecution AnthropicToolName = "code_execution" + AnthropicToolNameMemory AnthropicToolName = "memory" + AnthropicToolNameToolSearchBM25 AnthropicToolName = "tool_search_tool_bm25" + AnthropicToolNameToolSearchRegex AnthropicToolName = "tool_search_tool_regex" ) type AnthropicToolComputerUse struct { diff --git a/docs/deployment-guides/helm/plugins.mdx b/docs/deployment-guides/helm/plugins.mdx index 887cf85600..c0f8b695b8 100644 --- a/docs/deployment-guides/helm/plugins.mdx +++ b/docs/deployment-guides/helm/plugins.mdx @@ -178,7 +178,6 @@ Two modes: | `bifrost.plugins.semanticCache.config.cache_by_model` | Include model name in cache key | `true` | | `bifrost.plugins.semanticCache.config.cache_by_provider` | Include provider name in cache key | `true` | | `bifrost.plugins.semanticCache.config.exclude_system_prompt` | Exclude system prompt from cache key | `false` | -| `bifrost.plugins.semanticCache.config.cleanup_on_shutdown` | Delete cache data on pod shutdown | `false` | **Semantic mode (with OpenAI embeddings + Weaviate):** diff --git a/docs/features/semantic-caching.mdx b/docs/features/semantic-caching.mdx index 5413649ea2..d2eb9b2cba 100644 --- a/docs/features/semantic-caching.mdx +++ b/docs/features/semantic-caching.mdx @@ -198,7 +198,6 @@ bifrostConfig := schemas.BifrostConfig{ "embedding_model": "text-embedding-3-small", "dimension": 1536, - "cleanup_on_shutdown": true, "ttl": "5m", "threshold": 0.8, @@ -281,7 +280,6 @@ bifrost: config: dimension: 1 ttl: "5m" - cleanup_on_shutdown: true cache_by_model: true cache_by_provider: true ``` @@ -299,7 +297,6 @@ bifrost: "config": { 
"dimension": 1, "ttl": "5m", - "cleanup_on_shutdown": true, "cache_by_model": true, "cache_by_provider": true } @@ -671,16 +668,11 @@ The semantic cache automatically handles cleanup to prevent storage bloat: - Clear all entries for a cache key - Restart Bifrost to clear all cache data - -The semantic cache namespace and all its cache entries are deleted when Bifrost client shuts down **only if `cleanup_on_shutdown` is set to `true`**. By default (`cleanup_on_shutdown: false`), cache data persists between restarts. DO NOT use the plugin's namespace for external purposes. - - **Dimension / Provider / Model Changes**: A vector store namespace can only hold vectors of **one** dimension. If you change `dimension` (or switch to an embedding `provider`/`model` that produces a different vector size), the existing namespace is **not** recreated automatically — `CreateNamespace` is a no-op when the class/collection already exists. Subsequent writes will fail (vector-size mismatch) and reads will silently miss. Before saving the change, either: - point `vector_store_namespace` at a fresh name, or - drop the existing class/index in your vector store, or -- set `cleanup_on_shutdown: true` and restart so the old namespace is removed first. --- diff --git a/docs/migration-guides/v1.5.0.mdx b/docs/migration-guides/v1.5.0.mdx index 95d4a49dd8..0cf4eca096 100644 --- a/docs/migration-guides/v1.5.0.mdx +++ b/docs/migration-guides/v1.5.0.mdx @@ -751,6 +751,56 @@ If you parse `cache_debug` and assumed it was either absent or had `cache_hit: t --- +## Breaking Change 16: Semantic Cache `cleanup_on_shutdown` Removed + +The `cleanup_on_shutdown` option on the semantic cache plugin config has been removed. Cache entries and the vector store namespace are no longer deleted when Bifrost shuts down — cache data always persists between restarts. 
+ +**Before:** +```json +{ + "plugins": { + "semantic_cache": { + "config": { + "ttl": "5m", + "cleanup_on_shutdown": true + } + } + } +} +``` + +**After:** +```json +{ + "plugins": { + "semantic_cache": { + "config": { + "ttl": "5m" + } + } + } +} +``` + +The field is no longer part of the config schema and will be rejected by validation. Remove it from `config.json`, Helm values, and any `PUT /api/config` payloads. + +### How to clear cache data + +If you previously relied on `cleanup_on_shutdown: true` to drop the cache on restart, use one of the supported invalidation paths instead: + +- `DELETE /api/cache/clear/{cacheId}` — invalidate a single entry +- `DELETE /api/cache/clear-by-key/{cacheKey}` — invalidate all entries for a cache key +- Drop the vector store class/collection or point `vector_store_namespace` at a fresh name to start clean + +### Dimension / provider / model changes + +The previous `cleanup_on_shutdown: true` + restart workflow was the documented escape hatch for changing `dimension` (or switching to an embedding `provider`/`model` that produces a different vector size). That option is gone. To rotate the namespace now, either: + +- point `vector_store_namespace` at a fresh name, or +- drop the existing class/index in your vector store before restarting + +--- + ## Opting Out: `version: 1` Compatibility Mode If you are not ready to adopt the new deny-by-default semantics, you can add a single field to `config.json` to restore v1.4.x behavior for all allow-list fields loaded from that file: @@ -853,6 +903,10 @@ Remove `allow_direct_keys` from `config.json` and any `PUT /api/config` payloads Replace `DELETE /api/cache/clear/{requestId}` with `DELETE /api/cache/clear/{cacheId}`, and replace `plugin.ClearCacheForRequestID(...)` with `plugin.ClearCacheForCacheID(...)`. Read the cache ID from `extra_fields.cache_debug.cache_id` on the response (now populated on misses too). 
+ + +Drop the `cleanup_on_shutdown` field from the semantic cache plugin config in `config.json`, Helm values, and any API payloads — it is no longer part of the schema. Cache data now always persists across restarts; use the cache clear endpoints or rotate `vector_store_namespace` to drop entries. + --- diff --git a/examples/k8s/examples/values-semantic-search-redis.yaml b/examples/k8s/examples/values-semantic-search-redis.yaml index 4d9cd18224..8867162e2f 100644 --- a/examples/k8s/examples/values-semantic-search-redis.yaml +++ b/examples/k8s/examples/values-semantic-search-redis.yaml @@ -18,7 +18,6 @@ bifrost: cache_by_model: true cache_by_provider: true exclude_system_prompt: true - cleanup_on_shutdown: false vector_store_namespace: "bifrost-semantic-cache" vectorStore: diff --git a/examples/k8s/examples/values-semantic-search-weaviate.yaml b/examples/k8s/examples/values-semantic-search-weaviate.yaml index 7691106be7..0d00d87d1a 100644 --- a/examples/k8s/examples/values-semantic-search-weaviate.yaml +++ b/examples/k8s/examples/values-semantic-search-weaviate.yaml @@ -18,7 +18,6 @@ bifrost: cache_by_model: true cache_by_provider: true exclude_system_prompt: false - cleanup_on_shutdown: false vector_store_namespace: "bifrost-semantic-cache" vectorStore: diff --git a/helm-charts/bifrost/templates/_helpers.tpl b/helm-charts/bifrost/templates/_helpers.tpl index 9849b760f4..1e5c6312ed 100644 --- a/helm-charts/bifrost/templates/_helpers.tpl +++ b/helm-charts/bifrost/templates/_helpers.tpl @@ -1100,9 +1100,6 @@ false {{- if hasKey $inputConfig "exclude_system_prompt" }} {{- $_ := set $scConfig "exclude_system_prompt" $inputConfig.exclude_system_prompt }} {{- end }} -{{- if hasKey $inputConfig "cleanup_on_shutdown" }} -{{- $_ := set $scConfig "cleanup_on_shutdown" $inputConfig.cleanup_on_shutdown }} -{{- end }} {{- $plugin := dict "enabled" true "name" "semantic_cache" "config" $scConfig }} {{- if hasKey .Values.bifrost.plugins.semanticCache "version" }}{{- $_ := set 
$plugin "version" (.Values.bifrost.plugins.semanticCache.version | int) }}{{- end }} {{- $plugins = append $plugins $plugin }} diff --git a/helm-charts/bifrost/values.schema.json b/helm-charts/bifrost/values.schema.json index 9507bb8a97..bf5ad7a822 100644 --- a/helm-charts/bifrost/values.schema.json +++ b/helm-charts/bifrost/values.schema.json @@ -817,9 +817,6 @@ "exclude_system_prompt": { "type": "boolean" }, - "cleanup_on_shutdown": { - "type": "boolean" - }, "vector_store_namespace": { "type": "string" }, diff --git a/helm-charts/bifrost/values.yaml b/helm-charts/bifrost/values.yaml index b4d85cf890..65690fb1f8 100644 --- a/helm-charts/bifrost/values.yaml +++ b/helm-charts/bifrost/values.yaml @@ -420,7 +420,6 @@ bifrost: cache_by_model: true cache_by_provider: true exclude_system_prompt: false - cleanup_on_shutdown: false vector_store_namespace: "" otel: diff --git a/plugins/semanticcache/config_unmarshal_test.go b/plugins/semanticcache/config_unmarshal_test.go index d38ad31fc6..5d2edd44f1 100644 --- a/plugins/semanticcache/config_unmarshal_test.go +++ b/plugins/semanticcache/config_unmarshal_test.go @@ -43,7 +43,6 @@ func TestUnmarshalJSON_AllFields(t *testing.T) { input := `{ "provider": "openai", "embedding_model": "text-embedding-3-small", - "cleanup_on_shutdown": true, "dimension": 1536, "ttl": "10m", "threshold": 0.9, @@ -66,9 +65,6 @@ func TestUnmarshalJSON_AllFields(t *testing.T) { if config.EmbeddingModel != "text-embedding-3-small" { t.Errorf("EmbeddingModel: expected %q, got %q", "text-embedding-3-small", config.EmbeddingModel) } - if !config.CleanUpOnShutdown { - t.Error("CleanUpOnShutdown: expected true") - } if config.Dimension != 1536 { t.Errorf("Dimension: expected 1536, got %d", config.Dimension) } @@ -136,32 +132,32 @@ func TestUnmarshalJSON_TTLFormats(t *testing.T) { func TestUnmarshalJSON_BoolPointerFields(t *testing.T) { tests := []struct { - name string - json string - expectCacheByModel *bool - expectCacheByProv *bool - expectExcludeSys 
*bool + name string + json string + expectCacheByModel *bool + expectCacheByProv *bool + expectExcludeSys *bool }{ { - name: "all set to true", - json: `{"dimension": 1536, "cache_by_model": true, "cache_by_provider": true, "exclude_system_prompt": true}`, - expectCacheByModel: bifrost.Ptr(true), - expectCacheByProv: bifrost.Ptr(true), - expectExcludeSys: bifrost.Ptr(true), + name: "all set to true", + json: `{"dimension": 1536, "cache_by_model": true, "cache_by_provider": true, "exclude_system_prompt": true}`, + expectCacheByModel: bifrost.Ptr(true), + expectCacheByProv: bifrost.Ptr(true), + expectExcludeSys: bifrost.Ptr(true), }, { - name: "all set to false", - json: `{"dimension": 1536, "cache_by_model": false, "cache_by_provider": false, "exclude_system_prompt": false}`, - expectCacheByModel: bifrost.Ptr(false), - expectCacheByProv: bifrost.Ptr(false), - expectExcludeSys: bifrost.Ptr(false), + name: "all set to false", + json: `{"dimension": 1536, "cache_by_model": false, "cache_by_provider": false, "exclude_system_prompt": false}`, + expectCacheByModel: bifrost.Ptr(false), + expectCacheByProv: bifrost.Ptr(false), + expectExcludeSys: bifrost.Ptr(false), }, { - name: "all omitted", - json: `{"dimension": 1536}`, - expectCacheByModel: nil, - expectCacheByProv: nil, - expectExcludeSys: nil, + name: "all omitted", + json: `{"dimension": 1536}`, + expectCacheByModel: nil, + expectCacheByProv: nil, + expectExcludeSys: nil, }, } diff --git a/plugins/semanticcache/main.go b/plugins/semanticcache/main.go index 798c98a52d..9fefc497f2 100644 --- a/plugins/semanticcache/main.go +++ b/plugins/semanticcache/main.go @@ -31,7 +31,6 @@ type Config struct { EmbeddingModel string `json:"embedding_model,omitempty"` // Model to use for generating embeddings (optional) // Plugin behavior settings - CleanUpOnShutdown bool `json:"cleanup_on_shutdown,omitempty"` // Clean up cache on shutdown (default: false) TTL time.Duration `json:"ttl,omitempty"` // Time-to-live for cached responses 
(default: 5min) Threshold float64 `json:"threshold,omitempty"` // Cosine similarity threshold for semantic matching (0 = unset → default 0.8) VectorStoreNamespace string `json:"vector_store_namespace,omitempty"` // Namespace for vector store (optional) @@ -684,43 +683,6 @@ func (plugin *Plugin) Cleanup() error { // and stopCh is still in memory. This call evicts those before we return. plugin.cleanupOldStreamAccumulators() - // Only clean up cache entries if configured to do so - if !plugin.config.CleanUpOnShutdown { - plugin.logger.Debug("Cleanup on shutdown is disabled, skipping cache cleanup") - return nil - } - - // Clean up all cache entries created by this plugin - ctx, cancel := context.WithTimeout(context.Background(), CacheSetTimeout) - defer cancel() - - plugin.logger.Debug("Starting cleanup of cache entries...") - - // Delete all cache entries created by this plugin - queries := []vectorstore.Query{ - { - Field: "from_bifrost_semantic_cache_plugin", - Operator: vectorstore.QueryOperatorEqual, - Value: true, - }, - } - - results, err := plugin.store.DeleteAll(ctx, plugin.config.VectorStoreNamespace, queries) - if err != nil { - return fmt.Errorf("failed to delete cache entries: %w", err) - } - - for _, result := range results { - if result.Status == vectorstore.DeleteStatusError { - plugin.logger.Warn("Failed to delete cache entry: %s", result.Error) - } - } - plugin.logger.Debug("Cleanup completed - deleted all cache entries") - - if err := plugin.store.DeleteNamespace(ctx, plugin.config.VectorStoreNamespace); err != nil { - return fmt.Errorf("failed to delete namespace: %w", err) - } - return nil } diff --git a/plugins/semanticcache/plugin_api_test.go b/plugins/semanticcache/plugin_api_test.go index 908e88149d..78dbbd45af 100644 --- a/plugins/semanticcache/plugin_api_test.go +++ b/plugins/semanticcache/plugin_api_test.go @@ -16,15 +16,15 @@ import ( // Delete / DeleteAll / DeleteNamespace calls so the tests can assert on the // public Clear* APIs and on 
Cleanup teardown behavior. type observableStore struct { - mu sync.Mutex - chunks map[string]vectorstore.SearchResult - addIDs []string - deleteIDs []string - deleteAllQueries [][]vectorstore.Query - namespaceDeletes int - deleteAllErr error - deleteErr error - deleteAllResults []vectorstore.DeleteResult + mu sync.Mutex + chunks map[string]vectorstore.SearchResult + addIDs []string + deleteIDs []string + deleteAllQueries [][]vectorstore.Query + namespaceDeletes int + deleteAllErr error + deleteErr error + deleteAllResults []vectorstore.DeleteResult } func newObservableStore() *observableStore { @@ -85,10 +85,9 @@ func (s *observableStore) DeleteAll(ctx context.Context, ns string, queries []ve } func (s *observableStore) Close(ctx context.Context, ns string) error { return nil } -func newTestPlugin(t *testing.T, store vectorstore.VectorStore, cleanupOnShutdown bool) *Plugin { +func newTestPlugin(t *testing.T, store vectorstore.VectorStore) *Plugin { t.Helper() cfg := getDefaultTestConfig() - cfg.CleanUpOnShutdown = cleanupOnShutdown return &Plugin{ store: store, config: cfg, @@ -102,7 +101,7 @@ func newTestPlugin(t *testing.T, store vectorstore.VectorStore, cleanupOnShutdow // ----------------------------------------------------------------------------- func TestClearCacheForCacheID_EmptyIDRejected(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) if err := plugin.ClearCacheForCacheID(""); err == nil { t.Fatal("expected error for empty cache ID") } @@ -110,7 +109,7 @@ func TestClearCacheForCacheID_EmptyIDRejected(t *testing.T) { func TestClearCacheForCacheID_PointDelete(t *testing.T) { store := newObservableStore() - plugin := newTestPlugin(t, store, false) + plugin := newTestPlugin(t, store) if err := plugin.ClearCacheForCacheID("cache-abc"); err != nil { t.Fatalf("ClearCacheForCacheID failed: %v", err) @@ -128,7 +127,7 @@ func TestClearCacheForCacheID_PointDelete(t *testing.T) { func 
TestClearCacheForKey_FiltersByCacheKeyAndPluginMarker(t *testing.T) { store := newObservableStore() - plugin := newTestPlugin(t, store, false) + plugin := newTestPlugin(t, store) if err := plugin.ClearCacheForKey("session-42"); err != nil { t.Fatalf("ClearCacheForKey failed: %v", err) @@ -162,7 +161,7 @@ func TestClearCacheForKey_FiltersByCacheKeyAndPluginMarker(t *testing.T) { // ----------------------------------------------------------------------------- func TestStampCacheDebugForMiss_AlwaysSetsCacheID(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) state := &cacheState{} extra := &schemas.BifrostResponseExtraFields{} @@ -184,7 +183,7 @@ func TestStampCacheDebugForMiss_AlwaysSetsCacheID(t *testing.T) { } func TestStampCacheDebugForMiss_AddsTelemetryWhenSemanticRan(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) state := &cacheState{EmbeddingsInputTokens: 42} extra := &schemas.BifrostResponseExtraFields{} @@ -199,7 +198,7 @@ func TestStampCacheDebugForMiss_AddsTelemetryWhenSemanticRan(t *testing.T) { } func TestStampCacheDebugForMiss_StreamSkipsNonFinalChunks(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) state := &cacheState{} extra := &schemas.BifrostResponseExtraFields{} @@ -216,7 +215,7 @@ func TestStampCacheDebugForMiss_StreamSkipsNonFinalChunks(t *testing.T) { func TestCleanup_SkipsEntryDeletionWhenDisabled(t *testing.T) { store := newObservableStore() - plugin := newTestPlugin(t, store, false) // CleanUpOnShutdown=false + plugin := newTestPlugin(t, store) // CleanUpOnShutdown=false if err := plugin.Cleanup(); err != nil { t.Fatalf("Cleanup failed: %v", err) @@ -232,26 +231,8 @@ func TestCleanup_SkipsEntryDeletionWhenDisabled(t *testing.T) { } } -func TestCleanup_DeletesEntriesAndNamespaceWhenEnabled(t *testing.T) { - store := 
newObservableStore() - plugin := newTestPlugin(t, store, true) // CleanUpOnShutdown=true - - if err := plugin.Cleanup(); err != nil { - t.Fatalf("Cleanup failed: %v", err) - } - - store.mu.Lock() - defer store.mu.Unlock() - if len(store.deleteAllQueries) != 1 { - t.Fatalf("expected one DeleteAll call, got %d", len(store.deleteAllQueries)) - } - if store.namespaceDeletes != 1 { - t.Fatalf("expected one DeleteNamespace call, got %d", store.namespaceDeletes) - } -} - func TestCleanup_DrainsPendingWriters(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) var done atomic.Bool plugin.writersWg.Add(1) @@ -274,7 +255,7 @@ func TestCleanup_DrainsPendingWriters(t *testing.T) { // ----------------------------------------------------------------------------- func TestCleanupOldCacheStates_ReapsOldEntries(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) plugin.cacheStates.Store("old-1", &cacheState{CreatedAt: time.Now().Add(-2 * cacheStateMaxAge)}) plugin.cacheStates.Store("old-2", &cacheState{CreatedAt: time.Now().Add(-2 * cacheStateMaxAge)}) @@ -298,7 +279,7 @@ func TestCleanupOldCacheStates_ReapsOldEntries(t *testing.T) { // ----------------------------------------------------------------------------- func TestCleanupOldStreamAccumulators_ReapsByLastSeenAt(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) plugin.streamAccumulators.Store("old", &StreamAccumulator{ RequestID: "old", @@ -324,7 +305,7 @@ func TestCleanupOldStreamAccumulators_ReapsByLastSeenAt(t *testing.T) { // ----------------------------------------------------------------------------- func TestBuildStreamingResponseFromResult_ConsumerAbandonment(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) // Build a cached 
entry with multiple chunks. chunkJSON := `{"chat_response":{"choices":[]}}` diff --git a/plugins/semanticcache/plugin_core_test.go b/plugins/semanticcache/plugin_core_test.go index e0f88464de..a280f662f6 100644 --- a/plugins/semanticcache/plugin_core_test.go +++ b/plugins/semanticcache/plugin_core_test.go @@ -581,11 +581,10 @@ func TestInvalidProviderRejection(t *testing.T) { for _, provider := range unsupportedProviders { t.Run(string(provider), func(t *testing.T) { config := &Config{ - Provider: provider, - EmbeddingModel: "some-model", - Dimension: 1536, - Threshold: 0.8, - CleanUpOnShutdown: false, + Provider: provider, + EmbeddingModel: "some-model", + Dimension: 1536, + Threshold: 0.8, } // Provider validation was moved to request time (global client handles it). @@ -608,11 +607,10 @@ func TestValidProviderAccepted(t *testing.T) { // Test a supported provider (OpenAI) config := &Config{ - Provider: schemas.OpenAI, - EmbeddingModel: "text-embedding-3-small", - Dimension: 1536, - Threshold: 0.8, - CleanUpOnShutdown: false, + Provider: schemas.OpenAI, + EmbeddingModel: "text-embedding-3-small", + Dimension: 1536, + Threshold: 0.8, } // Init should succeed; provider validation happens at request time via the global client. diff --git a/plugins/semanticcache/plugin_no_mutation_test.go b/plugins/semanticcache/plugin_no_mutation_test.go index 340b4fdd9a..d0a65b681f 100644 --- a/plugins/semanticcache/plugin_no_mutation_test.go +++ b/plugins/semanticcache/plugin_no_mutation_test.go @@ -88,8 +88,6 @@ func TestCachingDoesNotMutateRequestSentToProvider(t *testing.T) { Threshold: 0.8, ConversationHistoryThreshold: DefaultConversationHistoryThreshold, VectorStoreNamespace: SharedTestNamespace, - // Do NOT clean up on shutdown — other parallel tests share the namespace. 
- CleanUpOnShutdown: false, } if err := ensureSharedTestNamespace(context.Background(), store, cfg.Dimension); err != nil { t.Fatalf("ensureSharedTestNamespace: %v", err) diff --git a/plugins/semanticcache/plugin_paths_test.go b/plugins/semanticcache/plugin_paths_test.go index 5ca1ac8c7a..fc6f54920b 100644 --- a/plugins/semanticcache/plugin_paths_test.go +++ b/plugins/semanticcache/plugin_paths_test.go @@ -21,7 +21,7 @@ import ( func TestPostLLMHook_SkipsOnBifrostError(t *testing.T) { store := newObservableStore() - plugin := newTestPlugin(t, store, false) + plugin := newTestPlugin(t, store) ctx := newBaseTestContext() ctx.SetValue(CacheKey, keyForTest(t, "")) @@ -61,7 +61,7 @@ func TestPostLLMHook_SkipsOnBifrostError(t *testing.T) { // ----------------------------------------------------------------------------- func TestShouldSkipCaching_LargePayloadMode(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) ctx := newBaseTestContext() ctx.SetValue(schemas.BifrostContextKeyLargePayloadMode, true) @@ -73,7 +73,7 @@ func TestShouldSkipCaching_LargePayloadMode(t *testing.T) { } func TestShouldSkipCaching_LargeResponseMode(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) ctx := newBaseTestContext() ctx.SetValue(schemas.BifrostContextKeyLargeResponseMode, true) @@ -85,7 +85,7 @@ func TestShouldSkipCaching_LargeResponseMode(t *testing.T) { } func TestShouldSkipCaching_CacheHitReplay(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) ctx := newBaseTestContext() res := &schemas.BifrostResponse{ @@ -102,7 +102,7 @@ func TestShouldSkipCaching_CacheHitReplay(t *testing.T) { } func TestShouldSkipCaching_NoStoreFlag(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) ctx := 
newBaseTestContext() ctx.SetValue(CacheNoStoreKey, true) @@ -162,7 +162,7 @@ func TestInit_AllowsDirectOnlyMode(t *testing.T) { // ----------------------------------------------------------------------------- func TestPreLLMHook_FallsBackToDirectWhenExecutorMissing(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) // Intentionally do NOT set plugin.embeddingRequestExecutor. req := &schemas.BifrostRequest{ @@ -197,7 +197,7 @@ func TestPreLLMHook_FallsBackToDirectWhenExecutorMissing(t *testing.T) { func TestExpiredEntry_DetectedAndDeleted(t *testing.T) { store := newObservableStore() - plugin := newTestPlugin(t, store, false) + plugin := newTestPlugin(t, store) // Plant an already-expired entry under a deterministic ID. expiredID := "expired-id-1" @@ -295,7 +295,7 @@ func TestUnmarshalJSON_RejectsBadDurationString(t *testing.T) { // ----------------------------------------------------------------------------- func TestStreamReplay_CancelImmediately(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) chunk := `{"chat_response":{"choices":[]}}` streamArray := []string{chunk, chunk, chunk} @@ -331,7 +331,7 @@ func TestStreamReplay_CancelImmediately(t *testing.T) { } func TestStreamReplay_FullDrain(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) chunk := `{"chat_response":{"choices":[]}}` streamArray := []string{chunk, chunk, chunk} @@ -379,7 +379,7 @@ func scopedTestContext(t testing.TB, suffix string) *schemas.BifrostContext { func TestPreLLMHook_EmitsPluginLogOnEmbeddingFailure(t *testing.T) { store := newObservableStore() - plugin := newTestPlugin(t, store, false) + plugin := newTestPlugin(t, store) plugin.SetEmbeddingRequestExecutor(func(_ *schemas.BifrostContext, _ *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, 
*schemas.BifrostError) { return nil, &schemas.BifrostError{Error: &schemas.ErrorField{Message: "rate limit exceeded"}} }) @@ -438,7 +438,7 @@ func TestPreLLMHook_NoDebugLogsOnFlow(t *testing.T) { // flow (hit/miss). cache_debug already conveys that. Only Warn-level // failure logs should appear on the response. store := newObservableStore() - plugin := newTestPlugin(t, store, false) + plugin := newTestPlugin(t, store) req := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, @@ -461,7 +461,7 @@ func TestPreLLMHook_NoDebugLogsOnFlow(t *testing.T) { } func TestResolveCacheTypes_EmitsPluginLogOnInvalidValue(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) ctx := scopedTestContext(t, "") ctx.SetValue(CacheTypeKey, "not-a-cache-type") // wrong type @@ -484,7 +484,7 @@ func TestResolveCacheTypes_EmitsPluginLogOnInvalidValue(t *testing.T) { // ----------------------------------------------------------------------------- func TestGenerateEmbedding_AcceptsInt8Array(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) plugin.SetEmbeddingRequestExecutor(func(_ *schemas.BifrostContext, _ *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) { return &schemas.BifrostEmbeddingResponse{ Data: []schemas.EmbeddingData{{ @@ -507,7 +507,7 @@ func TestGenerateEmbedding_AcceptsInt8Array(t *testing.T) { } func TestGenerateEmbedding_AcceptsInt32Array(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) plugin.SetEmbeddingRequestExecutor(func(_ *schemas.BifrostContext, _ *schemas.BifrostEmbeddingRequest) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError) { return &schemas.BifrostEmbeddingResponse{ Data: []schemas.EmbeddingData{{ @@ -534,7 +534,7 @@ func TestGenerateEmbedding_AcceptsInt32Array(t *testing.T) { 
// ----------------------------------------------------------------------------- func TestPreLLMHook_ConcurrentSameRequestID(t *testing.T) { - plugin := newTestPlugin(t, newObservableStore(), false) + plugin := newTestPlugin(t, newObservableStore()) req := &schemas.BifrostRequest{ RequestType: schemas.ChatCompletionRequest, diff --git a/plugins/semanticcache/plugin_vectorstore_test.go b/plugins/semanticcache/plugin_vectorstore_test.go index 6d29f08c8b..3e93cc8baa 100644 --- a/plugins/semanticcache/plugin_vectorstore_test.go +++ b/plugins/semanticcache/plugin_vectorstore_test.go @@ -55,7 +55,6 @@ func getDefaultTestConfig() *Config { EmbeddingModel: "text-embedding-3-small", Dimension: 1536, Threshold: 0.8, - CleanUpOnShutdown: true, ConversationHistoryThreshold: DefaultConversationHistoryThreshold, } } diff --git a/plugins/semanticcache/test_utils.go b/plugins/semanticcache/test_utils.go index 6a506f51a4..d9b4084926 100644 --- a/plugins/semanticcache/test_utils.go +++ b/plugins/semanticcache/test_utils.go @@ -415,11 +415,10 @@ type TestSetup struct { // NewTestSetup creates a new test setup with default configuration func NewTestSetup(t *testing.T) *TestSetup { return NewTestSetupWithConfig(t, &Config{ - Provider: schemas.OpenAI, - EmbeddingModel: "text-embedding-3-small", - Dimension: 1536, - Threshold: 0.8, - CleanUpOnShutdown: true, + Provider: schemas.OpenAI, + EmbeddingModel: "text-embedding-3-small", + Dimension: 1536, + Threshold: 0.8, }) } @@ -462,9 +461,6 @@ func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectors if config.VectorStoreNamespace == "" { config.VectorStoreNamespace = SharedTestNamespace } - // Tests must NOT delete the shared namespace at cleanup — other parallel - // tests are still using it. Override any caller default. 
- config.CleanUpOnShutdown = false // Get the appropriate config for the vector store type var storeConfig interface{} @@ -750,7 +746,6 @@ func CreateTestSetupWithConversationThreshold(t *testing.T, threshold int) *Test Provider: schemas.OpenAI, EmbeddingModel: "text-embedding-3-small", Dimension: 1536, - CleanUpOnShutdown: true, Threshold: 0.8, ConversationHistoryThreshold: threshold, } @@ -764,7 +759,6 @@ func CreateTestSetupWithExcludeSystemPrompt(t *testing.T, excludeSystem bool) *T Provider: schemas.OpenAI, EmbeddingModel: "text-embedding-3-small", Dimension: 1536, - CleanUpOnShutdown: true, Threshold: 0.8, ExcludeSystemPrompt: &excludeSystem, } @@ -778,7 +772,6 @@ func CreateTestSetupWithThresholdAndExcludeSystem(t *testing.T, threshold int, e Provider: schemas.OpenAI, EmbeddingModel: "text-embedding-3-small", Dimension: 1536, - CleanUpOnShutdown: true, Threshold: 0.8, ConversationHistoryThreshold: threshold, ExcludeSystemPrompt: &excludeSystem, diff --git a/transports/bifrost-http/handlers/plugins.go b/transports/bifrost-http/handlers/plugins.go index 3dc4f8353c..71613c15ba 100644 --- a/transports/bifrost-http/handlers/plugins.go +++ b/transports/bifrost-http/handlers/plugins.go @@ -16,9 +16,9 @@ import ( ) type PluginsLoader interface { + GetPluginStatus(ctx context.Context) map[string]schemas.PluginStatus ReloadPlugin(ctx context.Context, name string, path *string, pluginConfig any, placement *schemas.PluginPlacement, order *int) error RemovePlugin(ctx context.Context, name string) error - GetPluginStatus(ctx context.Context) map[string]schemas.PluginStatus } // PluginsHandler is the handler for the plugins API diff --git a/transports/config.schema.json b/transports/config.schema.json index 2728b039f9..e159208e13 100644 --- a/transports/config.schema.json +++ b/transports/config.schema.json @@ -1440,10 +1440,6 @@ "type": "string", "description": "Model to use for generating embeddings in provider-backed semantic caching. 
Required when provider is set and not allowed in direct-only mode." }, - "cleanup_on_shutdown": { - "type": "boolean", - "description": "Clean up cache on shutdown (default: false)" - }, "ttl": { "description": "Time-to-live for cached responses (supports duration strings like '5m', '1h' or seconds as number, default: 5min)", "oneOf": [ From a532cf7e3a2bdbd96f3ebfcbbdf7e4ab71a5448a Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:06:40 +0530 Subject: [PATCH 11/81] refactor: semantic cache ui revamp (#3331) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replaces the separate `PluginsForm` component with a fully self-contained `CachingView` that introduces a first-class **Direct / Direct + Semantic** mode toggle for the local cache plugin. Previously, the UI only exposed provider-backed semantic cache settings and had no concept of direct-only (hash-based) caching as a distinct, supported mode. This rewrite makes direct-only mode the default and gates semantic configuration behind an explicit mode selection. ## Changes - Deleted `pluginsForm.tsx` and consolidated all local cache configuration logic directly into `cachingView.tsx`. - Introduced a `CacheMode` type (`"direct"` | `"semantic"`) with a tab-based picker. Direct-only mode requires no embedding provider; semantic mode adds vector similarity on top and requires a provider, model, and dimension. - The enable/disable toggle now immediately calls `updatePlugin` or `createPlugin` (for first-time setup) rather than deferring the enabled-state change to the Save button, decoupling the plugin lifecycle from config edits. 
- Added `inferMode` to derive the active mode from a saved config, `isEmptyConfig` to detect zero-value configs from the API, `buildPayload` to strip semantic-only fields when persisting a direct-only config, and `validateForSave` for inline validation surfaced before the user clicks Save. - Structural change warnings (provider/model/dimension drift vs. server state) are now shown only when the user has actually modified those fields, rather than permanently in semantic mode. - Removed the Zod `cacheConfigSchema` validation path in favor of the new `validateForSave` function. - Removed the effect that auto-seeded a default provider/model/dimension on first load, since direct-only mode no longer requires those fields. - Per-request override documentation expanded to include `x-bf-cache-key` and `x-bf-cache-no-store` with clearer descriptions. ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test ```sh cd ui pnpm i || npm i pnpm build || npm run build ``` 1. Navigate to the Workspace → Config → Caching view. 2. Verify the page loads with **Direct only** selected by default and no provider/model/dimension fields visible. 3. Switch to **Direct + Semantic** and confirm provider, model, and dimension fields appear with inline validation. 4. Toggle caching on without a vector store configured and confirm the toggle is disabled. 5. Save a direct-only config and confirm the plugin is created/updated with `dimension: 1` and no provider fields. 6. Save a semantic config with a valid provider, model, and dimension and confirm the full payload is persisted. 7. Reload the page and confirm the saved mode and config are correctly hydrated. 
## Screenshots/Recordings Before/after screenshots recommended showing the mode tab picker, the conditional semantic fields, and the structural change warning banner. ## Breaking changes - [x] Yes - [ ] No The `PluginsForm` component is removed. Any code importing it directly will need to be updated. The enable/disable toggle now persists immediately rather than requiring a Save click, which changes the interaction model for existing users. ## Related issues N/A ## Security considerations No new auth, secrets, or PII handling introduced. API keys for embedding providers continue to be inherited from the provider's existing configuration and are not re-entered or stored in the cache config. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- ui/app/workspace/config/views/cachingView.tsx | 692 +++++++++++++++++- ui/app/workspace/config/views/pluginsForm.tsx | 566 -------------- 2 files changed, 679 insertions(+), 579 deletions(-) delete mode 100644 ui/app/workspace/config/views/pluginsForm.tsx diff --git a/ui/app/workspace/config/views/cachingView.tsx b/ui/app/workspace/config/views/cachingView.tsx index 4c00456cdc..1118d0f6b0 100644 --- a/ui/app/workspace/config/views/cachingView.tsx +++ b/ui/app/workspace/config/views/cachingView.tsx @@ -1,22 +1,282 @@ -import { getErrorMessage, useGetCoreConfigQuery } from "@/lib/store"; -import PluginsForm from "./pluginsForm"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { ModelMultiselect } from "@/components/ui/modelMultiselect"; +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; +import { Switch } from "@/components/ui/switch"; +import { 
Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; +import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons"; +import { EmbeddingSupportedProviders, getProviderLabel } from "@/lib/constants/logs"; +import { + getErrorMessage, + useCreatePluginMutation, + useGetCoreConfigQuery, + useGetPluginsQuery, + useGetProvidersQuery, + useUpdatePluginMutation, +} from "@/lib/store"; +import { CacheConfig, EditorCacheConfig, ModelProvider, ModelProviderName } from "@/lib/types/config"; +import { SEMANTIC_CACHE_PLUGIN } from "@/lib/types/plugins"; +import { cn } from "@/lib/utils"; +import { Loader2 } from "lucide-react"; +import { useEffect, useMemo, useState } from "react"; +import { toast } from "sonner"; + +// The local cache plugin runs in one of two modes. Direct-only is purely +// hash-based, no embedding provider needed; perfect for exact-replay +// caching. Semantic adds vector similarity on top, requiring an +// embedding-capable provider and the model's real dimension. +type CacheMode = "direct" | "semantic"; + +// Embedding-capable providers gate the semantic mode. Built-in providers +// are listed in EmbeddingSupportedProviders; custom providers expose +// support via custom_provider_config.allowed_requests.embedding. +const supportsEmbedding = (provider: ModelProvider): boolean => { + if (provider.custom_provider_config) { + return provider.custom_provider_config.allowed_requests?.embedding === true; + } + return (EmbeddingSupportedProviders as readonly string[]).includes(provider.name); +}; + +const defaultDirectConfig: EditorCacheConfig = { + ttl: 300, + threshold: 0.8, + dimension: 1, + conversation_history_threshold: 3, + exclude_system_prompt: false, + cache_by_model: true, + cache_by_provider: true, +}; + +// Configs we treat as "the user has nothing saved": both API responses +// where every field is the type's zero value and the literal undefined +// look like this. 
+const isEmptyConfig = (config: Partial | undefined): boolean => { + if (!config) return true; + // Booleans are deliberate user choices (e.g. cache_by_model: false), not + // empty markers — only treat numeric/string zero values as empty. + const isZero = (v: unknown) => v === undefined || v === null || v === 0 || v === ""; + return Object.values(config).every(isZero); +}; + +const toEditorCacheConfig = (config?: Partial): EditorCacheConfig => { + if (!config || isEmptyConfig(config)) { + return { ...defaultDirectConfig }; + } + return { ...defaultDirectConfig, ...config }; +}; + +const inferMode = (config: EditorCacheConfig): CacheMode => { + if (config.dimension && config.dimension > 1 && config.provider) return "semantic"; + return "direct"; +}; + +// Strip semantic-only fields when persisting a direct-only payload so the +// server validator doesn't reject a stale provider choice. +const buildPayload = (config: EditorCacheConfig, mode: CacheMode): CacheConfig => { + const base = { + ttl: config.ttl ?? 0, + threshold: config.threshold ?? 0, + conversation_history_threshold: config.conversation_history_threshold, + exclude_system_prompt: config.exclude_system_prompt, + cache_by_model: config.cache_by_model, + cache_by_provider: config.cache_by_provider, + vector_store_namespace: config.vector_store_namespace?.trim() || undefined, + default_cache_key: config.default_cache_key?.trim() || undefined, + }; + if (mode === "direct") { + return { ...base, dimension: 1 } as CacheConfig; + } + return { + ...base, + provider: config.provider as ModelProviderName, + embedding_model: config.embedding_model ?? "", + dimension: config.dimension ?? 
0, + } as CacheConfig; +}; + +const validateForSave = (config: EditorCacheConfig, mode: CacheMode): string | null => { + if (mode === "semantic") { + if (!config.provider) return "Pick an embedding provider for semantic mode, or switch to Direct only."; + if (!config.embedding_model?.trim()) return "Pick an embedding model for semantic mode."; + if (!config.dimension || config.dimension <= 1) { + return "Semantic mode requires the embedding model's real dimension (must be > 1)."; + } + } + if (config.ttl !== undefined && config.ttl < 0) return "TTL must be non-negative."; + if (config.threshold !== undefined && (config.threshold < 0 || config.threshold > 1)) { + return "Similarity threshold must be between 0 and 1."; + } + if ( + config.conversation_history_threshold !== undefined && + (config.conversation_history_threshold < 1 || config.conversation_history_threshold > 50) + ) { + return "Conversation history threshold must be between 1 and 50."; + } + return null; +}; export default function CachingView() { - const { data: bifrostConfig, isLoading, error: configError } = useGetCoreConfigQuery({ fromDB: true }); + const { data: bifrostConfig, isLoading: configLoading, error: configError } = useGetCoreConfigQuery({ fromDB: true }); + const isVectorStoreEnabled = bifrostConfig?.is_cache_connected ?? false; + + // Local cache state lives on the plugin row keyed by SEMANTIC_CACHE_PLUGIN. + // No dedicated /local-cache-config endpoint exists — the plugins API is + // the source of truth for both the enabled flag and the config blob. 
+ const { data: plugins, isLoading: pluginsLoading } = useGetPluginsQuery(); + const semanticCachePlugin = useMemo(() => plugins?.find((p) => p.name === SEMANTIC_CACHE_PLUGIN), [plugins]); + const enabledOnServer = Boolean(semanticCachePlugin?.enabled); + + const { data: providersData, error: providersError, isLoading: providersLoading } = useGetProvidersQuery(); + const providers = useMemo(() => providersData || [], [providersData]); + const embeddingProviders = useMemo(() => providers.filter(supportsEmbedding), [providers]); + + const [updatePlugin, { isLoading: isUpdating }] = useUpdatePluginMutation(); + const [createPlugin, { isLoading: isCreating }] = useCreatePluginMutation(); + const isSaving = isUpdating || isCreating; + + const [cacheConfig, setCacheConfig] = useState(defaultDirectConfig); + const [serverCacheConfig, setServerCacheConfig] = useState(defaultDirectConfig); + const [mode, setMode] = useState("direct"); + + // Hydrate from the plugin row once it lands. If the plugin doesn't exist + // yet (first-time setup), keep the default direct-only seed so the user + // can start typing before any save. + useEffect(() => { + if (plugins === undefined) return; + if (!semanticCachePlugin?.config) return; + const editorConfig = toEditorCacheConfig(semanticCachePlugin.config as Partial); + setCacheConfig(editorConfig); + setServerCacheConfig(editorConfig); + setMode(inferMode(editorConfig)); + }, [plugins, semanticCachePlugin]); + + useEffect(() => { + if (providersError) { + toast.error(`Failed to load providers: ${getErrorMessage(providersError as any)}`); + } + }, [providersError]); + + // Surface validation problems inline rather than only on Save click. + const validationError = useMemo(() => validateForSave(cacheConfig, mode), [cacheConfig, mode]); + + // Only show the dimension/namespace heads-up when the user has actually + // touched a structural field. 
Showing it permanently in semantic mode + // trains users to ignore it; showing it on diff makes it land. + const hasStructuralChange = useMemo(() => { + return ( + cacheConfig.provider !== serverCacheConfig.provider || + cacheConfig.embedding_model !== serverCacheConfig.embedding_model || + cacheConfig.dimension !== serverCacheConfig.dimension + ); + }, [cacheConfig, serverCacheConfig]); + + const hasUnsavedConfigChanges = useMemo(() => { + const fields: (keyof EditorCacheConfig)[] = [ + "provider", + "embedding_model", + "dimension", + "ttl", + "threshold", + "conversation_history_threshold", + "exclude_system_prompt", + "cache_by_model", + "cache_by_provider", + "vector_store_namespace", + "default_cache_key", + ]; + const changed = fields.some((k) => (cacheConfig[k] ?? "") !== (serverCacheConfig[k] ?? "")); + const modeChanged = inferMode(serverCacheConfig) !== mode; + return changed || modeChanged; + }, [cacheConfig, serverCacheConfig, mode]); + + const updateLocal = (updates: Partial) => { + setCacheConfig((prev) => ({ ...prev, ...updates })); + }; + + // Toggle handler. Updates the semantic_cache plugin's enabled flag while + // keeping the last-saved config so the backend can ReloadPlugin/RemovePlugin + // based on the new flag. When toggling on for the first time and no plugin + // row exists, we seed it with the current editor config (direct-only by + // default) so the create call has a valid payload — the user can refine + // the config and Save afterwards. + const handleToggle = async (checked: boolean) => { + try { + if (semanticCachePlugin) { + await updatePlugin({ + name: SEMANTIC_CACHE_PLUGIN, + data: { enabled: checked, config: semanticCachePlugin.config }, + }).unwrap(); + } else { + // No plugin row + user toggling off ⇒ nothing to disable. + // Bail before the success toast so we don't lie about the state. 
+ if (!checked) return; + const err = validateForSave(cacheConfig, mode); + if (err) { + toast.error(err); + return; + } + const payload = buildPayload(cacheConfig, mode); + await createPlugin({ + name: SEMANTIC_CACHE_PLUGIN, + enabled: true, + config: payload, + path: "", + }).unwrap(); + } + toast.success(checked ? "Local cache enabled" : "Local cache disabled"); + } catch (error) { + toast.error(`Failed to ${checked ? "enable" : "disable"} local cache: ${getErrorMessage(error)}`); + } + }; + + const handleSave = async () => { + const err = validateForSave(cacheConfig, mode); + if (err) { + toast.error(err); + return; + } + const payload = buildPayload(cacheConfig, mode); + try { + const updated = semanticCachePlugin + ? await updatePlugin({ + name: SEMANTIC_CACHE_PLUGIN, + data: { enabled: semanticCachePlugin.enabled, config: payload }, + }).unwrap() + : await createPlugin({ + name: SEMANTIC_CACHE_PLUGIN, + enabled: false, + config: payload, + path: "", + }).unwrap(); + const editor = toEditorCacheConfig(updated.config as Partial); + setCacheConfig(editor); + setServerCacheConfig(editor); + setMode(inferMode(editor)); + toast.success("Cache configuration updated"); + } catch (error) { + toast.error(`Failed to update cache configuration: ${getErrorMessage(error)}`); + } + }; + + const cachingActive = enabledOnServer && isVectorStoreEnabled; + const isLoading = configLoading || pluginsLoading; return ( -
+
-

Caching

-

Configure semantic caching for requests.

+

Local Cache

+

+ Cache responses locally with two complementary lookup paths: direct hash matching for exact replays, and{" "} + semantic similarity search for related content. Send the x-bf-cache-key header to scope cached + responses to a tenant or feature. {!isVectorStoreEnabled && ( + + Requires a vector store to be configured and enabled in config.json. + + )} +

- {isLoading && ( -
-

Loading configuration...

-
- )} - {configError !== undefined && (

Failed to load configuration

@@ -26,7 +286,413 @@ export default function CachingView() {
)} - {!isLoading && !configError && } + {isLoading && ( +
+ +
+ )} + + {!isLoading && !configError && ( +
+ {/* Enable toggle flips plugin.enabled on the semantic_cache + plugin row. The plugins API handles ReloadPlugin / + RemovePlugin transparently on update. */} +
+
+ +

+ Loads (or unloads) the plugin without a server restart. Configuration changes you make below mutate the live + plugin in place, no redeploy needed.{" "} + +

+
+ +
+ + {providersLoading ? ( +
+ +
+ ) : ( + <> +
+ {/* Mode picker. Direct-only is first-class. */} +
+ + setMode(v as CacheMode)}> + + + Direct only + + + Direct + Semantic + + + +

+ {mode === "direct" ? ( + <> + Direct-only mode hashes each request and replays an exact match. No embeddings, no provider needed. + Cheapest path, perfect for stable prompts. + + ) : ( + <> + Direct + semantic mode adds vector similarity search on top of direct hash matching. Requires an + embedding-capable provider and the model's real dimension. Direct hits are still served first; + semantic search runs only when the direct lookup misses. + + )} +

+
+ + {validationError && ( +
+ {validationError} +
+ )} + + {/* Provider/model/dimension only appear in semantic mode. */} + {mode === "semantic" && ( + <> + {hasStructuralChange && ( +
+ Heads up: a vector store namespace can only hold vectors of one dimension. Whenever you + change the embedding provider, model, or dimension, make sure the dimension{" "} + still matches what the model produces, otherwise writes to the existing namespace will fail and reads + will silently miss. The namespace is not recreated automatically; either use a fresh namespace + or drop the existing class/index in your vector store before saving. +
+ )} + +
+

Embedding Provider & Model

+
+
+ + +
+
+ + updateLocal({ embedding_model: model })} + placeholder={cacheConfig.provider ? "Search or type an embedding model..." : "Select a provider first"} + disabled={!cacheConfig.provider} + /> +
+
+

+ API keys are inherited from the embedding provider's main configuration, you don't need to + add them again here. +

+
+ + { + const value = e.target.value; + if (value === "") { + updateLocal({ dimension: undefined }); + return; + } + const parsed = parseInt(value); + if (!Number.isNaN(parsed)) { + updateLocal({ dimension: parsed }); + } + }} + /> +

+ Vector size produced by the embedding model. Must match the model exactly (e.g. 1536{" "} + for OpenAI text-embedding-3-small, 3072 for{" "} + text-embedding-3-large, 768 for many Cohere/Voyage models). +

+
+
+ + )} + + {/* Cache settings shared across modes. */} +
+

Cache Settings

+
+
+ + { + const value = e.target.value; + if (value === "") { + updateLocal({ ttl: undefined }); + return; + } + const parsed = parseInt(value); + if (!Number.isNaN(parsed)) { + updateLocal({ ttl: parsed }); + } + }} + /> +

+ How long cached entries live before they expire. Override per-request via the x-bf-cache-ttl header. +

+
+ {mode === "semantic" && ( +
+ + { + const value = e.target.value; + if (value === "") { + updateLocal({ threshold: undefined }); + return; + } + const parsed = parseFloat(value); + if (!Number.isNaN(parsed)) { + updateLocal({ threshold: parsed }); + } + }} + /> +

+ Minimum cosine similarity for a semantic hit. Override per-request via{" "} + x-bf-cache-threshold. +

+
+ )} +
+
+ + {/* Storage & Cache Key. */} +
+

Storage & Cache Key

+
+
+ + updateLocal({ vector_store_namespace: e.target.value })} + /> +

+ Bucket/index name where cache entries live. Leave blank to use the default ( + BifrostLocalCachePlugin). Changing this points the plugin at a different (possibly empty) + bucket. Old entries are not deleted, they just stop being queried. +

+
+
+ + updateLocal({ default_cache_key: e.target.value })} + /> +

+ Fallback partition key used when a request doesn't set x-bf-cache-key. Cache keys isolate + entries: same key ↔ shared cache pool. Leave blank to disable caching for any request that + doesn't send the header. +

+
+
+
+ + {/* Conversation Settings. */} +
+

Conversation Settings

+
+
+ + { + const value = e.target.value; + if (value === "") { + updateLocal({ conversation_history_threshold: undefined }); + return; + } + const parsed = parseInt(value); + if (!Number.isNaN(parsed)) { + updateLocal({ conversation_history_threshold: parsed }); + } + }} + /> +

+ Skip caching for conversations with more than this many messages. Long histories rarely match exactly + and inflate the cache without paying off. +

+
+
+
+
+
+ +

Strip system messages from the cache key.

+
+ updateLocal({ exclude_system_prompt: checked })} + size="md" + /> +
+
+
+ + {/* Cache Behavior applies to both modes. */} +
+

Cache Key Composition

+
+
+
+ +

+ Include model name in the cache key. Different models won't share cached responses. +

+
+ updateLocal({ cache_by_model: checked })} + size="md" + /> +
+
+
+ +

+ Include provider name in the cache key. Different providers won't share cached responses. +

+
+ updateLocal({ cache_by_provider: checked })} + size="md" + /> +
+
+
+ +
+ +
    +
  • + x-bf-cache-key: scope this request to a specific cache partition. +
  • +
  • + x-bf-cache-ttl: override TTL for just this request. +
  • +
  • + x-bf-cache-threshold: override the semantic similarity threshold. +
  • +
  • + x-bf-cache-type: send direct or semantic to limit lookup to one path. +
  • +
  • + x-bf-cache-no-store: true to skip writing the response (still serves cached hits). +
  • +
+
+
+ +
+ +
+ + )} +
+ )}
); } \ No newline at end of file diff --git a/ui/app/workspace/config/views/pluginsForm.tsx b/ui/app/workspace/config/views/pluginsForm.tsx deleted file mode 100644 index fc4ddae7da..0000000000 --- a/ui/app/workspace/config/views/pluginsForm.tsx +++ /dev/null @@ -1,566 +0,0 @@ -import { Button } from "@/components/ui/button"; -import { Card, CardContent } from "@/components/ui/card"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; -import { ModelMultiselect } from "@/components/ui/modelMultiselect"; -import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; -import { Separator } from "@/components/ui/separator"; -import { Switch } from "@/components/ui/switch"; -import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons"; -import { EmbeddingSupportedProviders, getProviderLabel } from "@/lib/constants/logs"; -import { getErrorMessage, useCreatePluginMutation, useGetPluginsQuery, useGetProvidersQuery, useUpdatePluginMutation } from "@/lib/store"; -import { CacheConfig, EditorCacheConfig, ModelProvider, ModelProviderName } from "@/lib/types/config"; -import { SEMANTIC_CACHE_PLUGIN } from "@/lib/types/plugins"; -import { cacheConfigSchema } from "@/lib/types/schemas"; -import { Loader2 } from "lucide-react"; -import { useEffect, useMemo, useState } from "react"; -import { toast } from "sonner"; - -// Semantic caching needs an embedding-capable provider. Built-in providers are -// gated by EmbeddingSupportedProviders; custom providers expose support via -// custom_provider_config.allowed_requests.embedding. 
-const supportsEmbedding = (provider: ModelProvider): boolean => { - if (provider.custom_provider_config) { - return provider.custom_provider_config.allowed_requests?.embedding === true; - } - return (EmbeddingSupportedProviders as readonly string[]).includes(provider.name); -}; - -const defaultCacheConfig: EditorCacheConfig = { - ttl: 300, - threshold: 0.8, - conversation_history_threshold: 3, - exclude_system_prompt: false, - cache_by_model: true, - cache_by_provider: true, -}; - -const toEditorCacheConfig = (config?: Partial & { ttl_seconds?: number }): EditorCacheConfig => { - const { ttl_seconds, ...rest } = config ?? {}; - const merged: EditorCacheConfig = { ...defaultCacheConfig, ...rest }; - // Migration: older saves stored TTL under `ttl_seconds`; the Go plugin only - // reads `ttl`, so adopt the legacy value if the new field isn't present. - if (rest.ttl === undefined && typeof ttl_seconds === "number") { - merged.ttl = ttl_seconds; - } - return merged; -}; - -const normalizeCacheConfigForSave = (config: EditorCacheConfig) => { - const normalized: Record = { - ttl: config.ttl, - threshold: config.threshold, - cache_by_model: config.cache_by_model, - cache_by_provider: config.cache_by_provider, - }; - - if (config.conversation_history_threshold !== undefined) { - normalized.conversation_history_threshold = config.conversation_history_threshold; - } - if (config.exclude_system_prompt !== undefined) { - normalized.exclude_system_prompt = config.exclude_system_prompt; - } - if (config.created_at !== undefined) { - normalized.created_at = config.created_at; - } - if (config.updated_at !== undefined) { - normalized.updated_at = config.updated_at; - } - - const provider = config.provider?.trim(); - const embeddingModel = config.embedding_model?.trim(); - const namespace = config.vector_store_namespace?.trim(); - const defaultKey = config.default_cache_key?.trim(); - - if (provider) { - normalized.provider = provider; - } - if (embeddingModel) { - 
normalized.embedding_model = embeddingModel; - } - if (config.dimension !== undefined) { - normalized.dimension = config.dimension; - } - if (namespace) { - normalized.vector_store_namespace = namespace; - } - if (defaultKey) { - normalized.default_cache_key = defaultKey; - } - - return normalized; -}; - -interface PluginsFormProps { - isVectorStoreEnabled: boolean; -} - -export default function PluginsForm({ isVectorStoreEnabled }: PluginsFormProps) { - const [cacheConfig, setCacheConfig] = useState(defaultCacheConfig); - const [originalCacheEnabled, setOriginalCacheEnabled] = useState(false); - const [serverCacheConfig, setServerCacheConfig] = useState(defaultCacheConfig); - const [serverCacheEnabled, setServerCacheEnabled] = useState(false); - - const { data: providersData, error: providersError, isLoading: providersLoading } = useGetProvidersQuery(); - - const providers = useMemo(() => providersData || [], [providersData]); - const embeddingProviders = useMemo(() => providers.filter(supportsEmbedding), [providers]); - - useEffect(() => { - if (providersError) { - toast.error(`Failed to load providers: ${getErrorMessage(providersError as any)}`); - } - }, [providersError]); - - // RTK Query hooks - const { data: plugins, isLoading: loading } = useGetPluginsQuery(); - const [updatePlugin, { isLoading: isUpdating }] = useUpdatePluginMutation(); - const [createPlugin, { isLoading: isCreating }] = useCreatePluginMutation(); - - // Get semantic cache plugin and its config - const semanticCachePlugin = useMemo(() => plugins?.find((plugin) => plugin.name === SEMANTIC_CACHE_PLUGIN), [plugins]); - - const isSemanticCacheEnabled = Boolean(semanticCachePlugin?.enabled); - const loadedDirectOnlyConfig = serverCacheConfig.dimension === 1 && !serverCacheConfig.provider; - const hasInvalidProviderBackedDimension = cacheConfig.dimension === 1 && Boolean(cacheConfig.provider?.trim()); - - // Initialize cache config from plugin data - useEffect(() => { - if 
(semanticCachePlugin?.config) { - const config = toEditorCacheConfig(semanticCachePlugin.config as Partial); - setCacheConfig(config); - setServerCacheConfig(config); - setOriginalCacheEnabled(semanticCachePlugin.enabled); - setServerCacheEnabled(semanticCachePlugin.enabled); - } - }, [semanticCachePlugin]); - - // Seed default provider/model/dimension when the providers list loads, but - // only for new configs that haven't picked a provider yet — re-running this - // effect on subsequent embeddingProviders changes would otherwise clobber - // an in-progress user selection. - useEffect(() => { - if (embeddingProviders.length > 0 && !semanticCachePlugin?.config) { - setCacheConfig((prev) => { - if (prev.provider) return prev; - return { - ...prev, - provider: embeddingProviders[0].name as ModelProviderName, - embedding_model: prev.embedding_model ?? "text-embedding-3-small", - dimension: prev.dimension ?? 1536, - }; - }); - } - }, [embeddingProviders, semanticCachePlugin?.config]); - - const hasChanges = useMemo(() => { - if (originalCacheEnabled !== serverCacheEnabled) return true; - - return ( - cacheConfig.provider !== serverCacheConfig.provider || - cacheConfig.embedding_model !== serverCacheConfig.embedding_model || - cacheConfig.dimension !== serverCacheConfig.dimension || - cacheConfig.ttl !== serverCacheConfig.ttl || - cacheConfig.threshold !== serverCacheConfig.threshold || - cacheConfig.conversation_history_threshold !== serverCacheConfig.conversation_history_threshold || - cacheConfig.exclude_system_prompt !== serverCacheConfig.exclude_system_prompt || - cacheConfig.cache_by_model !== serverCacheConfig.cache_by_model || - cacheConfig.cache_by_provider !== serverCacheConfig.cache_by_provider || - (cacheConfig.vector_store_namespace ?? "") !== (serverCacheConfig.vector_store_namespace ?? "") || - (cacheConfig.default_cache_key ?? "") !== (serverCacheConfig.default_cache_key ?? 
"") - ); - }, [cacheConfig, serverCacheConfig, originalCacheEnabled, serverCacheEnabled]); - - // Handle semantic cache toggle (create or update) - const handleSemanticCacheToggle = (enabled: boolean) => { - setOriginalCacheEnabled(enabled); - }; - - // Update cache config locally - const updateCacheConfigLocal = (updates: Partial) => { - setCacheConfig((prev) => ({ ...prev, ...updates })); - }; - - // Save all changes - const handleSave = async () => { - if (hasInvalidProviderBackedDimension) { - toast.error( - "Provider-backed semantic cache requires the embedding model's real dimension. Use a value greater than 1, or remove the provider to keep direct-only mode.", - ); - return; - } - - const parseResult = cacheConfigSchema.safeParse(normalizeCacheConfigForSave(cacheConfig)); - if (!parseResult.success) { - const firstIssue = parseResult.error.issues[0]?.message ?? "Semantic cache configuration is invalid."; - toast.error(firstIssue); - return; - } - - const savedConfig = parseResult.data as CacheConfig; - - try { - if (semanticCachePlugin) { - // Update existing plugin - await updatePlugin({ - name: SEMANTIC_CACHE_PLUGIN, - data: { enabled: originalCacheEnabled, config: savedConfig }, - }).unwrap(); - } else { - // Create new plugin - await createPlugin({ - name: SEMANTIC_CACHE_PLUGIN, - enabled: originalCacheEnabled, - config: savedConfig, - path: "", - }).unwrap(); - } - toast.success("Plugin configuration updated successfully"); - // Update server state to match current state - const normalizedConfig = toEditorCacheConfig(savedConfig); - setCacheConfig(normalizedConfig); - setServerCacheConfig(normalizedConfig); - setServerCacheEnabled(originalCacheEnabled); - } catch (error) { - const errorMessage = getErrorMessage(error); - toast.error(`Failed to update plugin configuration: ${errorMessage}`); - } - }; - - if (loading) { - return ( - - -
Loading plugins configuration...
-
-
- ); - } - - return ( -
- {/* Semantic Cache Toggle */} -
-
-
- -

- Enable semantic caching for requests. Send x-bf-cache-key header with requests to use semantic caching.{" "} - {!isVectorStoreEnabled && ( - Requires vector store to be configured and enabled in config.json. - )} - {!providersLoading && providers?.length === 0 && ( - Requires at least one provider to be configured. - )} - {!providersLoading && providers.length > 0 && embeddingProviders.length === 0 && ( - - {" "} - Requires at least one provider that supports embedding requests. Configure a built-in embedding provider, or enable the - embeddingrequest type on a custom provider. - - )} -

-
-
- { - if (isVectorStoreEnabled) { - handleSemanticCacheToggle(checked); - } - }} - /> -
-
- - {/* Cache Configuration (only show when enabled) */} - {originalCacheEnabled && - isVectorStoreEnabled && - (providersLoading ? ( -
- -
- ) : ( -
- - {loadedDirectOnlyConfig && ( -
- This plugin was loaded in direct-only mode via config.json. The Web UI currently edits provider-backed - semantic cache settings; keep using config.json if you want to stay in direct-only mode. -
- )} - {hasInvalidProviderBackedDimension && ( -
- You selected a provider while keeping dimension: 1. That is only valid for direct-only mode. Set the - embedding model's real dimension before saving, or remove the provider to stay in direct-only mode. -
- )} -
- Heads up: a vector store namespace can only hold vectors of one dimension. Whenever you - change the embedding provider, model, or dimension, make sure the dimension still matches what the model produces - otherwise writes to the existing namespace will - fail and reads will silently miss. The namespace is not recreated automatically; either use a fresh namespace or drop the existing class/index in your vector store - before saving. -
- {/* Provider and Model Settings */} -
-

Provider and Model Settings

-
-
- - -
-
- - updateCacheConfigLocal({ embedding_model: model })} - placeholder={cacheConfig.provider ? "Search or type an embedding model..." : "Select a provider first"} - disabled={!cacheConfig.provider} - /> -
-
-

- API keys for the embedding provider will be inherited from the main provider configuration. The semantic cache will use - the configured provider's keys automatically. -

-
- - {/* Cache Settings */} -
-

Cache Settings

-
-
- - { - const value = e.target.value; - if (value === "") { - updateCacheConfigLocal({ ttl: undefined }); - return; - } - const parsed = parseInt(value); - if (!Number.isNaN(parsed)) { - updateCacheConfigLocal({ ttl: parsed }); - } - }} - /> -
-
- - { - const value = e.target.value; - if (value === "") { - updateCacheConfigLocal({ threshold: undefined }); - return; - } - const parsed = parseFloat(value); - if (!Number.isNaN(parsed)) { - updateCacheConfigLocal({ threshold: parsed }); - } - }} - /> -
-
- - { - const value = e.target.value; - if (value === "") { - updateCacheConfigLocal({ dimension: undefined }); - return; - } - const parsed = parseInt(value); - if (!Number.isNaN(parsed)) { - updateCacheConfigLocal({ dimension: parsed }); - } - }} - /> -

- Vector size produced by the embedding model - must match the model exactly (e.g. 1536 for - OpenAI text-embedding-3-small, 3072 for text-embedding-3-large, - 768 for many Cohere/Voyage models). Use 1 only in direct-only mode (no provider). -

-
-
-
- - {/* Storage & Cache Key */} -
-

Storage & Cache Key

-
-
- - updateCacheConfigLocal({ vector_store_namespace: e.target.value })} - /> -

- Bucket/index name where cache entries are stored in the vector store. Leave blank to use the default - (BifrostSemanticCachePlugin). Changing the namespace points the plugin at a different (possibly empty) bucket. All previously - cached entries become inaccessible - every request will miss until the new namespace is repopulated. -

-
-
- - updateCacheConfigLocal({ default_cache_key: e.target.value })} - /> -

- Fallback value used as the cache partition when a request doesn't set the x-bf-cache-key header. - Cache keys isolate entries: requests that share a key can hit each other's cached responses, while requests - with different keys can't. Leaving this blank means caching is disabled for any request that doesn't - send the header. -

-
-
-
- - {/* Conversation Settings */} -
-

Conversation Settings

-
-
- - updateCacheConfigLocal({ conversation_history_threshold: parseInt(e.target.value) || 3 })} - /> -

- Skip caching for conversations with more than this number of messages (prevents false positives) -

-
-
-
-
-
- -

Exclude system messages from cache key generation

-
- updateCacheConfigLocal({ exclude_system_prompt: checked })} - size="md" - /> -
-
-
- - {/* Cache Behavior */} -
-

Cache Behavior

-
-
-
- -

Include model name in cache key

-
- updateCacheConfigLocal({ cache_by_model: checked })} - size="md" - /> -
-
-
- -

Include provider name in cache key

-
- updateCacheConfigLocal({ cache_by_provider: checked })} - size="md" - /> -
-
-
- -
- -
    -
  • - You can pass x-bf-cache-ttl header with requests to use request-specific TTL. -
  • -
  • - You can pass x-bf-cache-threshold header with requests to use request-specific similarity threshold. -
  • -
  • - You can pass x-bf-cache-type header with "direct" or "semantic" to control cache behavior. -
  • -
  • - You can pass x-bf-cache-no-store header with "true" to disable response caching. -
  • -
-
- -
- -
-
- ))} -
-
- ); -} \ No newline at end of file From 8216bd006ff36476523a084512c44fbd62a8c389 Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:09:39 +0530 Subject: [PATCH 12/81] fix: resolve cache plugin at request time to support post-boot loads and plugin reloads (#3423) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary The `CacheHandler` previously captured a reference to the `semantic_cache` plugin at boot time. This caused two bugs: (1) if the plugin was not present in `config.json` at startup, cache-clear routes were never registered, resulting in HTTP 405 for the entire process lifetime; (2) if the plugin was loaded or reloaded via `/api/plugins` after boot, the handler held a stale (or nil) pointer and would silently misbehave. Additionally, `GET /api/plugins/:name` was returning the raw plugin config without runtime status, causing the UI to see an empty status when refetching a single plugin. ## Changes - `CacheHandler` now accepts a `CacheClearerResolver` function instead of a concrete plugin pointer. The resolver is called on every cache-clear request, so plugin lifecycle changes via `/api/plugins` are always honored. - `CacheClearer` and `CacheClearerResolver` are exported so server wiring can supply the resolver without importing the plugin's concrete type. - Cache routes are registered unconditionally at startup. When no plugin is loaded, requests return HTTP 400 with a descriptive message instead of HTTP 405. - The server wiring in `RegisterAPIRoutes` uses a closure over `lib.FindPluginAs` to resolve the plugin per request, replacing the boot-time capture. - `getPlugin` now returns the same response shape as list/create/update (with runtime status merged in), fixing the empty status seen by `useGetPluginQuery` in the UI. 
- Tests cover the new "plugin not loaded" path for both `clearCache` and `clearCacheByKey`, and existing tests are updated to use the resolver-based constructor. ## Type of change - [x] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [x] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [ ] Docs ## How to test ```sh go test ./transports/bifrost-http/handlers/... go test ./transports/bifrost-http/... ``` 1. Start the server **without** `semantic_cache` in `config.json`. Issue `DELETE /api/cache/clear/{cacheId}` — expect HTTP 400 with `"semantic_cache plugin is not loaded"` (previously HTTP 405). 2. Load the `semantic_cache` plugin via `POST /api/plugins`. Repeat the request — expect the cache-clear to succeed. 3. Reload or remove the plugin via `PUT`/`DELETE /api/plugins`. Verify the handler reflects the new state on the next request without a server restart. 4. Issue `GET /api/plugins/{name}` for a loaded plugin and confirm the response includes runtime status fields, matching the shape returned by the list endpoint. ## Breaking changes - [x] Yes - [ ] No `NewCacheHandler` now accepts a `CacheClearerResolver` function instead of a `schemas.LLMPlugin`. Any caller constructing a `CacheHandler` directly must be updated to pass a resolver. ## Related issues ## Security considerations None. The change does not affect authentication, secrets, or PII handling. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [ ] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- transports/bifrost-http/handlers/cache.go | 48 ++++++++++------ .../bifrost-http/handlers/cache_test.go | 57 +++++++++++++++---- transports/bifrost-http/handlers/plugins.go | 5 +- transports/bifrost-http/server/server.go | 21 ++++--- 4 files changed, 95 insertions(+), 36 deletions(-) diff --git a/transports/bifrost-http/handlers/cache.go b/transports/bifrost-http/handlers/cache.go index 1f173f9679..c2becb1d4b 100644 --- a/transports/bifrost-http/handlers/cache.go +++ b/transports/bifrost-http/handlers/cache.go @@ -3,32 +3,36 @@ package handlers import ( "github.com/fasthttp/router" "github.com/maximhq/bifrost/core/schemas" - "github.com/maximhq/bifrost/plugins/semanticcache" "github.com/maximhq/bifrost/transports/bifrost-http/lib" "github.com/valyala/fasthttp" ) -// cacheClearer is the minimal contract the handler needs from the semantic -// cache plugin. Defined here (rather than imported) so tests can substitute -// a fake without spinning up a real vector store. -type cacheClearer interface { +// CacheClearer is the minimal contract the handler needs from the semantic +// cache plugin. Exported so the server wiring can supply a resolver without +// pulling in the plugin's concrete type and so tests can substitute a fake. +type CacheClearer interface { ClearCacheForCacheID(cacheID string) error ClearCacheForKey(cacheKey string) error } +// CacheClearerResolver returns the currently-loaded cache plugin or nil if +// none is loaded. 
Called on every cache-clear request so plugin lifecycle +// (POST/PUT/DELETE /api/plugins) is honored — without this, the handler +// would hold a stale pointer after a plugin reload and the routes would +// silently misbehave (or never exist at all if the plugin was loaded +// post-boot rather than at startup). +type CacheClearerResolver func() CacheClearer + type CacheHandler struct { - plugin cacheClearer + resolve CacheClearerResolver } -func NewCacheHandler(plugin schemas.LLMPlugin) *CacheHandler { - semanticCachePlugin, ok := plugin.(*semanticcache.Plugin) - if !ok { - logger.Fatal("Cache handler requires a semantic cache plugin") - } - - return &CacheHandler{ - plugin: semanticCachePlugin, - } +// NewCacheHandler returns a CacheHandler that resolves the current plugin +// at request time. The handler is safe to wire unconditionally — when no +// plugin is loaded, each cache-clear request returns HTTP 400 with a clear +// message rather than the route being absent (HTTP 405). +func NewCacheHandler(resolve CacheClearerResolver) *CacheHandler { + return &CacheHandler{resolve: resolve} } func (h *CacheHandler) RegisterRoutes(r *router.Router, middlewares ...schemas.BifrostHTTPMiddleware) { @@ -37,12 +41,17 @@ func (h *CacheHandler) RegisterRoutes(r *router.Router, middlewares ...schemas.B } func (h *CacheHandler) clearCache(ctx *fasthttp.RequestCtx) { + plugin := h.resolve() + if plugin == nil { + SendError(ctx, fasthttp.StatusBadRequest, "semantic_cache plugin is not loaded") + return + } cacheID, ok := ctx.UserValue("cacheId").(string) if !ok || cacheID == "" { SendError(ctx, fasthttp.StatusBadRequest, "Invalid cache ID") return } - if err := h.plugin.ClearCacheForCacheID(cacheID); err != nil { + if err := plugin.ClearCacheForCacheID(cacheID); err != nil { SendError(ctx, fasthttp.StatusInternalServerError, "Failed to clear cache") return } @@ -53,12 +62,17 @@ func (h *CacheHandler) clearCache(ctx *fasthttp.RequestCtx) { } func (h *CacheHandler) 
clearCacheByKey(ctx *fasthttp.RequestCtx) { + plugin := h.resolve() + if plugin == nil { + SendError(ctx, fasthttp.StatusBadRequest, "semantic_cache plugin is not loaded") + return + } cacheKey, ok := ctx.UserValue("cacheKey").(string) if !ok { SendError(ctx, fasthttp.StatusBadRequest, "Invalid cache key") return } - if err := h.plugin.ClearCacheForKey(cacheKey); err != nil { + if err := plugin.ClearCacheForKey(cacheKey); err != nil { SendError(ctx, fasthttp.StatusInternalServerError, "Failed to clear cache") return } diff --git a/transports/bifrost-http/handlers/cache_test.go b/transports/bifrost-http/handlers/cache_test.go index a27e763c9b..c92b50b7dc 100644 --- a/transports/bifrost-http/handlers/cache_test.go +++ b/transports/bifrost-http/handlers/cache_test.go @@ -11,10 +11,10 @@ import ( // fakeCacheClearer records calls and returns configured errors so the handler // branches can be exercised without a real semantic cache plugin. type fakeCacheClearer struct { - clearByID func(string) error - clearByKey func(string) error - idCalls []string - keyCalls []string + clearByID func(string) error + clearByKey func(string) error + idCalls []string + keyCalls []string } func (f *fakeCacheClearer) ClearCacheForCacheID(id string) error { @@ -41,13 +41,19 @@ func newCacheCtx(userKey, userVal string) *fasthttp.RequestCtx { return ctx } +// newCacheHandler builds a CacheHandler whose resolver always returns the +// given fake — mimics a steady-state "plugin loaded" environment. 
+func newCacheHandler(clearer CacheClearer) *CacheHandler { + return NewCacheHandler(func() CacheClearer { return clearer }) +} + // ----------------------------------------------------------------------------- // clearCache (DELETE /api/cache/clear/{cacheId}) // ----------------------------------------------------------------------------- func TestClearCache_OK(t *testing.T) { clearer := &fakeCacheClearer{} - h := &CacheHandler{plugin: clearer} + h := newCacheHandler(clearer) ctx := newCacheCtx("cacheId", "abc-123") h.clearCache(ctx) @@ -62,7 +68,7 @@ func TestClearCache_OK(t *testing.T) { func TestClearCache_RejectsEmptyID(t *testing.T) { clearer := &fakeCacheClearer{} - h := &CacheHandler{plugin: clearer} + h := newCacheHandler(clearer) ctx := newCacheCtx("cacheId", "") h.clearCache(ctx) @@ -77,7 +83,7 @@ func TestClearCache_RejectsEmptyID(t *testing.T) { func TestClearCache_MissingUserValue(t *testing.T) { clearer := &fakeCacheClearer{} - h := &CacheHandler{plugin: clearer} + h := newCacheHandler(clearer) // No user value set at all (simulates a routing misconfiguration). ctx := &fasthttp.RequestCtx{} @@ -92,7 +98,7 @@ func TestClearCache_PluginErrorReturns500(t *testing.T) { clearer := &fakeCacheClearer{ clearByID: func(string) error { return errors.New("store unavailable") }, } - h := &CacheHandler{plugin: clearer} + h := newCacheHandler(clearer) ctx := newCacheCtx("cacheId", "abc-123") h.clearCache(ctx) @@ -105,13 +111,30 @@ func TestClearCache_PluginErrorReturns500(t *testing.T) { } } +// TestClearCache_PluginNotLoaded covers the regression where the handler +// would 405 (route absent) or panic on a nil pointer when the plugin +// wasn't loaded at boot. The new resolver-based handler must return 400. 
+func TestClearCache_PluginNotLoaded(t *testing.T) { + h := NewCacheHandler(func() CacheClearer { return nil }) + + ctx := newCacheCtx("cacheId", "abc-123") + h.clearCache(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusBadRequest { + t.Fatalf("expected 400 when plugin not loaded, got %d", got) + } + if !strings.Contains(string(ctx.Response.Body()), "semantic_cache plugin is not loaded") { + t.Fatalf("expected plugin-not-loaded message, got %s", ctx.Response.Body()) + } +} + // ----------------------------------------------------------------------------- // clearCacheByKey (DELETE /api/cache/clear-by-key/{cacheKey}) // ----------------------------------------------------------------------------- func TestClearCacheByKey_OK(t *testing.T) { clearer := &fakeCacheClearer{} - h := &CacheHandler{plugin: clearer} + h := newCacheHandler(clearer) ctx := newCacheCtx("cacheKey", "session-42") h.clearCacheByKey(ctx) @@ -128,7 +151,7 @@ func TestClearCacheByKey_PluginErrorReturns500(t *testing.T) { clearer := &fakeCacheClearer{ clearByKey: func(string) error { return errors.New("vector store down") }, } - h := &CacheHandler{plugin: clearer} + h := newCacheHandler(clearer) ctx := newCacheCtx("cacheKey", "session-42") h.clearCacheByKey(ctx) @@ -137,3 +160,17 @@ func TestClearCacheByKey_PluginErrorReturns500(t *testing.T) { t.Fatalf("expected 500 on plugin error, got %d", got) } } + +func TestClearCacheByKey_PluginNotLoaded(t *testing.T) { + h := NewCacheHandler(func() CacheClearer { return nil }) + + ctx := newCacheCtx("cacheKey", "session-42") + h.clearCacheByKey(ctx) + + if got := ctx.Response.StatusCode(); got != fasthttp.StatusBadRequest { + t.Fatalf("expected 400 when plugin not loaded, got %d", got) + } + if !strings.Contains(string(ctx.Response.Body()), "semantic_cache plugin is not loaded") { + t.Fatalf("expected plugin-not-loaded message, got %s", ctx.Response.Body()) + } +} diff --git a/transports/bifrost-http/handlers/plugins.go 
b/transports/bifrost-http/handlers/plugins.go index 71613c15ba..465144afa5 100644 --- a/transports/bifrost-http/handlers/plugins.go +++ b/transports/bifrost-http/handlers/plugins.go @@ -226,7 +226,10 @@ func (h *PluginsHandler) getPlugin(ctx *fasthttp.RequestCtx) { SendError(ctx, 500, "Failed to retrieve plugin") return } - SendJSON(ctx, plugin) + // Return the same shape as list/create/update — with runtime status + // merged in — so the UI doesn't see an empty status when refetching a + // single plugin via useGetPluginQuery. + SendJSON(ctx, h.buildPluginResponse(ctx, plugin)) } // createPlugin creates a new plugin diff --git a/transports/bifrost-http/server/server.go b/transports/bifrost-http/server/server.go index 3dea79bdb4..c47ad3892e 100644 --- a/transports/bifrost-http/server/server.go +++ b/transports/bifrost-http/server/server.go @@ -1113,11 +1113,18 @@ func (s *BifrostHTTPServer) RegisterAPIRoutes(ctx context.Context, callbacks Ser return fmt.Errorf("failed to initialize governance handler: %v", err) } } - var cacheHandler *handlers.CacheHandler - semanticCachePlugin, _ := lib.FindPluginAs[*semanticcache.Plugin](s.Config, semanticcache.PluginName) - if semanticCachePlugin != nil { - cacheHandler = handlers.NewCacheHandler(semanticCachePlugin) - } + // Resolve the semantic_cache plugin per request so plugin reloads via + // /api/plugins are honored — the previous boot-time capture left stale + // references and (worse) skipped route registration entirely when the + // plugin wasn't in config.json at startup, causing 405 on all cache-clear + // endpoints for the process lifetime. 
+ cacheHandler := handlers.NewCacheHandler(func() handlers.CacheClearer { + p, err := lib.FindPluginAs[*semanticcache.Plugin](s.Config, semanticcache.PluginName) + if err != nil || p == nil { + return nil + } + return p + }) var promptsReloader handlers.PromptCacheReloader if promptsPlugin, err := lib.FindPluginAs[handlers.PromptCacheReloader](s.Config, s.getPromptsPluginName()); err == nil && promptsPlugin != nil { promptsReloader = promptsPlugin @@ -1162,9 +1169,7 @@ func (s *BifrostHTTPServer) RegisterAPIRoutes(ctx context.Context, callbacks Ser if promptsHandler != nil { promptsHandler.RegisterRoutes(s.Router, middlewares...) } - if cacheHandler != nil { - cacheHandler.RegisterRoutes(s.Router, middlewares...) - } + cacheHandler.RegisterRoutes(s.Router, middlewares...) if governanceHandler != nil { governanceHandler.RegisterRoutes(s.Router, middlewares...) } From 7df5e38e963036a41c02b699a9a63976fdd7ea0e Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:12:42 +0530 Subject: [PATCH 13/81] fix: decouple cache telemetry from write decision and guard no-op search paths in semantic cache (#3424) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes several correctness issues in the semantic cache plugin's `PostLLMHook` and related helpers: cache telemetry (`cache_debug`) was previously invisible to callers using `no-store`, cache-hit replay detection was fragile, non-positive per-request TTL overrides could silently kill cache writes, and requests with a `cache_type` header narrowed to a path the plugin cannot serve would still produce orphan cache entries. ## Changes - **Early exit for unsupported search paths in `PreLLMHook`**: When `resolveCacheTypes` resolves to a path the plugin cannot actually serve (e.g. 
`x-bf-cache-type=semantic` against a direct-only plugin, or an unknown header value), the hook now clears cache state and returns early instead of proceeding to generate an embedding or write an orphan entry under a random request UUID that no future read can match. - **Separated cache-hit replay handling from write-skip logic**: The `shouldSkipCaching` method (which conflated cache-hit detection with write-skip conditions) is replaced by `shouldSkipCacheWrite`. Cache-hit replay is now handled as a dedicated early return in `PostLLMHook` before any telemetry stamping, while `shouldSkipCacheWrite` gates only the write decision after telemetry is already stamped. This ensures `cache_debug` is always populated for callers using `no-store` or large-payload modes. - **Telemetry stamped before write decision**: `stampCacheDebugForMiss` is now called before `shouldSkipCacheWrite` is consulted, so observability is not conditional on whether the entry is ultimately written. - **Non-positive TTL overrides fall back to plugin default**: `resolveTTL` now treats a zero or negative per-request TTL override as "use default" rather than applying it, which would have set `expires_at=now` and silently discarded the cache write. - **Cleaned up stale comments**: Removed an outdated ordering constraint comment in `PostLLMHook` that no longer applies after the restructuring. - **Tests updated**: Test cases for `shouldSkipCaching` are renamed and updated to reflect the new `shouldSkipCacheWrite` contract. The cache-hit replay test case is removed from this suite (it is now an early return in `PostLLMHook`, not a condition inside the helper). A new default-is-false test is added. ## Type of change - [x] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [ ] Docs ## How to test ```sh go test ./plugins/semanticcache/... 
``` Validate the following scenarios: - A request with `x-bf-cache-type=semantic` against a plugin configured with `Provider=""` or `Dimension=1` should log a warning and skip caching entirely — no orphan entry should appear in the store. - A request with `Cache-Control: no-store` should still produce a populated `cache_debug` field in the response with `cache_hit=false`. - A per-request TTL override of `0s` should fall back to the plugin's configured default TTL and not silently discard the cache write. ## Breaking changes - [x] No ## Security considerations None. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [ ] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- plugins/semanticcache/main.go | 114 ++++++++++++++------- plugins/semanticcache/plugin_paths_test.go | 46 ++++----- 2 files changed, 96 insertions(+), 64 deletions(-) diff --git a/plugins/semanticcache/main.go b/plugins/semanticcache/main.go index 9fefc497f2..62695b75ac 100644 --- a/plugins/semanticcache/main.go +++ b/plugins/semanticcache/main.go @@ -360,6 +360,33 @@ func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.Bifro performDirectSearch, performSemanticSearch := plugin.resolveCacheTypes(ctx) + // If neither search path can produce a lookup in the current plugin + // configuration, skip caching entirely (no read, no write). Concretely: + // - x-bf-cache-type=semantic against a direct-only plugin (Provider="", + // Dimension=1) — generateEmbedding would fail with "provider is + // required", PostLLMHook would still write an orphan entry under a + // random request UUID that no future read can find. + // - x-bf-cache-type=direct against a misconfigured semantic-only plugin + // where direct search is disabled. 
+ // - An unknown cache-type header value (resolveCacheTypes returns false + // for both paths). + // The embedding executor alone isn't a sufficient gate — the framework + // wires it on every plugin, but the plugin's config decides whether + // semantic search is actually viable. + canDoSemanticSearch := plugin.embeddingRequestExecutor != nil && + plugin.config.Provider != "" && + plugin.config.EmbeddingModel != "" && + plugin.config.Dimension > 1 && + req.EmbeddingRequest == nil && + req.TranscriptionRequest == nil + if !performDirectSearch && (!performSemanticSearch || !canDoSemanticSearch) { + plugin.clearCacheState(requestID) + msg := "skipping cache: no search path available for this request (cache_type narrowed to a path that the current plugin configuration cannot serve)" + plugin.logger.Warn(msg) + ctx.Log(schemas.LogLevelWarn, msg) + return req, nil, nil + } + // Compute metadata + paramsHash once and reuse across both search paths. metadata, err := plugin.buildRequestMetadataForCaching(state, req) if err != nil { @@ -387,12 +414,11 @@ func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.Bifro } if performSemanticSearch { - // Suppress semantic for ineligible cases (no executor, or request - // types whose input cannot itself be embedded). - semanticEligible := plugin.embeddingRequestExecutor != nil && - req.EmbeddingRequest == nil && - req.TranscriptionRequest == nil - if !semanticEligible { + // Reuse canDoSemanticSearch so the default cache-type path (both flags + // true) applies the same provider/model/dimension gate as the explicit + // semantic-only path — otherwise a misconfigured plugin wastes one + // generateEmbedding round-trip per request before failing downstream. 
+ if !canDoSemanticSearch { plugin.setZeroVectorIfRequired(state) } else { shortCircuit, err := plugin.performSemanticSearch(ctx, state, req, cacheKey, paramsHash) @@ -486,10 +512,7 @@ func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.Bifr // Final-chunk signaling for cache replays: stampCacheDebugForHit only // stamps CacheDebug.CacheHit=true on the LAST replay chunk (see search.go). // When we see that stamp, we set the stream-end indicator on the root ctx - // synchronously — same goroutine as the rest of the post-hook chain. This - // MUST run before shouldSkipCaching, otherwise we early-return without - // setting the indicator and downstream plugins (logging) never see - // isFinalChunk=true on the final replay chunk. + // synchronously — same goroutine as the rest of the post-hook chain. // // Why not set the indicator from the cache replay goroutine instead? It // races: the producer can advance to its next iteration (and SetValue) @@ -498,15 +521,11 @@ func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.Bifr if bifrost.IsStreamRequestType(requestType) && cacheDebug != nil && cacheDebug.CacheHit { ctx.SetValue(schemas.BifrostContextKeyStreamEndIndicator, true) } - if plugin.shouldSkipCaching(ctx, res) { - // Clear state on the same condition the defer at the end of this - // function uses — otherwise the early return below leaks *cacheState - // (notably the ~6 KB Embeddings slice) until the periodic reaper - // runs. Non-final stream chunks of an in-flight short-circuit replay - // keep their state because they need it for later chunks. - if !bifrost.IsStreamRequestType(requestType) || bifrost.IsFinalChunk(ctx) { - plugin.clearCacheState(requestID) - } + // Cache hit replay: cache_debug was already stamped in PreLLMHook by + // stampCacheDebugForHit. There's nothing further to do here — no new + // telemetry to stamp, no write to perform. 
+ if cacheDebug != nil && cacheDebug.CacheHit { + plugin.clearCacheState(requestID) return res, nil, nil } @@ -523,8 +542,8 @@ func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.Bifr if state == nil || state.ParamsHash == "" { // PreLLMHook bailed before computing the params hash (unsupported // request type, conversation-history threshold, metadata error, - // etc.). Caching now would write an entry without params_hash that - // no future lookup can match. + // no-search-path narrow, etc.). Without state we have no telemetry + // to stamp and no entry to write. return res, nil, nil } @@ -536,20 +555,32 @@ func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.Bifr } }() - // PreLLMHook short-circuited from cache; chunks here are the cached - // replay, not a fresh upstream response. shouldSkipCaching only catches - // the FINAL chunk (the only one carrying CacheDebug.CacheHit=true via - // stampCacheDebugForHit) — without this guard the non-final chunks - // would slip into addStreamingResponse and trigger a duplicate write - // at the same directCacheID (Weaviate 422 "id already exists"). + // PreLLMHook short-circuited from cache (non-final stream chunks of a + // replay land here). Telemetry is already stamped on the final chunk by + // stampCacheDebugForHit; non-final chunks have no telemetry to add. + // Without this guard non-final chunks would slip into addStreamingResponse + // and trigger a duplicate write at the same directCacheID + // (Weaviate 422 "id already exists"). if state.ShortCircuited { return res, nil, nil } storageID, embedding, shouldStoreEmbeddings := plugin.resolveStorageIDAndEmbedding(ctx, state, requestID, requestType) + // Stamp cache_debug telemetry FIRST so callers can observe that the + // plugin ran a lookup, regardless of whether we then choose to skip + // writing the entry (no-store header, large-payload modes, etc.). 
+ // Observability shouldn't depend on the write decision — that was + // previously the case and made the cache layer invisible to callers + // using no-store. plugin.stampCacheDebugForMiss(state, extraFields, storageID, isStream, isFinalChunk) + // Now decide whether to actually write. Skipping the write still + // leaves cache_debug stamped above. + if plugin.shouldSkipCacheWrite(ctx) { + return res, nil, nil + } + cacheTTL := plugin.resolveTTL(ctx) paramsHash := state.ParamsHash @@ -579,19 +610,20 @@ func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.Bifr return res, nil, nil } -// shouldSkipCaching returns true if the response cannot or should not be -// written to the cache (large payload mode, cache hit replay, or explicit -// no-store). -func (plugin *Plugin) shouldSkipCaching(ctx *schemas.BifrostContext, res *schemas.BifrostResponse) bool { +// shouldSkipCacheWrite returns true if the upstream response should NOT be +// written to the cache store. Telemetry (cache_debug) is stamped before this +// is consulted, so callers retain observability on misses even when no_store +// or large-payload modes are in effect. The cache-hit-replay case is handled +// separately as an early return in PostLLMHook because it must short-circuit +// before stamping (cache_debug for hits is already populated by +// stampCacheDebugForHit during PreLLMHook). 
+func (plugin *Plugin) shouldSkipCacheWrite(ctx *schemas.BifrostContext) bool { if isLargePayload, ok := ctx.Value(schemas.BifrostContextKeyLargePayloadMode).(bool); ok && isLargePayload { return true } if isLargeResponse, ok := ctx.Value(schemas.BifrostContextKeyLargeResponseMode).(bool); ok && isLargeResponse { return true } - if cacheDebug := res.GetExtraFields().CacheDebug; cacheDebug != nil && cacheDebug.CacheHit { - return true - } if noStore, ok := ctx.Value(CacheNoStoreKey).(bool); ok && noStore { return true } @@ -650,13 +682,21 @@ func (plugin *Plugin) stampCacheDebugForMiss(state *cacheState, extraFields *sch } } -// resolveTTL returns the per-request TTL override if present, else the plugin default. +// resolveTTL returns the per-request TTL override if present, else the plugin +// default. A non-positive override (0 or negative) is treated as "use default" +// to mirror how Config.UnmarshalJSON + Init treat TTL=0 at construction time — +// otherwise a header of "0s" would yield expires_at=now and silently kill the +// cache write for the affected request, which is rarely what the caller wants. 
func (plugin *Plugin) resolveTTL(ctx *schemas.BifrostContext) time.Duration { if v := ctx.Value(CacheTTLKey); v != nil { if ttl, ok := v.(time.Duration); ok { - return ttl + if ttl > 0 { + return ttl + } + plugin.logger.Debug("ignoring non-positive per-request TTL override %v, falling back to plugin default", ttl) + } else { + plugin.logger.Warn("TTL is not a time.Duration, using default TTL") } - plugin.logger.Warn("TTL is not a time.Duration, using default TTL") } return plugin.config.TTL } diff --git a/plugins/semanticcache/plugin_paths_test.go b/plugins/semanticcache/plugin_paths_test.go index fc6f54920b..a1de790276 100644 --- a/plugins/semanticcache/plugin_paths_test.go +++ b/plugins/semanticcache/plugin_paths_test.go @@ -57,59 +57,51 @@ func TestPostLLMHook_SkipsOnBifrostError(t *testing.T) { } // ----------------------------------------------------------------------------- -// shouldSkipCaching paths +// shouldSkipCacheWrite paths +// +// shouldSkipCacheWrite gates only the cache WRITE — cache_debug telemetry is +// stamped before this is consulted (see PostLLMHook). The cache-hit replay +// case is handled separately as an early return in PostLLMHook and is not +// exercised here. 
// ----------------------------------------------------------------------------- -func TestShouldSkipCaching_LargePayloadMode(t *testing.T) { +func TestShouldSkipCacheWrite_LargePayloadMode(t *testing.T) { plugin := newTestPlugin(t, newObservableStore()) ctx := newBaseTestContext() ctx.SetValue(schemas.BifrostContextKeyLargePayloadMode, true) - res := &schemas.BifrostResponse{ChatResponse: &schemas.BifrostChatResponse{}} - if !plugin.shouldSkipCaching(ctx, res) { - t.Fatal("expected LargePayloadMode to skip caching") + if !plugin.shouldSkipCacheWrite(ctx) { + t.Fatal("expected LargePayloadMode to skip the cache write") } } -func TestShouldSkipCaching_LargeResponseMode(t *testing.T) { +func TestShouldSkipCacheWrite_LargeResponseMode(t *testing.T) { plugin := newTestPlugin(t, newObservableStore()) ctx := newBaseTestContext() ctx.SetValue(schemas.BifrostContextKeyLargeResponseMode, true) - res := &schemas.BifrostResponse{ChatResponse: &schemas.BifrostChatResponse{}} - if !plugin.shouldSkipCaching(ctx, res) { - t.Fatal("expected LargeResponseMode to skip caching") + if !plugin.shouldSkipCacheWrite(ctx) { + t.Fatal("expected LargeResponseMode to skip the cache write") } } -func TestShouldSkipCaching_CacheHitReplay(t *testing.T) { +func TestShouldSkipCacheWrite_NoStoreFlag(t *testing.T) { plugin := newTestPlugin(t, newObservableStore()) ctx := newBaseTestContext() - res := &schemas.BifrostResponse{ - ChatResponse: &schemas.BifrostChatResponse{ - ExtraFields: schemas.BifrostResponseExtraFields{ - CacheDebug: &schemas.BifrostCacheDebug{CacheHit: true}, - }, - }, - } + ctx.SetValue(CacheNoStoreKey, true) - if !plugin.shouldSkipCaching(ctx, res) { - t.Fatal("expected cache-hit replay to skip re-caching") + if !plugin.shouldSkipCacheWrite(ctx) { + t.Fatal("expected CacheNoStoreKey=true to skip the cache write") } } -func TestShouldSkipCaching_NoStoreFlag(t *testing.T) { +func TestShouldSkipCacheWrite_DefaultIsFalse(t *testing.T) { plugin := newTestPlugin(t, 
newObservableStore()) - - ctx := newBaseTestContext() - ctx.SetValue(CacheNoStoreKey, true) - res := &schemas.BifrostResponse{ChatResponse: &schemas.BifrostChatResponse{}} - - if !plugin.shouldSkipCaching(ctx, res) { - t.Fatal("expected CacheNoStoreKey=true to skip caching") + if plugin.shouldSkipCacheWrite(newBaseTestContext()) { + t.Fatal("expected default context to allow the cache write") } } From 8e4684ab6f3ed0db6049bc15aa0457dd50010787 Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:13:45 +0530 Subject: [PATCH 14/81] test: add semantic cache e2e test suite skeleton (#3425) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds a standalone end-to-end test suite for the `semantic_cache` plugin under `tests/semanticcache`. The suite validates the full caching lifecycle against a live Bifrost instance — plugin creation/teardown, cache miss/hit assertions, cross-provider behavior, streaming, and log cross-checking — without provisioning any infrastructure itself. ## Changes - **`e2e_test.go`** — `TestMain` entry point: loads config, initializes the report directory, checks Bifrost reachability, enforces plugin-absent precondition (with `RUN_FORCE=1` auto-delete), runs all phases, and performs best-effort teardown on exit. - **`preconditions_test.go`** — Phase 0 checks: Bifrost reachable, OpenAI configured, optional providers (Gemini, Anthropic) present with warnings if absent. - **`http_test.go`** — HTTP helpers for all request types: chat completions (streaming and non-streaming), text completions, embeddings, image generation, and the Responses API. Each helper dumps full request/response bodies to the report directory for forensics. - **`plugin_test.go`** — Plugin lifecycle helpers (`pluginCreate`, `pluginUpdate`, `pluginDelete`, `pluginGet`) mirroring the exact wire format the UI sends to `/api/plugins`. 
- **`assert_test.go`** — Assertion helpers (`assertMiss`, `assertHit`, `assertNoCacheDebug`, `assertSameCacheID`, `assertDifferentCacheID`) plus a configurable async write-settle wait (`SC_WRITE_SETTLE_MS`) to account for the plugin's async PostLLMHook store write. - **`cache_test.go`** — Cache management helpers (`clearByCacheID`, `clearByCacheKey`) wrapping the `/api/cache/clear/*` endpoints. - **`logs_crosscheck_test.go`** — Cross-checks the persisted log row's `cache_debug` against the in-flight response stamp, with polling to handle Bifrost's async logging pipeline and float epsilon tolerance for JSON encoder differences. - **`fixtures_test.go`** — Hand-curated paraphrase pairs for Phase 2 semantic cases, designed to land well above (canonical→paraphrase) or well below (canonical→unrelated) the default 0.8 similarity threshold. - **`log_test.go`** — Structured per-run logging to `reports//run.log` with optional `TRAIL_SESSION_ID` stamping for trail integration. - **`go.mod`** — Standalone module (`github.com/maximhq/bifrost/tests/semanticcache`), consistent with the `tests/governance` pattern, excluded from the repo's `go.work`. - **`README.md`** — Documents prerequisites, env vars, run commands, trail integration, and report output format. - **`.gitignore`** — Excludes `reports/` and `*.log` from version control. Notable design decisions: the suite is intentionally verify-only (no infrastructure provisioning), uses a dedicated vector store namespace (`BifrostSemanticCachePluginE2E`) to isolate test data, and writes full wire-level request/response artifacts per step to support post-mortem debugging without re-running. 
## Type of change - [ ] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [x] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [ ] Docs ## How to test Requires a running Bifrost instance with Weaviate configured, OpenAI (required), and optionally Gemini and Anthropic providers. ```sh cd tests/semanticcache # All phases GOWORK=off go test -v ./... # Single phase GOWORK=off go test -v -run TestPhase1_DirectOnly ./... # Auto-delete any pre-existing plugin row before run RUN_FORCE=1 GOWORK=off go test -v ./... # Keep plugin after run for post-mortem inspection RUN_KEEP_PLUGIN=1 GOWORK=off go test -v ./... ``` Environment variables: | Variable | Default | Purpose | |---|---|---| | `BIFROST_URL` | `http://localhost:8080` | Bifrost base URL | | `SC_CHAT_MODEL_OPENAI` | `openai/gpt-4o-mini` | OpenAI chat model | | `SC_CHAT_MODEL_OPENAI_ALT` | `openai/gpt-4o` | Alternate OpenAI model for cache-by-model cases | | `SC_EMBED_MODEL_OPENAI` | `text-embedding-3-small` | Embedding model for Phase 2 | | `SC_CHAT_MODEL_GEMINI` | `gemini/gemini-2.5-flash` | Gemini chat model | | `SC_CHAT_MODEL_ANTHROPIC` | `anthropic/claude-haiku-4-5` | Anthropic chat model | | `SC_NAMESPACE` | `BifrostSemanticCachePluginE2E` | Vector store namespace | | `SC_WRITE_SETTLE_MS` | `500` | Async write settle wait in ms | | `RUN_FORCE` | unset | `1` to delete pre-existing plugin before run | | `RUN_KEEP_PLUGIN` | unset | `1` to skip teardown on exit | | `TRAIL_SESSION_ID` | unset | Stamped onto every log line for trail integration | ## Screenshots/Recordings N/A ## Breaking changes - [x] No ## Related issues N/A ## Security considerations No secrets are stored in the test suite. API keys are consumed from the existing Bifrost provider configuration and never passed directly through the test harness. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [x] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- tests/semanticcache/.gitignore | 2 + tests/semanticcache/README.md | 77 +++ tests/semanticcache/assert_test.go | 144 +++++ tests/semanticcache/cache_test.go | 42 ++ tests/semanticcache/e2e_test.go | 141 ++++ tests/semanticcache/fixtures_test.go | 100 +++ tests/semanticcache/go.mod | 3 + tests/semanticcache/http_test.go | 674 ++++++++++++++++++++ tests/semanticcache/log_test.go | 95 +++ tests/semanticcache/logs_crosscheck_test.go | 188 ++++++ tests/semanticcache/plugin_test.go | 212 ++++++ tests/semanticcache/preconditions_test.go | 72 +++ 12 files changed, 1750 insertions(+) create mode 100644 tests/semanticcache/.gitignore create mode 100644 tests/semanticcache/README.md create mode 100644 tests/semanticcache/assert_test.go create mode 100644 tests/semanticcache/cache_test.go create mode 100644 tests/semanticcache/e2e_test.go create mode 100644 tests/semanticcache/fixtures_test.go create mode 100644 tests/semanticcache/go.mod create mode 100644 tests/semanticcache/http_test.go create mode 100644 tests/semanticcache/log_test.go create mode 100644 tests/semanticcache/logs_crosscheck_test.go create mode 100644 tests/semanticcache/plugin_test.go create mode 100644 tests/semanticcache/preconditions_test.go diff --git a/tests/semanticcache/.gitignore b/tests/semanticcache/.gitignore new file mode 100644 index 0000000000..6c7f6431d4 --- /dev/null +++ b/tests/semanticcache/.gitignore @@ -0,0 +1,2 @@ +reports/ +*.log diff --git a/tests/semanticcache/README.md b/tests/semanticcache/README.md new file mode 100644 index 0000000000..3a4d087984 --- /dev/null +++ b/tests/semanticcache/README.md @@ -0,0 +1,77 @@ +# Semantic Cache E2E + +End-to-end test suite for the `semantic_cache` plugin. 
See `PLAN.md` for the full case list. + +## Prerequisites + +The suite assumes a properly-provisioned test environment — it verifies but does not provision. + +- **Bifrost running** at `BIFROST_URL` (default `http://localhost:8080`). Required endpoints: `/api/plugins/*`, `/api/cache/*`, `/api/providers`, `/api/logs/{id}`, `/v1/chat/completions`. +- **Vector store** configured in `config.json`, type **`weaviate`**, reachable from Bifrost. The plugin will create/use namespace `BifrostSemanticCachePluginE2E` by default (override via `SC_NAMESPACE`). +- **Providers configured with API keys**: + - **OpenAI** — required. Must have a chat model (default `openai/gpt-4o-mini`), an alternate chat model used in cache-by-model cases (default `openai/gpt-4o`), and the embedding model `text-embedding-3-small` (used in Phase 2). + - **Gemini** — optional. When absent, cross-provider cases are skipped with a `WARN` in `0.3_optional_providers`. Chat model: default `gemini/gemini-2.5-flash`. + - **Anthropic** — optional. Same behavior as Gemini: absence skips cross-provider cases instead of aborting. Chat model: default `anthropic/claude-haiku-4-5`. +- **`semantic_cache` plugin must be ABSENT** at run start. Set `RUN_FORCE=1` to auto-delete a pre-existing row before the run. + +## Running + +```bash +# All phases (recommended) +GOWORK=off go test -v ./... + +# Single phase +GOWORK=off go test -v -run TestPhase1_DirectOnly ./... + +# Single case +GOWORK=off go test -v -run TestPhase1_DirectOnly/1.1_exact_match_chat ./... + +# Auto-delete pre-existing plugin row +RUN_FORCE=1 GOWORK=off go test -v ./... + +# Keep the plugin around for post-mortem +RUN_KEEP_PLUGIN=1 GOWORK=off go test -v ./... +``` + +`GOWORK=off` is required because this module isn't in the repo's `go.work` (test modules under `tests/*` follow the same pattern as `tests/governance` — standalone). 
+ +## Env vars + +| var | default | purpose | +| --- | --- | --- | +| `BIFROST_URL` | `http://localhost:8080` | Bifrost base URL | +| `SC_CHAT_MODEL_OPENAI` | `openai/gpt-4o-mini` | OpenAI chat model used in cases | +| `SC_CHAT_MODEL_OPENAI_ALT` | `openai/gpt-4o` | second OpenAI chat model for cache-by-model cases | +| `SC_EMBED_MODEL_OPENAI` | `text-embedding-3-small` | embedding model for Phase 2 | +| `SC_CHAT_MODEL_GEMINI` | `gemini/gemini-2.5-flash` | Gemini chat model | +| `SC_CHAT_MODEL_ANTHROPIC` | `anthropic/claude-haiku-4-5` | Anthropic chat model | +| `SC_NAMESPACE` | `BifrostSemanticCachePluginE2E` | vector store namespace (isolates test data from prod) | +| `RUN_FORCE` | unset | `1` → delete pre-existing plugin row before run | +| `RUN_KEEP_PLUGIN` | unset | `1` → skip teardown DELETE on exit | +| `TRAIL_SESSION_ID` | unset | stamped onto every log line when running under `trail` | + +## Trail integration + +Start Bifrost under `trail`, capture the session id, export it, then run: + +```bash +trail run --label semantic-cache-e2e -- ./bifrost-http -port 8080 -config config.json +# capture the printed session id, then in another shell: +export TRAIL_SESSION_ID= +RUN_FORCE=1 GOWORK=off go test -v ./... +``` + +Every log line carries `trail_sid=`, so a single `trail get_logs` call with that session id reconstructs both the test harness output and the Bifrost stdout for the run. + +## Output + +Each run writes to `reports//`: +- `run.log` — one structured line per step (mirrors `t.Logf` output) +- `p--s.req.json` / `.resp.json` — full request/response bodies for forensics +- `*.plugin_create.req.json` / `.plugin_update.req.json` — exact wire bodies sent to `/api/plugins` (for parity audit against the UI) + +On any FAIL the matching `*.resp.json` and `run.log` line carry enough info to grep via `trail` (look for `bifrost_req_id=` or `[SC-E2E] case=`). 
+ +## What's implemented so far + +Skeleton + Phase 0 preconditions + Phase 1 smallest viable loop (cases 1.1, 1.2, 1.3, 1.13). See `PLAN.md` §11 for the full implementation roadmap. diff --git a/tests/semanticcache/assert_test.go b/tests/semanticcache/assert_test.go new file mode 100644 index 0000000000..4c47e29e8d --- /dev/null +++ b/tests/semanticcache/assert_test.go @@ -0,0 +1,144 @@ +package semanticcache + +import ( + "os" + "strconv" + "testing" + "time" +) + +// cacheWriteSettle is the gap we wait between a miss-and-store request and a +// subsequent expected-hit, since PostLLMHook writes to the vector store in a +// goroutine (main.go:553-569) — the HTTP response returns before the write +// commits. 500ms covers typical Weaviate write latency including first-write +// cold start. Override via SC_WRITE_SETTLE_MS for environments with slower stores. +var cacheWriteSettle = func() time.Duration { + if v := os.Getenv("SC_WRITE_SETTLE_MS"); v != "" { + if ms, err := strconv.Atoi(v); err == nil && ms > 0 { + return time.Duration(ms) * time.Millisecond + } + } + return 500 * time.Millisecond +}() + +// waitForCacheWrite pauses long enough for the plugin's async PostLLMHook +// store write to commit before a follow-up read. Logged so timing is visible +// in run.log. +func waitForCacheWrite(t *testing.T, lc logCtx, step int) { + t.Helper() + logf(t, lc.at(step), "INFO", "wait_for_cache_write", map[string]any{ + "settle_ms": cacheWriteSettle.Milliseconds(), + }) + time.Sleep(cacheWriteSettle) +} + +// cacheDebugged is implemented by any HTTP response type that carries +// `extra_fields.cache_debug`. Lets the assertion helpers work across chat, +// text-completion, responses, embedding, image-gen, etc. without per-type +// duplication. +type cacheDebugged interface { + cacheDebug() *cacheDebug +} + +// assertMiss verifies the response is a cache miss with a non-empty cache_id stamped. 
+// cache_debug must be present (plugin ran), CacheHit must be false, cache_id must be set. +func assertMiss(t *testing.T, lc logCtx, step int, resp cacheDebugged) string { + t.Helper() + cd := resp.cacheDebug() + if cd == nil { + logf(t, lc.at(step), "FAIL", "assert_miss", map[string]any{"reason": "cache_debug absent"}) + t.Fatalf("expected miss with cache_debug stamped; cache_debug is nil") + } + if cd.CacheHit { + logf(t, lc.at(step), "FAIL", "assert_miss", map[string]any{"cache_hit": true}) + t.Fatalf("expected miss, got cache_hit=true cache_id=%s", deref(cd.CacheID)) + } + if cd.CacheID == nil || *cd.CacheID == "" { + logf(t, lc.at(step), "FAIL", "assert_miss", map[string]any{"reason": "cache_id empty on miss"}) + t.Fatalf("expected cache_id stamped on miss; got nil/empty") + } + logf(t, lc.at(step), "PASS", "assert_miss", map[string]any{"cache_id": *cd.CacheID}) + return *cd.CacheID +} + +// assertHit verifies the response is a cache hit with the expected hit_type. +// Returns the cache_id for further chaining (e.g. same_cache_id checks). 
+func assertHit(t *testing.T, lc logCtx, step int, resp cacheDebugged, wantType string) string { + t.Helper() + cd := resp.cacheDebug() + if cd == nil { + logf(t, lc.at(step), "FAIL", "assert_hit", map[string]any{"reason": "cache_debug absent"}) + t.Fatalf("expected hit with cache_debug stamped; cache_debug is nil") + } + if !cd.CacheHit { + logf(t, lc.at(step), "FAIL", "assert_hit", map[string]any{"cache_hit": false}) + t.Fatalf("expected hit, got cache_hit=false cache_id=%s", deref(cd.CacheID)) + } + if wantType != "" { + if cd.HitType == nil || *cd.HitType != wantType { + logf(t, lc.at(step), "FAIL", "assert_hit_type", map[string]any{ + "want": wantType, "got": deref(cd.HitType), + }) + t.Fatalf("expected hit_type=%q, got %q", wantType, deref(cd.HitType)) + } + } + if cd.CacheID == nil || *cd.CacheID == "" { + t.Fatalf("expected cache_id stamped on hit; got nil/empty") + } + if cd.CacheHitLatency == nil { + t.Logf("warning: cache_hit_latency not stamped on hit") + } + logf(t, lc.at(step), "PASS", "assert_hit", map[string]any{ + "cache_id": *cd.CacheID, + "hit_type": deref(cd.HitType), + "latency": derefInt64(cd.CacheHitLatency), + }) + return *cd.CacheID +} + +// assertNoCacheDebug verifies the plugin did NOT run (no cache_debug stamped). +// Used for plugin-disabled and skipped-caching cases. 
+func assertNoCacheDebug(t *testing.T, lc logCtx, step int, resp cacheDebugged) { + t.Helper() + cd := resp.cacheDebug() + if cd != nil { + logf(t, lc.at(step), "FAIL", "assert_no_cache_debug", map[string]any{ + "cache_hit": cd.CacheHit, + "cache_id": deref(cd.CacheID), + }) + t.Fatalf("expected no cache_debug, got cache_hit=%v cache_id=%s", cd.CacheHit, deref(cd.CacheID)) + } + logf(t, lc.at(step), "PASS", "assert_no_cache_debug", nil) +} + +func assertSameCacheID(t *testing.T, lc logCtx, step int, got, want string) { + t.Helper() + if got != want { + logf(t, lc.at(step), "FAIL", "assert_same_cache_id", map[string]any{"want": want, "got": got}) + t.Fatalf("expected same cache_id %q, got %q", want, got) + } + logf(t, lc.at(step), "PASS", "assert_same_cache_id", map[string]any{"cache_id": got}) +} + +func assertDifferentCacheID(t *testing.T, lc logCtx, step int, a, b string) { + t.Helper() + if a == b { + logf(t, lc.at(step), "FAIL", "assert_diff_cache_id", map[string]any{"cache_id": a}) + t.Fatalf("expected different cache_ids, both = %q", a) + } + logf(t, lc.at(step), "PASS", "assert_diff_cache_id", map[string]any{"a": a, "b": b}) +} + +func deref(p *string) string { + if p == nil { + return "" + } + return *p +} + +func derefInt64(p *int64) int64 { + if p == nil { + return 0 + } + return *p +} diff --git a/tests/semanticcache/cache_test.go b/tests/semanticcache/cache_test.go new file mode 100644 index 0000000000..222c74090a --- /dev/null +++ b/tests/semanticcache/cache_test.go @@ -0,0 +1,42 @@ +package semanticcache + +import ( + "net/http" + "net/url" + "testing" +) + +// clearByCacheID hits DELETE /api/cache/clear/{cacheId}. Returns the HTTP +// status code so callers in §3.3-style cases can assert specific contracts. 
+func clearByCacheID(t *testing.T, lc logCtx, step int, cacheID string) int { + t.Helper() + status, body, _, err := doJSON(t, "DELETE", "/api/cache/clear/"+url.PathEscape(cacheID), nil, nil) + if err != nil { + t.Fatalf("clearByCacheID http error: %v", err) + } + logf(t, lc.at(step), "INFO", "clear_by_id", map[string]any{ + "cache_id": cacheID, + "status": status, + }) + if status != http.StatusOK && status != http.StatusNotFound { + t.Logf("clearByCacheID body: %s", truncate(string(body), 200)) + } + return status +} + +// clearByCacheKey hits DELETE /api/cache/clear-by-key/{cacheKey}. +func clearByCacheKey(t *testing.T, lc logCtx, step int, key string) int { + t.Helper() + status, body, _, err := doJSON(t, "DELETE", "/api/cache/clear-by-key/"+url.PathEscape(key), nil, nil) + if err != nil { + t.Fatalf("clearByCacheKey http error: %v", err) + } + logf(t, lc.at(step), "INFO", "clear_by_key", map[string]any{ + "cache_key": key, + "status": status, + }) + if status != http.StatusOK && status != http.StatusNotFound { + t.Logf("clearByCacheKey body: %s", truncate(string(body), 200)) + } + return status +} diff --git a/tests/semanticcache/e2e_test.go b/tests/semanticcache/e2e_test.go new file mode 100644 index 0000000000..43309c96bf --- /dev/null +++ b/tests/semanticcache/e2e_test.go @@ -0,0 +1,141 @@ +package semanticcache + +import ( + "encoding/json" + "fmt" + "net/http" + "os" + "testing" +) + +// TestMain wires up the run: loads env-based config, sets up a per-run report +// directory, checks Bifrost reachability, ensures the plugin is absent (or +// deletes it under RUN_FORCE=1), then defers to the test functions. +// +// On exit, attempts a teardown DELETE so the env is clean for the next run +// — unless RUN_KEEP_PLUGIN=1. 
+func TestMain(m *testing.M) { + loadConfig() + if err := initLog(); err != nil { + fmt.Fprintf(os.Stderr, "init log failed: %v\n", err) + os.Exit(2) + } + exitCode := 1 + defer func() { + closeLog() + os.Exit(exitCode) + }() + + // Sanity: Bifrost reachable. + status, _, _, err := doRaw("GET", "/api/plugins") + if err != nil { + fmt.Fprintf(os.Stderr, "[SC-E2E] FATAL: cannot reach Bifrost at %s: %v\n", cfg.BifrostURL, err) + return + } + if status != http.StatusOK { + fmt.Fprintf(os.Stderr, "[SC-E2E] FATAL: GET /api/plugins returned %d (Bifrost up at %s?)\n", status, cfg.BifrostURL) + return + } + + // Plugin pre-check: must be absent unless RUN_FORCE=1. + status, body, _, err := doRaw("GET", "/api/plugins/"+pluginName) + if err != nil { + fmt.Fprintf(os.Stderr, "[SC-E2E] FATAL: pre-check GET /api/plugins/%s failed at %s: %v\n", + pluginName, cfg.BifrostURL, err) + return + } + if status == http.StatusOK { + if os.Getenv("RUN_FORCE") != "1" { + fmt.Fprintf(os.Stderr, + "[SC-E2E] FATAL: plugin %q already exists at %s. "+ + "Set RUN_FORCE=1 to delete it and proceed.\nGET body: %s\n", + pluginName, cfg.BifrostURL, truncate(string(body), 300)) + return + } + fmt.Fprintf(os.Stderr, "[SC-E2E] WARN: RUN_FORCE=1 → deleting pre-existing %q plugin\n", pluginName) + ds, dbody, _, derr := doRaw("DELETE", "/api/plugins/"+pluginName) + if derr != nil || (ds != http.StatusOK && ds != http.StatusNotFound) { + fmt.Fprintf(os.Stderr, "[SC-E2E] FATAL: cannot delete pre-existing plugin: status=%d err=%v body=%s\n", + ds, derr, truncate(string(dbody), 300)) + return + } + } + + fmt.Fprintf(os.Stderr, "[SC-E2E] run starting: bifrost=%s namespace=%s reports=%s trail_sid=%q\n", + cfg.BifrostURL, cfg.Namespace, runReportDir, trailSID) + + exitCode = m.Run() + + // Teardown — best-effort cleanup so the next run starts clean. 
+ if os.Getenv("RUN_KEEP_PLUGIN") != "1" { + ds, _, _, _ := doRaw("DELETE", "/api/plugins/"+pluginName) + fmt.Fprintf(os.Stderr, "[SC-E2E] teardown: delete plugin → status=%d\n", ds) + } + fmt.Fprintf(os.Stderr, "[SC-E2E] run finished: exit=%d reports=%s\n", exitCode, runReportDir) +} + +// doRaw is a lightweight stdout-only HTTP helper for TestMain (no *testing.T available). +func doRaw(method, path string) (int, []byte, http.Header, error) { + req, err := http.NewRequest(method, cfg.BifrostURL+path, nil) + if err != nil { + return 0, nil, nil, err + } + resp, err := cfg.HTTPClient.Do(req) + if err != nil { + return 0, nil, nil, err + } + defer resp.Body.Close() + var b []byte + if resp.Body != nil { + b, _ = readAllSafe(resp.Body) + } + return resp.StatusCode, b, resp.Header, nil +} + +func readAllSafe(r interface{ Read([]byte) (int, error) }) ([]byte, error) { + buf := make([]byte, 0, 4096) + tmp := make([]byte, 4096) + for { + n, err := r.Read(tmp) + if n > 0 { + buf = append(buf, tmp[:n]...) + } + if err != nil { + if err.Error() == "EOF" { + return buf, nil + } + return buf, err + } + } +} + +// providersList fetches the configured providers; used by Phase 0 checks. +type providerSummary struct { + Name string `json:"name"` +} + +func providersList(t *testing.T, lc logCtx, step int) []providerSummary { + t.Helper() + status, body, _, err := doJSON(t, "GET", "/api/providers", nil, nil) + if err != nil { + t.Fatalf("providersList: %v", err) + } + if status != http.StatusOK { + t.Fatalf("providersList status=%d body=%s", status, truncate(string(body), 300)) + } + // /api/providers returns {providers: [...]} based on convention. + var wrap struct { + Providers []providerSummary `json:"providers"` + } + if err := json.Unmarshal(body, &wrap); err == nil && wrap.Providers != nil { + logf(t, lc.at(step), "INFO", "providers_list", map[string]any{"count": len(wrap.Providers)}) + return wrap.Providers + } + // Fallback: response may be a bare list. 
+ var bare []providerSummary + if err := json.Unmarshal(body, &bare); err != nil { + t.Fatalf("providersList decode: %v\nbody=%s", err, truncate(string(body), 500)) + } + logf(t, lc.at(step), "INFO", "providers_list", map[string]any{"count": len(bare)}) + return bare +} diff --git a/tests/semanticcache/fixtures_test.go b/tests/semanticcache/fixtures_test.go new file mode 100644 index 0000000000..f590ccbb21 --- /dev/null +++ b/tests/semanticcache/fixtures_test.go @@ -0,0 +1,100 @@ +package semanticcache + +import "testing" + +// paraphrasePair holds two prompts that are SEMANTICALLY equivalent (near +// rephrasings, expected cosine ≥ ~0.92 with text-embedding-3-small) plus an +// UNRELATED prompt from a completely different domain (expected cosine +// ≤ ~0.4). The gap from the default 0.8 threshold is intentionally large +// on both sides so Phase 2 hit/miss assertions never sit on a flaky +// boundary. +// +// Pair design rules (when adding new ones): +// - Canonical vs Paraphrase: only swap 1-2 words/phrases (e.g. "What is" +// ↔ "Tell me"), keep ALL content nouns and proper nouns identical, keep +// overall sentence shape. This pushes cosine into 0.92-0.97. +// - Unrelated: pick a topic from a completely different domain (cooking +// vs astronomy, history vs electronics, etc.). Single-domain switches +// ("dogs" ↔ "cats") creep up to 0.6+ and would be flaky. +// - Sentences should be long enough (>= ~8 content words) that small +// wording changes don't dominate the embedding. +type paraphrasePair struct { + Name string + Canonical string + Paraphrase string + Unrelated string +} + +// paraphrasePairs is the chat/text-paraphrase corpus used by Phase 2 semantic +// cases. Each pair is hand-curated to land WELL above (canonical→paraphrase) +// or WELL below (canonical→unrelated) the default 0.8 threshold. 
+var paraphrasePairs = []paraphrasePair{ + { + Name: "capital_france", + Canonical: "What is the capital city of France in modern times?", + Paraphrase: "Tell me the capital city of France in modern times.", + Unrelated: "Explain how a transistor works at the silicon level.", + }, + { + Name: "boiling_water", + Canonical: "At what temperature does pure water boil at sea level?", + Paraphrase: "What is the boiling point of pure water at sea level?", + Unrelated: "Recommend a well-known jazz album recorded in the 1960s.", + }, + { + Name: "vinaigrette", + Canonical: "How do I make a basic vinaigrette salad dressing at home?", + Paraphrase: "What are the steps to make a basic vinaigrette salad dressing at home?", + Unrelated: "Describe quantum entanglement in a single paragraph for a beginner.", + }, + { + Name: "opera_composer", + Canonical: "Name a famous Italian opera composer from the nineteenth century.", + Paraphrase: "Tell me one famous Italian opera composer from the nineteenth century.", + Unrelated: "What is the average distance from Earth to the planet Mars?", + }, + { + Name: "photosynthesis", + Canonical: "Briefly explain how photosynthesis works in green plants.", + Paraphrase: "In a few sentences, describe how photosynthesis works in green plants.", + Unrelated: "How do you knit a basic scarf using stockinette stitch?", + }, +} + +// imagePromptPairs is the image-generation paraphrase corpus used by Phase 2 +// case 2.25 (image_gen_semantic_paraphrase). Image prompts tend to be shorter +// than chat prompts so we leave the content nouns identical and only vary +// modifiers slightly. +var imagePromptPairs = []paraphrasePair{ + { + Name: "red_apple", + Canonical: "A bright red apple sitting on a wooden kitchen table in daylight.", + Paraphrase: "A vivid red apple resting on a wooden kitchen table in daylight.", + Unrelated: "A futuristic silver spaceship orbiting Saturn against a starry void.", + }, +} + +// pairByName looks up a paraphrase pair by name. 
Fatal if not defined — the +// suite should fail loudly if a case references a pair that was removed. +func pairByName(t *testing.T, name string) paraphrasePair { + t.Helper() + for _, p := range paraphrasePairs { + if p.Name == name { + return p + } + } + t.Fatalf("paraphrase pair %q not defined in paraphrasePairs", name) + return paraphrasePair{} +} + +// imagePairByName looks up an image prompt pair by name. Fatal if not defined. +func imagePairByName(t *testing.T, name string) paraphrasePair { + t.Helper() + for _, p := range imagePromptPairs { + if p.Name == name { + return p + } + } + t.Fatalf("image prompt pair %q not defined in imagePromptPairs", name) + return paraphrasePair{} +} diff --git a/tests/semanticcache/go.mod b/tests/semanticcache/go.mod new file mode 100644 index 0000000000..5417c6abe8 --- /dev/null +++ b/tests/semanticcache/go.mod @@ -0,0 +1,3 @@ +module github.com/maximhq/bifrost/tests/semanticcache + +go 1.26.2 diff --git a/tests/semanticcache/http_test.go b/tests/semanticcache/http_test.go new file mode 100644 index 0000000000..c3497870d1 --- /dev/null +++ b/tests/semanticcache/http_test.go @@ -0,0 +1,674 @@ +package semanticcache + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "strings" + "testing" + "time" +) + +type runConfig struct { + BifrostURL string + OpenAIModel string + OpenAIModelAlt string // different model, same provider — for cache_by_model cases + OpenAIEmbed string + GeminiModel string + AnthroModel string + Namespace string + HTTPClient *http.Client +} + +var cfg runConfig + +func loadConfig() { + cfg.BifrostURL = strings.TrimRight(getenv("BIFROST_URL", "http://localhost:8080"), "/") + cfg.OpenAIModel = getenv("SC_CHAT_MODEL_OPENAI", "openai/gpt-4o-mini") + cfg.OpenAIModelAlt = getenv("SC_CHAT_MODEL_OPENAI_ALT", "openai/gpt-4o") + cfg.OpenAIEmbed = getenv("SC_EMBED_MODEL_OPENAI", "text-embedding-3-small") + cfg.GeminiModel = getenv("SC_CHAT_MODEL_GEMINI", "gemini/gemini-2.5-flash") + 
cfg.AnthroModel = getenv("SC_CHAT_MODEL_ANTHROPIC", "anthropic/claude-haiku-4-5") + cfg.Namespace = getenv("SC_NAMESPACE", "BifrostSemanticCachePluginE2E") + cfg.HTTPClient = &http.Client{Timeout: 120 * time.Second} +} + +func getenv(k, fallback string) string { + if v, ok := os.LookupEnv(k); ok && v != "" { + return v + } + return fallback +} + +// cacheDebug mirrors schemas.BifrostCacheDebug as it arrives over the wire. +type cacheDebug struct { + CacheHit bool `json:"cache_hit"` + CacheID *string `json:"cache_id,omitempty"` + HitType *string `json:"hit_type,omitempty"` + RequestedProvider *string `json:"requested_provider,omitempty"` + RequestedModel *string `json:"requested_model,omitempty"` + ProviderUsed *string `json:"provider_used,omitempty"` + ModelUsed *string `json:"model_used,omitempty"` + InputTokens *int `json:"input_tokens,omitempty"` + Threshold *float64 `json:"threshold,omitempty"` + Similarity *float64 `json:"similarity,omitempty"` + CacheHitLatency *int64 `json:"cache_hit_latency,omitempty"` +} + +// extraFields subset — only what we read in assertions. +type extraFields struct { + RequestType string `json:"request_type,omitempty"` + Provider string `json:"provider,omitempty"` + CacheDebug *cacheDebug `json:"cache_debug,omitempty"` +} + +type chatChoice struct { + Index int `json:"index"` + Message json.RawMessage `json:"message"` + FinishReason *string `json:"finish_reason,omitempty"` +} + +type chatResponse struct { + ID string `json:"id"` + Object string `json:"object,omitempty"` + Model string `json:"model,omitempty"` + Choices []chatChoice `json:"choices"` + ExtraFields *extraFields `json:"extra_fields,omitempty"` + // Captured at HTTP layer, not part of body. + bodyRaw []byte + respHeader http.Header + statusCode int +} + +func (c *chatResponse) cacheDebug() *cacheDebug { + if c.ExtraFields == nil { + return nil + } + return c.ExtraFields.CacheDebug +} + +// chatRequest is the minimum we need on the wire — OpenAI-compatible. 
Optional +// pointer fields keep "unset" distinguishable from "zero" for cache_key +// composition tests (e.g. seed=0 differs from seed unset). +type chatRequest struct { + Model string `json:"model"` + Messages []chatMessage `json:"messages"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + MaxTokens *int `json:"max_tokens,omitempty"` + Seed *int `json:"seed,omitempty"` + Stream *bool `json:"stream,omitempty"` + Tools []chatTool `json:"tools,omitempty"` + PromptCacheKey *string `json:"prompt_cache_key,omitempty"` + ServiceTier *string `json:"service_tier,omitempty"` + Store *bool `json:"store,omitempty"` + LogProbs *bool `json:"logprobs,omitempty"` + TopLogProbs *int `json:"top_logprobs,omitempty"` +} + +// chatMessage uses RawContent so it can carry either a plain string or a +// content-block array (image_url, text, etc.). Helpers below build both shapes. +type chatMessage struct { + Role string `json:"role"` + Content json.RawMessage `json:"content,omitempty"` + ToolCalls []chatToolCall `json:"tool_calls,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` +} + +type chatToolCall struct { + ID string `json:"id"` + Type string `json:"type"` + Function chatToolCallFunc `json:"function"` +} + +type chatToolCallFunc struct { + Name string `json:"name"` + Arguments string `json:"arguments"` +} + +// textContent returns a JSON-encoded plain-string content payload. +func textContent(s string) json.RawMessage { + b, _ := json.Marshal(s) + return json.RawMessage(b) +} + +// blocksContent returns a JSON-encoded content-block array (used for image_url +// inputs and other multi-modal messages). 
+func blocksContent(blocks []map[string]any) json.RawMessage { + b, _ := json.Marshal(blocks) + return json.RawMessage(b) +} + +type chatTool struct { + Type string `json:"type"` // "function" + Function *toolFunction `json:"function,omitempty"` // required when type=function +} + +type toolFunction struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Parameters map[string]any `json:"parameters,omitempty"` +} + +type cacheHeaders struct { + Key string // x-bf-cache-key + TTL string // x-bf-cache-ttl + Threshold *float64 + Type string // x-bf-cache-type + NoStore string // x-bf-cache-no-store +} + +func (h cacheHeaders) apply(req *http.Request) { + if h.Key != "" { + req.Header.Set("x-bf-cache-key", h.Key) + } + if h.TTL != "" { + req.Header.Set("x-bf-cache-ttl", h.TTL) + } + if h.Threshold != nil { + req.Header.Set("x-bf-cache-threshold", fmt.Sprintf("%v", *h.Threshold)) + } + if h.Type != "" { + req.Header.Set("x-bf-cache-type", h.Type) + } + if h.NoStore != "" { + req.Header.Set("x-bf-cache-no-store", h.NoStore) + } +} + +// doJSON sends a JSON request and returns status, body, headers. 
+func doJSON(t *testing.T, method, path string, body any, extra http.Header) (int, []byte, http.Header, error) { + t.Helper() + var rdr io.Reader + if body != nil { + buf, err := json.Marshal(body) + if err != nil { + return 0, nil, nil, fmt.Errorf("marshal: %w", err) + } + rdr = bytes.NewReader(buf) + } + url := cfg.BifrostURL + path + req, err := http.NewRequest(method, url, rdr) + if err != nil { + return 0, nil, nil, fmt.Errorf("new request: %w", err) + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + for k, vv := range extra { + for _, v := range vv { + req.Header.Add(k, v) + } + } + resp, err := cfg.HTTPClient.Do(req) + if err != nil { + return 0, nil, nil, fmt.Errorf("http do: %w", err) + } + defer resp.Body.Close() + respBytes, err := io.ReadAll(resp.Body) + if err != nil { + return resp.StatusCode, nil, resp.Header, fmt.Errorf("read body: %w", err) + } + return resp.StatusCode, respBytes, resp.Header, nil +} + +// postChat sends a chat completion and parses the response. +func postChat(t *testing.T, lc logCtx, step int, req chatRequest, ch cacheHeaders) *chatResponse { + t.Helper() + logf(t, lc.at(step), "INFO", "request", map[string]any{ + "method": "POST", + "path": "/v1/chat/completions", + "model": req.Model, + "cache_key": ch.Key, + "ttl": ch.TTL, + "type": ch.Type, + "no_store": ch.NoStore, + }) + + // Dump request body for forensics. 
+ if rb, err := json.MarshalIndent(req, "", " "); err == nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.req.json", lc.phase, lc.name, step), rb) + } + + hdr := http.Header{} + ch.apply(&http.Request{Header: hdr}) + + status, body, respHdr, err := doJSON(t, "POST", "/v1/chat/completions", req, hdr) + if err != nil { + t.Fatalf("postChat http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.resp.json", lc.phase, lc.name, step), body) + + out := &chatResponse{bodyRaw: body, respHeader: respHdr, statusCode: status} + if status != http.StatusOK { + logf(t, lc.at(step), "ERROR", "response", map[string]any{ + "status": status, + "body_len": len(body), + }) + t.Fatalf("chat completion failed: status=%d body=%s", status, truncate(string(body), 500)) + } + if err := json.Unmarshal(body, out); err != nil { + t.Fatalf("decode chat response: %v\nbody=%s", err, truncate(string(body), 500)) + } + cd := out.cacheDebug() + fields := map[string]any{"status": status} + if cd != nil { + fields["cache_hit"] = cd.CacheHit + if cd.CacheID != nil { + fields["cache_id"] = *cd.CacheID + } + if cd.HitType != nil { + fields["hit_type"] = *cd.HitType + } + if cd.CacheHitLatency != nil { + fields["cache_hit_latency"] = *cd.CacheHitLatency + } + } else { + fields["cache_debug"] = "" + } + logf(t, lc.at(step), "INFO", "response", fields) + return out +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "…" +} + +// ----------------------------------------------------------------------------- +// Text completion (/v1/completions) +// ----------------------------------------------------------------------------- + +type textCompletionRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + MaxTokens *int `json:"max_tokens,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` +} + +type textCompletionResponse struct { + ExtraFields *extraFields `json:"extra_fields,omitempty"` + bodyRaw []byte + statusCode int +} + 
+func (r *textCompletionResponse) cacheDebug() *cacheDebug { + if r.ExtraFields == nil { + return nil + } + return r.ExtraFields.CacheDebug +} + +func postTextCompletion(t *testing.T, lc logCtx, step int, req textCompletionRequest, ch cacheHeaders) *textCompletionResponse { + t.Helper() + logf(t, lc.at(step), "INFO", "request", map[string]any{ + "method": "POST", "path": "/v1/completions", "model": req.Model, "cache_key": ch.Key, + }) + if rb, err := json.MarshalIndent(req, "", " "); err == nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.req.json", lc.phase, lc.name, step), rb) + } + hdr := http.Header{} + ch.apply(&http.Request{Header: hdr}) + status, body, _, err := doJSON(t, "POST", "/v1/completions", req, hdr) + if err != nil { + t.Fatalf("postTextCompletion http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.resp.json", lc.phase, lc.name, step), body) + if status != http.StatusOK { + t.Fatalf("text completion failed: status=%d body=%s", status, truncate(string(body), 500)) + } + out := &textCompletionResponse{bodyRaw: body, statusCode: status} + if err := json.Unmarshal(body, out); err != nil { + t.Fatalf("decode text completion response: %v\nbody=%s", err, truncate(string(body), 500)) + } + logCacheDebugFields(t, lc.at(step), out.cacheDebug()) + return out +} + +// ----------------------------------------------------------------------------- +// Embeddings (/v1/embeddings) +// ----------------------------------------------------------------------------- + +type embeddingRequest struct { + Model string `json:"model"` + Input string `json:"input"` +} + +type embeddingResponse struct { + ExtraFields *extraFields `json:"extra_fields,omitempty"` + bodyRaw []byte + statusCode int +} + +func (r *embeddingResponse) cacheDebug() *cacheDebug { + if r.ExtraFields == nil { + return nil + } + return r.ExtraFields.CacheDebug +} + +func postEmbedding(t *testing.T, lc logCtx, step int, req embeddingRequest, ch cacheHeaders) *embeddingResponse { + t.Helper() + logf(t, 
lc.at(step), "INFO", "request", map[string]any{ + "method": "POST", "path": "/v1/embeddings", "model": req.Model, "cache_key": ch.Key, + }) + if rb, err := json.MarshalIndent(req, "", " "); err == nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.req.json", lc.phase, lc.name, step), rb) + } + hdr := http.Header{} + ch.apply(&http.Request{Header: hdr}) + status, body, _, err := doJSON(t, "POST", "/v1/embeddings", req, hdr) + if err != nil { + t.Fatalf("postEmbedding http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.resp.json", lc.phase, lc.name, step), body) + if status != http.StatusOK { + t.Fatalf("embedding failed: status=%d body=%s", status, truncate(string(body), 500)) + } + out := &embeddingResponse{bodyRaw: body, statusCode: status} + if err := json.Unmarshal(body, out); err != nil { + t.Fatalf("decode embedding response: %v\nbody=%s", err, truncate(string(body), 500)) + } + logCacheDebugFields(t, lc.at(step), out.cacheDebug()) + return out +} + +// ----------------------------------------------------------------------------- +// Image generation (/v1/images/generations) +// ----------------------------------------------------------------------------- + +type imageGenRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + N *int `json:"n,omitempty"` + Size string `json:"size,omitempty"` +} + +type imageGenResponse struct { + ExtraFields *extraFields `json:"extra_fields,omitempty"` + bodyRaw []byte + statusCode int +} + +func (r *imageGenResponse) cacheDebug() *cacheDebug { + if r.ExtraFields == nil { + return nil + } + return r.ExtraFields.CacheDebug +} + +func postImageGen(t *testing.T, lc logCtx, step int, req imageGenRequest, ch cacheHeaders) *imageGenResponse { + t.Helper() + logf(t, lc.at(step), "INFO", "request", map[string]any{ + "method": "POST", "path": "/v1/images/generations", "model": req.Model, "cache_key": ch.Key, + }) + if rb, err := json.MarshalIndent(req, "", " "); err == nil { + dumpJSON(t, 
fmt.Sprintf("p%s-%s-s%d.req.json", lc.phase, lc.name, step), rb) + } + hdr := http.Header{} + ch.apply(&http.Request{Header: hdr}) + status, body, _, err := doJSON(t, "POST", "/v1/images/generations", req, hdr) + if err != nil { + t.Fatalf("postImageGen http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.resp.json", lc.phase, lc.name, step), body) + if status != http.StatusOK { + t.Fatalf("image gen failed: status=%d body=%s", status, truncate(string(body), 500)) + } + out := &imageGenResponse{bodyRaw: body, statusCode: status} + if err := json.Unmarshal(body, out); err != nil { + t.Fatalf("decode image gen response: %v\nbody=%s", err, truncate(string(body), 500)) + } + logCacheDebugFields(t, lc.at(step), out.cacheDebug()) + return out +} + +// ----------------------------------------------------------------------------- +// Responses API (/v1/responses) — OpenAI's newer interface +// ----------------------------------------------------------------------------- + +type responsesRequest struct { + Model string `json:"model"` + Input string `json:"input"` + Instructions *string `json:"instructions,omitempty"` + PreviousResponseID *string `json:"previous_response_id,omitempty"` +} + +type responsesResponse struct { + ExtraFields *extraFields `json:"extra_fields,omitempty"` + bodyRaw []byte + statusCode int +} + +func (r *responsesResponse) cacheDebug() *cacheDebug { + if r.ExtraFields == nil { + return nil + } + return r.ExtraFields.CacheDebug +} + +func postResponses(t *testing.T, lc logCtx, step int, req responsesRequest, ch cacheHeaders) *responsesResponse { + t.Helper() + logf(t, lc.at(step), "INFO", "request", map[string]any{ + "method": "POST", "path": "/v1/responses", "model": req.Model, "cache_key": ch.Key, + }) + if rb, err := json.MarshalIndent(req, "", " "); err == nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.req.json", lc.phase, lc.name, step), rb) + } + hdr := http.Header{} + ch.apply(&http.Request{Header: hdr}) + status, body, _, err := 
doJSON(t, "POST", "/v1/responses", req, hdr) + if err != nil { + t.Fatalf("postResponses http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.resp.json", lc.phase, lc.name, step), body) + if status != http.StatusOK { + t.Fatalf("responses API failed: status=%d body=%s", status, truncate(string(body), 500)) + } + out := &responsesResponse{bodyRaw: body, statusCode: status} + if err := json.Unmarshal(body, out); err != nil { + t.Fatalf("decode responses API response: %v\nbody=%s", err, truncate(string(body), 500)) + } + logCacheDebugFields(t, lc.at(step), out.cacheDebug()) + return out +} + +// ----------------------------------------------------------------------------- +// Streaming chat (/v1/chat/completions with stream:true) — SSE +// ----------------------------------------------------------------------------- + +// streamChunk is one decoded SSE data event from a chat completion stream. +type streamChunk struct { + Index int + Raw []byte + Parsed map[string]any + ExtraFields *extraFields + Done bool // true for the terminal [DONE] sentinel +} + +func (c *streamChunk) cacheDebug() *cacheDebug { + if c.ExtraFields == nil { + return nil + } + return c.ExtraFields.CacheDebug +} + +// chunkText extracts choices[0].delta.content (or .message.content) as a +// string. Used to compare chunk order/content across A and B in case 1.25. +func (c *streamChunk) chunkText() string { + if c.Parsed == nil { + return "" + } + choices, _ := c.Parsed["choices"].([]any) + if len(choices) == 0 { + return "" + } + c0, _ := choices[0].(map[string]any) + if c0 == nil { + return "" + } + if delta, ok := c0["delta"].(map[string]any); ok { + if s, ok := delta["content"].(string); ok { + return s + } + } + if msg, ok := c0["message"].(map[string]any); ok { + if s, ok := msg["content"].(string); ok { + return s + } + } + return "" +} + +// streamResponse aggregates every chunk received from one streamed chat +// completion. 
cacheDebug() returns the stamp from the final chunk — that's +// the only chunk the plugin tags (search.go:628 guard). +type streamResponse struct { + Chunks []streamChunk + statusCode int + headers http.Header +} + +func (s *streamResponse) cacheDebug() *cacheDebug { + for i := len(s.Chunks) - 1; i >= 0; i-- { + if cd := s.Chunks[i].cacheDebug(); cd != nil { + return cd + } + } + return nil +} + +// dataChunks returns the chunks excluding the terminal [DONE] sentinel. +func (s *streamResponse) dataChunks() []streamChunk { + out := make([]streamChunk, 0, len(s.Chunks)) + for _, c := range s.Chunks { + if !c.Done { + out = append(out, c) + } + } + return out +} + +func postChatStream(t *testing.T, lc logCtx, step int, req chatRequest, ch cacheHeaders) *streamResponse { + t.Helper() + streamFlag := true + req.Stream = &streamFlag + + logf(t, lc.at(step), "INFO", "request", map[string]any{ + "method": "POST", "path": "/v1/chat/completions", "model": req.Model, + "cache_key": ch.Key, "stream": true, + }) + if rb, err := json.MarshalIndent(req, "", " "); err == nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.req.json", lc.phase, lc.name, step), rb) + } + + bodyBytes, err := json.Marshal(req) + if err != nil { + t.Fatalf("marshal stream req: %v", err) + } + httpReq, err := http.NewRequest("POST", cfg.BifrostURL+"/v1/chat/completions", bytes.NewReader(bodyBytes)) + if err != nil { + t.Fatalf("new stream req: %v", err) + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("Accept", "text/event-stream") + ch.apply(httpReq) + + resp, err := cfg.HTTPClient.Do(httpReq) + if err != nil { + t.Fatalf("stream do: %v", err) + } + defer resp.Body.Close() + + out := &streamResponse{statusCode: resp.StatusCode, headers: resp.Header} + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("stream request failed: status=%d body=%s", resp.StatusCode, truncate(string(body), 500)) + } + + scanner := bufio.NewScanner(resp.Body) + 
scanner.Buffer(make([]byte, 64*1024), 4*1024*1024) + rawDump := &bytes.Buffer{} + idx := 0 + for scanner.Scan() { + line := scanner.Bytes() + rawDump.Write(line) + rawDump.WriteByte('\n') + if !bytes.HasPrefix(line, []byte("data: ")) { + continue + } + payload := bytes.TrimPrefix(line, []byte("data: ")) + payload = bytes.TrimSpace(payload) + if len(payload) == 0 { + continue + } + if bytes.Equal(payload, []byte("[DONE]")) { + out.Chunks = append(out.Chunks, streamChunk{Index: idx, Done: true}) + idx++ + break + } + ck := streamChunk{Index: idx, Raw: append([]byte(nil), payload...)} + if err := json.Unmarshal(payload, &ck.Parsed); err != nil { + t.Logf("warning: chunk %d unparseable JSON: %v\nraw=%s", idx, err, truncate(string(payload), 200)) + } else { + var ef struct { + ExtraFields *extraFields `json:"extra_fields,omitempty"` + } + _ = json.Unmarshal(payload, &ef) + ck.ExtraFields = ef.ExtraFields + } + out.Chunks = append(out.Chunks, ck) + idx++ + } + if err := scanner.Err(); err != nil { + t.Fatalf("stream scanner: %v", err) + } + + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.resp.sse.txt", lc.phase, lc.name, step), rawDump.Bytes()) + + fields := map[string]any{ + "status": resp.StatusCode, + "chunk_count": len(out.dataChunks()), + } + if cd := out.cacheDebug(); cd != nil { + fields["cache_hit"] = cd.CacheHit + if cd.CacheID != nil { + fields["cache_id"] = *cd.CacheID + } + if cd.HitType != nil { + fields["hit_type"] = *cd.HitType + } + } else { + fields["cache_debug"] = "" + } + logf(t, lc.at(step), "INFO", "response", fields) + return out +} + +// logCacheDebugFields emits a single response-event log line with the standard +// cache_debug fields, used by every postXxx helper above. 
+func logCacheDebugFields(t *testing.T, lc logCtx, cd *cacheDebug) { + t.Helper() + fields := map[string]any{"status": 200} + if cd != nil { + fields["cache_hit"] = cd.CacheHit + if cd.CacheID != nil { + fields["cache_id"] = *cd.CacheID + } + if cd.HitType != nil { + fields["hit_type"] = *cd.HitType + } + if cd.CacheHitLatency != nil { + fields["cache_hit_latency"] = *cd.CacheHitLatency + } + } else { + fields["cache_debug"] = "" + } + logf(t, lc, "INFO", "response", fields) +} diff --git a/tests/semanticcache/log_test.go b/tests/semanticcache/log_test.go new file mode 100644 index 0000000000..767288d260 --- /dev/null +++ b/tests/semanticcache/log_test.go @@ -0,0 +1,95 @@ +package semanticcache + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "testing" + "time" +) + +var ( + logMu sync.Mutex + runReportDir string + runLogFile *os.File + trailSID string +) + +func initLog() error { + base := filepath.Join("reports", time.Now().UTC().Format("20060102T150405Z")) + if err := os.MkdirAll(base, 0o755); err != nil { + return err + } + f, err := os.Create(filepath.Join(base, "run.log")) + if err != nil { + return err + } + runReportDir = base + runLogFile = f + trailSID = strings.TrimSpace(os.Getenv("TRAIL_SESSION_ID")) + return nil +} + +func closeLog() { + logMu.Lock() + defer logMu.Unlock() + if runLogFile != nil { + _ = runLogFile.Close() + runLogFile = nil + } +} + +type logCtx struct { + phase string + name string + step int +} + +func newLogCtx(phase, name string) logCtx { return logCtx{phase: phase, name: name} } + +func (lc logCtx) at(step int) logCtx { lc.step = step; return lc } + +func logf(t *testing.T, lc logCtx, lvl, event string, fields map[string]any) { + t.Helper() + var b strings.Builder + fmt.Fprintf(&b, "[SC-E2E] ts=%s lvl=%-5s phase=%s case=%s step=%d event=%s", + time.Now().UTC().Format("2006-01-02T15:04:05.000Z"), + lvl, lc.phase, lc.name, lc.step, event) + if trailSID != "" { + fmt.Fprintf(&b, " trail_sid=%s", trailSID) 
+ } + keys := make([]string, 0, len(fields)) + for k := range fields { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + fmt.Fprintf(&b, " %s=%v", k, fields[k]) + } + line := b.String() + t.Log(line) + logMu.Lock() + if runLogFile != nil { + fmt.Fprintln(runLogFile, line) + } + logMu.Unlock() +} + +func reportPath(parts ...string) string { + if runReportDir == "" { + return filepath.Join(parts...) + } + return filepath.Join(append([]string{runReportDir}, parts...)...) +} + +func dumpJSON(t *testing.T, name string, body []byte) string { + t.Helper() + p := reportPath(name) + if err := os.WriteFile(p, body, 0o644); err != nil { + t.Logf("warning: dump %s failed: %v", p, err) + } + return p +} diff --git a/tests/semanticcache/logs_crosscheck_test.go b/tests/semanticcache/logs_crosscheck_test.go new file mode 100644 index 0000000000..2781f3a450 --- /dev/null +++ b/tests/semanticcache/logs_crosscheck_test.go @@ -0,0 +1,188 @@ +package semanticcache + +import ( + "encoding/json" + "fmt" + "math" + "net/http" + "testing" + "time" +) + +// floatEpsilon is the tolerance for cache_debug float field comparison between +// the in-flight response stamp and the persisted log row. The two paths use +// different JSON encoders (encoding/json for the HTTP response, sonic for +// the log store at logstore/payload.go:509), and sonic's default precision +// produces small (~1e-5) differences for similarity/threshold values. Not +// semantic drift — just round-trip noise. 1e-4 is comfortably above the +// observed delta while still tight enough to catch any real divergence. +const floatEpsilon = 1e-4 + +// logEntry is the minimum slice of a Bifrost log row we need for cross-checking +// the persisted cache_debug against the in-flight response. The full Log row +// has dozens of fields — we only care about ID, Timestamp, and CacheDebug. 
+type logEntry struct { + ID string `json:"id"` + Timestamp string `json:"timestamp"` + CacheDebug *cacheDebug `json:"cache_debug,omitempty"` +} + +// findLogByCacheDebug polls /api/logs descending-by-timestamp looking for an +// entry whose cache_debug matches the response stamp's (cache_id, cache_hit) +// pair. Returns the matching log row or fatal-fails after the timeout. +// +// Why match BOTH fields: for a semantic hit, A's miss-and-store log row and +// B's hit-replay log row carry the SAME cache_id (B's stamped cache_id points +// to A's storage entry). Without the cache_hit discriminator the helper would +// return whichever row was persisted first (usually A's miss). +// +// Polling exists because Bifrost's logging pipeline is asynchronous — the HTTP +// response returns before the row is persisted. +func findLogByCacheDebug(t *testing.T, lc logCtx, step int, want *cacheDebug) *logEntry { + t.Helper() + if want == nil || want.CacheID == nil { + t.Fatalf("findLogByCacheDebug: response cache_debug or cache_id is nil") + } + wantID := *want.CacheID + deadline := time.Now().Add(5 * time.Second) + attempts := 0 + for time.Now().Before(deadline) { + attempts++ + status, body, _, err := doJSON(t, "GET", + "/api/logs?limit=50&sort_by=timestamp&order=desc", nil, nil) + if err != nil { + t.Fatalf("findLogByCacheDebug GET err: %v", err) + } + if status != http.StatusOK { + t.Fatalf("findLogByCacheDebug status=%d body=%s", status, truncate(string(body), 300)) + } + var resp struct { + Logs []logEntry `json:"logs"` + } + if err := json.Unmarshal(body, &resp); err != nil { + t.Fatalf("findLogByCacheDebug decode: %v\nbody=%s", err, truncate(string(body), 300)) + } + for i := range resp.Logs { + l := &resp.Logs[i] + if l.CacheDebug == nil || l.CacheDebug.CacheID == nil { + continue + } + if *l.CacheDebug.CacheID != wantID { + continue + } + if l.CacheDebug.CacheHit != want.CacheHit { + continue + } + logf(t, lc.at(step), "INFO", "log_found", map[string]any{ + "cache_id": 
wantID, "log_id": l.ID, "cache_hit": l.CacheDebug.CacheHit, "attempts": attempts, + }) + return l + } + time.Sleep(200 * time.Millisecond) + } + t.Fatalf("log entry with cache_id=%s cache_hit=%v not found after %d attempts", wantID, want.CacheHit, attempts) + return nil +} + +// assertLogMatchesResponseCacheDebug verifies every field of the persisted +// log's cache_debug matches the in-flight response's cache_debug. Catches +// drift between PostLLMHook stamping and the durable log write — same data +// path the UI Logs view reads, so this guards a real production contract. +func assertLogMatchesResponseCacheDebug(t *testing.T, lc logCtx, step int, respCD, logCD *cacheDebug) { + t.Helper() + if respCD == nil { + t.Fatalf("response cache_debug is nil; nothing to cross-check") + } + if logCD == nil { + t.Fatalf("log row has no cache_debug; expected matching stamp") + } + mismatches := []string{} + if logCD.CacheHit != respCD.CacheHit { + mismatches = append(mismatches, fmt.Sprintf("cache_hit: resp=%v log=%v", respCD.CacheHit, logCD.CacheHit)) + } + if deref(logCD.CacheID) != deref(respCD.CacheID) { + mismatches = append(mismatches, fmt.Sprintf("cache_id: resp=%q log=%q", deref(respCD.CacheID), deref(logCD.CacheID))) + } + if deref(logCD.HitType) != deref(respCD.HitType) { + mismatches = append(mismatches, fmt.Sprintf("hit_type: resp=%q log=%q", deref(respCD.HitType), deref(logCD.HitType))) + } + if deref(logCD.RequestedProvider) != deref(respCD.RequestedProvider) { + mismatches = append(mismatches, fmt.Sprintf("requested_provider: resp=%q log=%q", deref(respCD.RequestedProvider), deref(logCD.RequestedProvider))) + } + if deref(logCD.RequestedModel) != deref(respCD.RequestedModel) { + mismatches = append(mismatches, fmt.Sprintf("requested_model: resp=%q log=%q", deref(respCD.RequestedModel), deref(logCD.RequestedModel))) + } + if deref(logCD.ProviderUsed) != deref(respCD.ProviderUsed) { + mismatches = append(mismatches, fmt.Sprintf("provider_used: resp=%q log=%q", 
deref(respCD.ProviderUsed), deref(logCD.ProviderUsed))) + } + if deref(logCD.ModelUsed) != deref(respCD.ModelUsed) { + mismatches = append(mismatches, fmt.Sprintf("model_used: resp=%q log=%q", deref(respCD.ModelUsed), deref(logCD.ModelUsed))) + } + // Float fields are compared via floatPtrEq, i.e. within floatEpsilon rather + // than exactly: the response and log-store paths use different JSON encoders. + if !floatPtrEq(logCD.Threshold, respCD.Threshold) { + mismatches = append(mismatches, fmt.Sprintf("threshold: resp=%v log=%v", respPtrStr(respCD.Threshold), respPtrStr(logCD.Threshold))) + } + if !floatPtrEq(logCD.Similarity, respCD.Similarity) { + mismatches = append(mismatches, fmt.Sprintf("similarity: resp=%v log=%v", respPtrStr(respCD.Similarity), respPtrStr(logCD.Similarity))) + } + if !intPtrEq(logCD.InputTokens, respCD.InputTokens) { + mismatches = append(mismatches, fmt.Sprintf("input_tokens: resp=%v log=%v", intPtrStr(respCD.InputTokens), intPtrStr(logCD.InputTokens))) + } + // cache_hit_latency is not cross-checked: the log row may be persisted + // after the response was sent, and the field can be the same OR slightly + // different depending on where in PostLLMHook the stamp lands.
+ + if len(mismatches) > 0 { + t.Fatalf("cache_debug response/log mismatch:\n - %s", joinLines(mismatches)) + } + logf(t, lc.at(step), "PASS", "log_matches_response_cache_debug", map[string]any{ + "cache_id": deref(respCD.CacheID), + "hit_type": deref(respCD.HitType), + "fields_compared": []string{ + "cache_hit", "cache_id", "hit_type", + "requested_provider", "requested_model", + "provider_used", "model_used", "input_tokens", + "threshold", "similarity", + }, + }) +} + +func floatPtrEq(a, b *float64) bool { + if a == nil || b == nil { + return a == b + } + return math.Abs(*a-*b) < floatEpsilon +} + +func intPtrEq(a, b *int) bool { + if a == nil || b == nil { + return a == b + } + return *a == *b +} + +func respPtrStr(p *float64) string { + if p == nil { + return "" + } + return fmt.Sprintf("%.6f", *p) +} + +func intPtrStr(p *int) string { + if p == nil { + return "" + } + return fmt.Sprintf("%d", *p) +} + +func joinLines(s []string) string { + out := "" + for i, v := range s { + if i > 0 { + out += "\n - " + } + out += v + } + return out +} diff --git a/tests/semanticcache/plugin_test.go b/tests/semanticcache/plugin_test.go new file mode 100644 index 0000000000..98a12526a3 --- /dev/null +++ b/tests/semanticcache/plugin_test.go @@ -0,0 +1,212 @@ +package semanticcache + +import ( + "encoding/json" + "fmt" + "net/http" + "testing" +) + +const pluginName = "semantic_cache" + +// createPluginRequest mirrors handlers.CreatePluginRequest + ui/lib/types/plugins.ts. +// path is always sent (UI sends "" for built-ins; backend normalizes empty → nil). +type createPluginRequest struct { + Name string `json:"name"` + Path string `json:"path"` + Enabled bool `json:"enabled"` + Config map[string]any `json:"config"` + Placement *string `json:"placement,omitempty"` + Order *int `json:"order,omitempty"` +} + +// updatePluginRequest mirrors handlers.UpdatePluginRequest. 
The UI ALWAYS re-sends +// the current config alongside enabled, never PUTs `{enabled:false}` alone — +// that would wipe the DB config row (handlers/plugins.go:399). +type updatePluginRequest struct { + Enabled bool `json:"enabled"` + Path *string `json:"path,omitempty"` + Config map[string]any `json:"config,omitempty"` + Placement *string `json:"placement,omitempty"` + Order *int `json:"order,omitempty"` +} + +type pluginStatus struct { + Name string `json:"name"` + Status string `json:"status"` + Logs []string `json:"logs"` +} + +type pluginResponse struct { + Name string `json:"name"` + ActualName string `json:"actualName"` + Enabled bool `json:"enabled"` + Config map[string]any `json:"config"` + IsCustom bool `json:"isCustom"` + Path *string `json:"path,omitempty"` + Placement *string `json:"placement,omitempty"` + Order *int `json:"order,omitempty"` + Status pluginStatus `json:"status"` +} + +type pluginEnvelope struct { + Message string `json:"message"` + Plugin pluginResponse `json:"plugin"` +} + +// directOnlyConfig returns the plugin config blob for direct-only mode. +// Mirrors what cachingView.tsx buildPayload produces for mode="direct". +func directOnlyConfig(ttl string, defaultKey string) map[string]any { + c := map[string]any{ + "dimension": 1, + "ttl": ttl, + "threshold": 0.8, + "conversation_history_threshold": 3, + "exclude_system_prompt": false, + "cache_by_model": true, + "cache_by_provider": true, + "vector_store_namespace": cfg.Namespace, + } + if defaultKey != "" { + c["default_cache_key"] = defaultKey + } + return c +} + +// semanticConfig returns the plugin config blob for semantic mode. 
+func semanticConfig(provider, embedModel string, dimension int, ttl string, threshold float64, defaultKey string) map[string]any { + c := map[string]any{ + "provider": provider, + "embedding_model": embedModel, + "dimension": dimension, + "ttl": ttl, + "threshold": threshold, + "conversation_history_threshold": 3, + "exclude_system_prompt": false, + "cache_by_model": true, + "cache_by_provider": true, + "vector_store_namespace": cfg.Namespace, + } + if defaultKey != "" { + c["default_cache_key"] = defaultKey + } + return c +} + +// pluginGet fetches the plugin row; returns (resp, true) if found, (nil, false) on 404. +func pluginGet(t *testing.T, lc logCtx, step int) (*pluginResponse, bool) { + t.Helper() + status, body, _, err := doJSON(t, "GET", "/api/plugins/"+pluginName, nil, nil) + if err != nil { + t.Fatalf("pluginGet http error: %v", err) + } + if status == http.StatusNotFound { + logf(t, lc.at(step), "INFO", "plugin_get", map[string]any{"status": status, "exists": false}) + return nil, false + } + if status != http.StatusOK { + t.Fatalf("pluginGet unexpected status=%d body=%s", status, truncate(string(body), 300)) + } + var p pluginResponse + if err := json.Unmarshal(body, &p); err != nil { + t.Fatalf("pluginGet decode: %v\nbody=%s", err, truncate(string(body), 300)) + } + logf(t, lc.at(step), "INFO", "plugin_get", map[string]any{ + "status": status, + "exists": true, + "enabled": p.Enabled, + "plugin_status": p.Status.Status, + }) + return &p, true +} + +// pluginCreate matches the UI flow: POST /api/plugins with path:"" for built-ins. 
+func pluginCreate(t *testing.T, lc logCtx, step int, enabled bool, config map[string]any) *pluginResponse { + t.Helper() + req := createPluginRequest{ + Name: pluginName, + Path: "", // UI always sends "" for built-ins (cachingView.tsx:225) + Enabled: enabled, + Config: config, + } + if reqJSON, _ := json.MarshalIndent(req, "", " "); reqJSON != nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.plugin_create.req.json", lc.phase, lc.name, step), reqJSON) + } + logf(t, lc.at(step), "INFO", "plugin_create", map[string]any{ + "enabled": enabled, + "mode": modeFromConfig(config), + "namespace": fmt.Sprintf("%v", config["vector_store_namespace"]), + }) + status, body, _, err := doJSON(t, "POST", "/api/plugins", req, nil) + if err != nil { + t.Fatalf("pluginCreate http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.plugin_create.resp.json", lc.phase, lc.name, step), body) + if status != http.StatusCreated { + t.Fatalf("pluginCreate status=%d body=%s", status, truncate(string(body), 500)) + } + var env pluginEnvelope + if err := json.Unmarshal(body, &env); err != nil { + t.Fatalf("pluginCreate decode: %v\nbody=%s", err, truncate(string(body), 500)) + } + logf(t, lc.at(step), "PASS", "plugin_created", map[string]any{ + "status": env.Plugin.Status.Status, + "enabled": env.Plugin.Enabled, + }) + return &env.Plugin +} + +// pluginUpdate matches the UI flow: PUT with {enabled, config} — always re-send +// config when toggling enabled, never PUT bare {enabled:false} (would wipe DB row). 
+func pluginUpdate(t *testing.T, lc logCtx, step int, enabled bool, config map[string]any) *pluginResponse { + t.Helper() + req := updatePluginRequest{ + Enabled: enabled, + Config: config, + } + if reqJSON, _ := json.MarshalIndent(req, "", " "); reqJSON != nil { + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.plugin_update.req.json", lc.phase, lc.name, step), reqJSON) + } + logf(t, lc.at(step), "INFO", "plugin_update", map[string]any{ + "enabled": enabled, + "mode": modeFromConfig(config), + }) + status, body, _, err := doJSON(t, "PUT", "/api/plugins/"+pluginName, req, nil) + if err != nil { + t.Fatalf("pluginUpdate http error: %v", err) + } + dumpJSON(t, fmt.Sprintf("p%s-%s-s%d.plugin_update.resp.json", lc.phase, lc.name, step), body) + if status != http.StatusOK { + t.Fatalf("pluginUpdate status=%d body=%s", status, truncate(string(body), 500)) + } + var env pluginEnvelope + if err := json.Unmarshal(body, &env); err != nil { + t.Fatalf("pluginUpdate decode: %v\nbody=%s", err, truncate(string(body), 500)) + } + logf(t, lc.at(step), "PASS", "plugin_updated", map[string]any{ + "status": env.Plugin.Status.Status, + "enabled": env.Plugin.Enabled, + }) + return &env.Plugin +} + +// pluginDelete removes the plugin row + in-memory instance. +func pluginDelete(t *testing.T, lc logCtx, step int) { + t.Helper() + status, body, _, err := doJSON(t, "DELETE", "/api/plugins/"+pluginName, nil, nil) + if err != nil { + t.Fatalf("pluginDelete http error: %v", err) + } + if status != http.StatusOK && status != http.StatusNotFound { + t.Fatalf("pluginDelete status=%d body=%s", status, truncate(string(body), 300)) + } + logf(t, lc.at(step), "INFO", "plugin_deleted", map[string]any{"status": status}) +} + +// modeFromConfig describes a config blob in one word for log fields. 
+func modeFromConfig(c map[string]any) string { + if p, _ := c["provider"].(string); p != "" { + return "semantic" + } + return "direct-only" +} diff --git a/tests/semanticcache/preconditions_test.go b/tests/semanticcache/preconditions_test.go new file mode 100644 index 0000000000..6dbd94576d --- /dev/null +++ b/tests/semanticcache/preconditions_test.go @@ -0,0 +1,72 @@ +package semanticcache + +import ( + "net/http" + "strings" + "testing" +) + +// TestPreconditions verifies the test env is ready (Bifrost reachable, +// providers configured, plugin absent at run start). Pure checks, no state +// changes. Trusts env for vector-store config (per plan §13.4). +func TestPreconditions(t *testing.T) { + lc := newLogCtx("preconditions", "preconditions") + logf(t, lc.at(0), "SETUP", "phase_start", map[string]any{"bifrost_url": cfg.BifrostURL}) + + t.Run("0.1_bifrost_reachable", func(t *testing.T) { + lc := lc + lc.name = "0.1_bifrost_reachable" + status, body, _, err := doJSON(t, "GET", "/api/plugins", nil, nil) + if err != nil || status != http.StatusOK { + logf(t, lc.at(1), "FAIL", "bifrost_unreachable", map[string]any{ + "status": status, "err": err, + }) + t.Fatalf("GET /api/plugins failed: status=%d err=%v body=%s", + status, err, truncate(string(body), 200)) + } + logf(t, lc.at(1), "PASS", "bifrost_reachable", map[string]any{"status": status}) + }) + + t.Run("0.2_openai_configured", func(t *testing.T) { + lc := lc + lc.name = "0.2_openai_configured" + ps := providersList(t, lc, 1) + if !hasProvider(ps, "openai") { + logf(t, lc.at(2), "FAIL", "openai_missing", nil) + t.Fatalf("openai provider not configured (got %d providers)", len(ps)) + } + logf(t, lc.at(2), "PASS", "openai_present", nil) + }) + + t.Run("0.3_optional_providers", func(t *testing.T) { + lc := lc + lc.name = "0.3_optional_providers" + ps := providersList(t, lc, 1) + for _, want := range []string{"gemini", "anthropic"} { + if hasProvider(ps, want) { + logf(t, lc.at(2), "PASS", "provider_present", 
map[string]any{"provider": want}) + } else { + logf(t, lc.at(2), "WARN", "provider_absent", map[string]any{ + "provider": want, + "effect": "cross-provider cases will skip", + }) + } + } + }) + + // The plugin-absent precondition is enforced in TestMain (with RUN_FORCE=1 + // auto-deleting a pre-existing row). We don't re-check here because tests + // run in alphabetical file order — TestDirect / TestSemantic / TestLifecycle + // create their own plugin and may leave it loaded for the next test. + + logf(t, lc.at(99), "TEARDOWN", "phase_end", nil) +} + +func hasProvider(ps []providerSummary, name string) bool { + for _, p := range ps { + if strings.EqualFold(p.Name, name) { + return true + } + } + return false +} From b71899dabe0c8122a99788059243c6c1e26f293d Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:14:41 +0530 Subject: [PATCH 15/81] test: add direct cache e2e test suite (#3426) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds a comprehensive end-to-end test suite (`TestDirect`) for the semantic cache plugin operating in direct-only mode. The suite covers 55 test cases (plan §1.1–1.55) validating cache hit/miss behavior, key isolation, TTL handling, config flag mutations, normalization, streaming, multi-endpoint support, parameter hashing, tool definitions, and cache management operations. 
## Changes - Introduces `tests/semanticcache/direct_test.go` with `TestDirect`, covering: - **Basic hit/miss and key isolation** (1.1, 1.2, 1.3, 1.4) - **`cache_by_model` and `cache_by_provider` flag behavior** (1.5–1.8), including serial config-mutation cases that restore baseline via `t.Cleanup` - **`exclude_system_prompt` flag** (1.9, 1.10) - **Conversation threshold boundary conditions** (1.11, 1.12) - **TTL expiry, per-request TTL override, invalid TTL fallback, and zero/negative TTL fallback** (1.13, 1.14, 1.15, 1.54) - **`no-store` header semantics**, including case-sensitivity and explicit `false` value (1.16, 1.17, 1.45, 1.46) - **`cache-type` header behavior** in direct-only mode, including the `semantic` header bug case (1.18, 1.19) - **Streaming SSE**: hit/miss, chunk replay order, and non-final chunk cache_debug absence (1.24, 1.25, 1.47) - **Multi-endpoint coverage**: text completions, responses API, embeddings, and image generation (1.20–1.23) - **Input normalization**: case folding, whitespace trimming, Unicode, and large prompts (1.26–1.29) - **Image attachment hashing**: same URL hits, different URL misses (1.30, 1.31) - **Edge cases**: nil content messages, empty messages array, unknown cache ID deletion (1.42, 1.43, 1.40) - **Parameter hash isolation**: temperature, top_p, seed, max_tokens, top_logprobs, tools (order-independent and name-change), prompt_cache_key, service_tier, store flag (1.32–1.37, 1.48–1.52) - **Cache management**: clear by cache ID, clear by key (1.38, 1.39) - **Plugin status round-trip** via GET (1.44) - **`/api/logs` cross-check**: verifies persisted `cache_debug` matches in-flight response stamp (1.55) - **`responses` API `previous_response_id` isolation** (1.53) - **Threshold header no-op in direct-only mode** (1.41) - Adds helper functions: `simpleChat`, `chatWithSystem`, `chatWithImage`, `restoreDirectBaseline`, `assertHitAndReturnCacheDebug` - Establishes a parallelism contract: cases that mutate plugin config run 
serially (no `t.Parallel()`); all others run concurrently with unique cache keys to prevent collisions ## Type of change - [ ] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [x] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [ ] Docs ## How to test ```sh # Run the full direct-mode suite go test ./tests/semanticcache/... -run TestDirect -v -timeout 300s # Skip the expensive image generation case SC_SKIP_IMAGE_GEN=1 go test ./tests/semanticcache/... -run TestDirect -v -timeout 300s ``` Required environment variables (same as the broader semantic cache e2e suite): - `OPENAI_MODEL` — primary OpenAI-compatible model (e.g. `openai/gpt-4o-mini`) - `OPENAI_MODEL_ALT` — secondary model for cross-model isolation cases - `OPENAI_EMBED` — embedding model name (e.g. `text-embedding-3-small`) - `SC_CHAT_MODEL_ANTHROPIC` — (optional) Anthropic model; cases 1.7 and 1.8 call `t.Skip` when it is unset - `SC_SKIP_IMAGE_GEN=1` — (optional) skip case 1.23 to avoid DALL-E costs ## Screenshots/Recordings N/A — test-only change. ## Breaking changes - [x] No ## Related issues ## Security considerations No new auth, secrets, or PII surface. Test prompts are benign and do not contain sensitive data. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [ ] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- tests/semanticcache/direct_test.go | 1429 ++++++++++++++++++++++++++++ 1 file changed, 1429 insertions(+) create mode 100644 tests/semanticcache/direct_test.go diff --git a/tests/semanticcache/direct_test.go b/tests/semanticcache/direct_test.go new file mode 100644 index 0000000000..a7a59591a2 --- /dev/null +++ b/tests/semanticcache/direct_test.go @@ -0,0 +1,1429 @@ +package semanticcache + +import ( + "encoding/json" + "net/http" + "os" + "strings" + "testing" + "time" +) + +// Test image URLs copied from core/internal/llmtests/utils.go so the e2e +// suite uses the same fixtures the rest of the test-suite has validated +// providers against. +const ( + testImageURL1 = "https://pestworldcdn-dcf2a8gbggazaghf.z01.azurefd.net/media/561791/carpenter-ant4.jpg" + testImageURL2 = "https://images.pexels.com/photos/30662605/pexels-photo-30662605/free-photo-of-eiffel-tower-view-from-the-seine-river-in-paris.jpeg" +) + +// TestDirect runs the direct-only caching cases from the plan (1.1–1.55). +// +// Parallelism rules — IMPORTANT for anyone adding new cases: +// +// - Cases that ONLY exercise per-request behavior (different cache keys, +// headers, params, attachments) call `t.Parallel()` at the top of the +// subtest body. Cache keys are unique per case so they never collide. +// +// - Cases that mutate the plugin's CONFIG via `pluginUpdate` (e.g. flipping +// cache_by_model, exclude_system_prompt, default_cache_key) must NOT +// call `t.Parallel()`. They run synchronously inside the parent loop, +// one at a time. Each such case restores the baseline config via +// `t.Cleanup` before returning. 
+// +// Go's test framework guarantees the order: every `t.Parallel()` subtest +// PAUSES until the parent function reaches its end, then all paused +// subtests unblock and run concurrently. So all 4 mutating cases (1.4, +// 1.6, 1.8, 1.10) execute serially first; the remaining parallel cases +// then fire off together against the restored baseline plugin. +// +// Adding a new mutating case → omit `t.Parallel()` + add a `// Serial:` +// comment so the next person sees the intent. +func TestDirect(t *testing.T) { + lc := newLogCtx("direct", "setup") + logf(t, lc.at(0), "SETUP", "phase_start", map[string]any{ + "mode": "direct-only", + "ttl": ttlDirect, + }) + + // Setup: create the plugin in direct-only mode with a default cache key + // scoped to phase1, so case 1.3 can test the default-key path. Cases that + // mutate config PUT during the case and restore baseline via t.Cleanup. + created := pluginCreate(t, lc, 1, true, directOnlyConfig(ttlDirect, defaultKeyDirect)) + if created.Status.Status != "active" && created.Status.Status != "ready" && created.Status.Status != "Ready" && created.Status.Status != "Initialized" { + t.Logf("note: plugin status=%q (continuing — status field naming may vary)", created.Status.Status) + } + + // Cleanup at end of phase — clear every key used. Plugin stays loaded so + // later phases can PUT-update it. 
+ allKeys := []string{ + defaultKeyDirect, + "phase1-k1-a", "phase1-k1-b", "phase1-k2", "phase1-ttl", + "phase1-k5", "phase1-k6", "phase1-k7", "phase1-k8", + "phase1-k9", "phase1-k10", "phase1-k11", "phase1-k12", + "phase1-k14", "phase1-k15", "phase1-k16", "phase1-k17", + "phase1-k18", "phase1-k19", "phase1-k41", + "phase1-k45", "phase1-k46", "phase1-k54", + "phase1-k32", "phase1-k33", "phase1-k34", "phase1-k35", "phase1-k36", + "phase1-k48", "phase1-k49", "phase1-k50", "phase1-k51", "phase1-k52", + "phase1-k26", "phase1-k27", "phase1-k28", "phase1-k29", + "phase1-k30", "phase1-k31", "phase1-k42", "phase1-k43", + "phase1-k20", "phase1-k21", "phase1-k22", "phase1-k23", + "phase1-k53", "phase1-k53-seed", + "phase1-k24", "phase1-k25", "phase1-k47", + "phase1-k38", "phase1-k39", "phase1-k37", + "phase1-k55", + } + t.Cleanup(func() { + for _, k := range allKeys { + _ = clearByCacheKey(t, lc.at(99), 99, k) + } + }) + + // 1.1 exact_match_chat + t.Run("1.1_exact_match_chat", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.1_exact_match_chat") + req := simpleChat(cfg.OpenAIModel, "What is the capital of France?") + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: "phase1-k1-a"}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: "phase1-k1-a"}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.2 key_isolation + t.Run("1.2_key_isolation", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.2_key_isolation") + req := simpleChat(cfg.OpenAIModel, "Recommend a science fiction book to read.") + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: "phase1-k1-b"}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, req, cacheHeaders{Key: "phase1-k2"}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.3 default_cache_key — no header, default key on plugin 
applies. + t.Run("1.3_default_cache_key", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.3_default_cache_key") + req := simpleChat(cfg.OpenAIModel, "Give me one fun fact about octopuses.") + + respA := postChat(t, lc, 1, req, cacheHeaders{}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.4 no_key_no_default — when DefaultCacheKey="" and no x-bf-cache-key, + // the plugin's PreLLMHook bails before any cache work (`resolveCacheKey` returns false). + // PostLLMHook also bails because state was never created. So no cache_debug is stamped. + t.Run("1.4_no_key_no_default", func(t *testing.T) { + // Serial: this case mutates plugin config (default_cache_key=""). + lc := newLogCtx("direct", "1.4_no_key_no_default") + + // Flip default_cache_key off. + pluginUpdate(t, lc, 1, true, directOnlyConfig(ttlDirect, "")) + t.Cleanup(func() { restoreDirectBaseline(t, lc, 99) }) + + req := simpleChat(cfg.OpenAIModel, "Tell me a one-line joke about teapots.") + respA := postChat(t, lc, 2, req, cacheHeaders{}) + assertNoCacheDebug(t, lc, 3, respA) + + respB := postChat(t, lc, 4, req, cacheHeaders{}) + assertNoCacheDebug(t, lc, 5, respB) + }) + + // 1.5 cache_by_model_default_true — model in cache key by default, so two + // requests with same body but different models → distinct cache_ids. + t.Run("1.5_cache_by_model_default_true", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.5_cache_by_model_default_true") + key := "phase1-k5" + body := "What is the speed of light in vacuum?" 
+ + respA := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, simpleChat(cfg.OpenAIModelAlt, body), cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.6 cache_by_model_false — flip the flag, same body across two models + // should produce the same cache_id; B hits the entry stored by A. + t.Run("1.6_cache_by_model_false", func(t *testing.T) { + // Serial: this case mutates plugin config (cache_by_model=false). + lc := newLogCtx("direct", "1.6_cache_by_model_false") + + cfgBlob := directOnlyConfig(ttlDirect, defaultKeyDirect) + cfgBlob["cache_by_model"] = false + pluginUpdate(t, lc, 1, true, cfgBlob) + t.Cleanup(func() { restoreDirectBaseline(t, lc, 99) }) + + key := "phase1-k6" + body := "Recommend one short walk-friendly podcast." + + respA := postChat(t, lc, 2, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 3, respA) + waitForCacheWrite(t, lc, 4) + + respB := postChat(t, lc, 5, simpleChat(cfg.OpenAIModelAlt, body), cacheHeaders{Key: key}) + idB := assertHit(t, lc, 6, respB, "direct") + assertSameCacheID(t, lc, 7, idB, idA) + }) + + // 1.7 cache_by_provider_default_true — provider in cache key by default. + t.Run("1.7_cache_by_provider_default_true", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.7_cache_by_provider_default_true") + if os.Getenv("SC_CHAT_MODEL_ANTHROPIC") == "" { + t.Skip("anthropic model not configured (SC_CHAT_MODEL_ANTHROPIC unset)") + } + key := "phase1-k7" + body := "Give one tip for staying focused while reading." 
+ + respA := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, simpleChat(cfg.AnthroModel, body), cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.8 cache_by_provider_false — with both cache_by_provider and + // cache_by_model off, providers can share cache entries. + t.Run("1.8_cache_by_provider_false", func(t *testing.T) { + // Serial: this case mutates plugin config (cache_by_* = false). + lc := newLogCtx("direct", "1.8_cache_by_provider_false") + if os.Getenv("SC_CHAT_MODEL_ANTHROPIC") == "" { + t.Skip("anthropic model not configured (SC_CHAT_MODEL_ANTHROPIC unset)") + } + + cfgBlob := directOnlyConfig(ttlDirect, defaultKeyDirect) + cfgBlob["cache_by_provider"] = false + cfgBlob["cache_by_model"] = false + pluginUpdate(t, lc, 1, true, cfgBlob) + t.Cleanup(func() { restoreDirectBaseline(t, lc, 99) }) + + key := "phase1-k8" + body := "Say hi in three words." + + respA := postChat(t, lc, 2, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 3, respA) + waitForCacheWrite(t, lc, 4) + + respB := postChat(t, lc, 5, simpleChat(cfg.AnthroModel, body), cacheHeaders{Key: key}) + idB := assertHit(t, lc, 6, respB, "direct") + assertSameCacheID(t, lc, 7, idB, idA) + }) + + // 1.9 exclude_system_prompt_false — system message is part of the hash + // by default; different systems → different cache_ids. + t.Run("1.9_exclude_system_prompt_false", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.9_exclude_system_prompt_false") + key := "phase1-k9" + user := "What's 2+2?" 
+ + respA := postChat(t, lc, 1, chatWithSystem(cfg.OpenAIModel, "You are a math tutor.", user), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, chatWithSystem(cfg.OpenAIModel, "You are a pirate.", user), cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.10 exclude_system_prompt_true — flag flips system message out of the + // hash; identical user message hits regardless of system. + t.Run("1.10_exclude_system_prompt_true", func(t *testing.T) { + // Serial: this case mutates plugin config (exclude_system_prompt=true). + lc := newLogCtx("direct", "1.10_exclude_system_prompt_true") + + cfgBlob := directOnlyConfig(ttlDirect, defaultKeyDirect) + cfgBlob["exclude_system_prompt"] = true + pluginUpdate(t, lc, 1, true, cfgBlob) + t.Cleanup(func() { restoreDirectBaseline(t, lc, 99) }) + + key := "phase1-k10" + user := "What's the powerhouse of the cell?" + + respA := postChat(t, lc, 2, chatWithSystem(cfg.OpenAIModel, "You are a biology teacher.", user), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 3, respA) + waitForCacheWrite(t, lc, 4) + + respB := postChat(t, lc, 5, chatWithSystem(cfg.OpenAIModel, "You are Sherlock Holmes.", user), cacheHeaders{Key: key}) + idB := assertHit(t, lc, 6, respB, "direct") + assertSameCacheID(t, lc, 7, idB, idA) + }) + + // 1.11 conversation_threshold_skips — len(messages) > threshold (default 3) + // → plugin bails before any cache work. No cache_debug on either response. + t.Run("1.11_conversation_threshold_skips", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.11_conversation_threshold_skips") + key := "phase1-k11" + + msgs := []chatMessage{ + {Role: "user", Content: textContent("Hi.")}, + {Role: "assistant", Content: textContent("Hello! 
How can I help?")}, + {Role: "user", Content: textContent("What's the weather like in Paris?")}, + {Role: "user", Content: textContent("Actually, give me one travel tip for Paris.")}, + } + req := chatRequest{Model: cfg.OpenAIModel, Messages: msgs} + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + assertNoCacheDebug(t, lc, 2, respA) + + respB := postChat(t, lc, 3, req, cacheHeaders{Key: key}) + assertNoCacheDebug(t, lc, 4, respB) + }) + + // 1.12 conversation_threshold_boundary — len(messages) == threshold (3) + // is still cached (code uses `>`, not `>=`). Boundary case. + t.Run("1.12_conversation_threshold_boundary", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.12_conversation_threshold_boundary") + key := "phase1-k12" + + msgs := []chatMessage{ + {Role: "user", Content: textContent("Hi.")}, + {Role: "assistant", Content: textContent("Hello!")}, + {Role: "user", Content: textContent("Recommend one calming tea.")}, + } + req := chatRequest{Model: cfg.OpenAIModel, Messages: msgs} + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.13 ttl_expiry_default + t.Run("1.13_ttl_expiry_default", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.13_ttl_expiry_default") + req := simpleChat(cfg.OpenAIModel, "Name a primary color.") + key := "phase1-ttl" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + // Confirm a fresh read hits within TTL. + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "direct") + + // Sleep past TTL + 2s safety margin. 
+ wait := ttlDirectDuration + 2*time.Second + logf(t, lc.at(6), "INFO", "sleep_for_ttl", map[string]any{"seconds": wait.Seconds()}) + time.Sleep(wait) + + respC := postChat(t, lc, 7, req, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 8, respC) + }) + + // 1.14 ttl_per_request_override — x-bf-cache-ttl=3s overrides plugin default (10s). + // B hit within 3s, C miss after sleeping past 3s + safety. + t.Run("1.14_ttl_per_request_override", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.14_ttl_per_request_override") + req := simpleChat(cfg.OpenAIModel, "Name a noble gas.") + key := "phase1-k14" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, TTL: "3s"}) + _ = assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, TTL: "3s"}) + _ = assertHit(t, lc, 5, respB, "direct") + + // Sleep past per-request TTL but well under plugin default (10s). + wait := 4 * time.Second + logf(t, lc.at(6), "INFO", "sleep_past_per_request_ttl", map[string]any{"seconds": wait.Seconds()}) + time.Sleep(wait) + + respC := postChat(t, lc, 7, req, cacheHeaders{Key: key, TTL: "3s"}) + _ = assertMiss(t, lc, 8, respC) + }) + + // 1.15 ttl_invalid_header_ignored — bogus x-bf-cache-ttl is silently ignored + // (lib/ctx.go:381). Plugin default TTL applies; B still hits within default. + t.Run("1.15_ttl_invalid_header_ignored", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.15_ttl_invalid_header_ignored") + req := simpleChat(cfg.OpenAIModel, "What is a haiku?") + key := "phase1-k15" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, TTL: "garbage"}) + _ = assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, TTL: "also-garbage"}) + _ = assertHit(t, lc, 5, respB, "direct") + }) + + // 1.16 no_store_header — both A and B send x-bf-cache-no-store=true; nothing + // is ever written, so both miss. 
cache_debug IS stamped (plugin runs, but + // PostLLMHook's shouldSkipCaching short-circuits the write). + t.Run("1.16_no_store_header", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.16_no_store_header") + req := simpleChat(cfg.OpenAIModel, "Define entropy in one sentence.") + key := "phase1-k16" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, NoStore: "true"}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, NoStore: "true"}) + idB := assertMiss(t, lc, 5, respB) + // Same body + key → same deterministic cache_id even though no entry exists. + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.17 no_store_with_hit — A writes normally; B sends no-store=true but the + // header only blocks WRITES, not reads. B still hits the entry A stored. + t.Run("1.17_no_store_with_hit", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.17_no_store_with_hit") + req := simpleChat(cfg.OpenAIModel, "What's the boiling point of water in Celsius?") + key := "phase1-k17" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, NoStore: "true"}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.18 cache_type_direct_header — explicit x-bf-cache-type=direct in direct-only + // mode is a no-op narrow (direct is already the only path). B still hits. 
+ t.Run("1.18_cache_type_direct_header", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.18_cache_type_direct_header") + req := simpleChat(cfg.OpenAIModel, "Name the Roman god of war.") + key := "phase1-k18" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, Type: "direct"}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, Type: "direct"}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.19 cache_type_semantic_in_direct_only — STRICT assertion of PLAN §12 bug #2. + // In direct-only mode with x-bf-cache-type=semantic, the plugin has no + // embedding executor → no semantic search can run. Direct search is also + // suppressed by the header. The canDoSemanticSearch early-exit guard in + // PreLLMHook (plugins/semanticcache/main.go) returns before any cache + // activity, so no cache_debug is stamped and no orphan entry is written. + // If either appears, the guard has regressed. + t.Run("1.19_cache_type_semantic_in_direct_only", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.19_cache_type_semantic_in_direct_only") + req := simpleChat(cfg.OpenAIModel, "Tell me one famous quote about courage.") + key := "phase1-k19" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, Type: "semantic"}) + assertNoCacheDebug(t, lc, 2, respA) + + respB := postChat(t, lc, 3, req, cacheHeaders{Key: key, Type: "semantic"}) + assertNoCacheDebug(t, lc, 4, respB) + }) + + // 1.41 threshold_header_ignored_direct_only — x-bf-cache-threshold has no + // effect on direct lookups (it's only consulted in performSemanticSearch). + // B with threshold=0.0 still finds A's deterministic entry. 
+ t.Run("1.41_threshold_header_ignored_direct_only", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.41_threshold_header_ignored_direct_only") + req := simpleChat(cfg.OpenAIModel, "Name a famous bridge.") + key := "phase1-k41" + zero := 0.0 + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, Threshold: &zero}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.45 no_store_explicit_false — header value MUST be the literal "true" to + // disable writes (ctx.go:406). Sending "false" does NOT block writes. + t.Run("1.45_no_store_explicit_false", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.45_no_store_explicit_false") + req := simpleChat(cfg.OpenAIModel, "What's a synonym for happy?") + key := "phase1-k45" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, NoStore: "false"}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, NoStore: "false"}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.46 no_store_uppercase_true — header match is case-sensitive. "TRUE" does + // not toggle the no-store flag; writes proceed normally. 
+ t.Run("1.46_no_store_uppercase_true", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.46_no_store_uppercase_true") + req := simpleChat(cfg.OpenAIModel, "Name a famous painter.") + key := "phase1-k46" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, NoStore: "TRUE"}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, NoStore: "TRUE"}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.37 params_top_logprobs — top_logprobs is a non-trivial chat parameter + // that lands in the params metadata (utils.go:795). Distinct values must + // produce distinct cache_ids. Stands in for the "extra_params" case in + // the plan since extra_params is hard to wire on the OpenAI-compat + // endpoint — same isolation contract, less plumbing. + t.Run("1.37_params_top_logprobs", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.37_params_top_logprobs") + key := "phase1-k37" + body := "Name one mountain range." + yes := true + + reqA := simpleChat(cfg.OpenAIModel, body) + reqA.LogProbs = &yes + t1 := 2 + reqA.TopLogProbs = &t1 + + reqB := simpleChat(cfg.OpenAIModel, body) + reqB.LogProbs = &yes + t2 := 5 + reqB.TopLogProbs = &t2 + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.38 clear_by_cache_id — populate an entry, delete it by id, verify the + // same body now misses. 
+ t.Run("1.38_clear_by_cache_id", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.38_clear_by_cache_id") + key := "phase1-k38" + req := simpleChat(cfg.OpenAIModel, "Name one type of tree.") + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + // Confirm the entry is queryable before we delete it. + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "direct") + + // Delete by id. + if got := clearByCacheID(t, lc, 6, idA); got != http.StatusOK { + t.Fatalf("expected 200 from clear-by-id, got %d", got) + } + + // Subsequent identical request must miss again. + respC := postChat(t, lc, 7, req, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 8, respC) + }) + + // 1.39 clear_by_key — populate two distinct bodies under the same cache + // key, then bulk-delete by key; both should miss afterwards. + t.Run("1.39_clear_by_key", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.39_clear_by_key") + key := "phase1-k39" + reqA := simpleChat(cfg.OpenAIModel, "Recommend one mystery novel.") + reqB := simpleChat(cfg.OpenAIModel, "Recommend one biography.") + + respA1 := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA1) + respB1 := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 4, respB1) + waitForCacheWrite(t, lc, 5) + + // Both should now hit before we clear. + _ = assertHit(t, lc, 7, postChat(t, lc, 6, reqA, cacheHeaders{Key: key}), "direct") + _ = assertHit(t, lc, 9, postChat(t, lc, 8, reqB, cacheHeaders{Key: key}), "direct") + + // Bulk-clear the whole key. + if got := clearByCacheKey(t, lc, 10, key); got != http.StatusOK { + t.Fatalf("expected 200 from clear-by-key, got %d", got) + } + + // Both should miss again. 
+ _ = assertMiss(t, lc, 12, postChat(t, lc, 11, reqA, cacheHeaders{Key: key})) + _ = assertMiss(t, lc, 14, postChat(t, lc, 13, reqB, cacheHeaders{Key: key})) + }) + + // 1.40 clear_unknown_id — DELETE with a random uuid. Whether Bifrost returns + // 200 (idempotent delete) or 404 (strict not-found), the contract is: + // no 5xx and no crash. Documents the actual behavior in the log so PLAN + // can pin it down later. + t.Run("1.40_clear_unknown_id", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.40_clear_unknown_id") + unknownID := "00000000-0000-0000-0000-000000000000" + status := clearByCacheID(t, lc, 1, unknownID) + if status >= 500 { + t.Fatalf("clear unknown id returned %d (server error); expected idempotent 200 or 404", status) + } + // Accept either contract; surface which one in the log for PLAN docs. + logf(t, lc.at(2), "PASS", "clear_unknown_id_documented", map[string]any{ + "status": status, + "contract": "idempotent" + (map[bool]string{true: "_or_404"}[status == http.StatusNotFound]), + }) + }) + + // 1.24 streaming_chat — SSE chat, A→B identical. B replays cached chunks; + // final chunk on B has cache_hit=true with hit_type=direct, and chunk count + // matches A's chunk count. 
+ t.Run("1.24_streaming_chat", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.24_streaming_chat") + key := "phase1-k24" + req := simpleChat(cfg.OpenAIModel, "Recite three colors of the rainbow, one per line.") + + respA := postChatStream(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + if len(respA.dataChunks()) < 2 { + t.Fatalf("expected ≥2 data chunks on miss stream, got %d", len(respA.dataChunks())) + } + waitForCacheWrite(t, lc, 3) + + respB := postChatStream(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + if got, want := len(respB.dataChunks()), len(respA.dataChunks()); got != want { + t.Fatalf("expected B chunk count %d to match A's %d", got, want) + } + }) + + // 1.25 streaming_replay_order — chunk-by-chunk content should be identical + // between A (live stream) and B (cached replay). Plugin stores chunks as a + // JSON array and replays them in order (search.go:351). 
+ t.Run("1.25_streaming_replay_order", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.25_streaming_replay_order") + key := "phase1-k25" + req := simpleChat(cfg.OpenAIModel, "Count from one to five.") + + respA := postChatStream(t, lc, 1, req, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChatStream(t, lc, 4, req, cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "direct") + + a := respA.dataChunks() + b := respB.dataChunks() + if len(a) != len(b) { + t.Fatalf("chunk count mismatch: A=%d B=%d", len(a), len(b)) + } + for i := range a { + ta, tb := a[i].chunkText(), b[i].chunkText() + if ta != tb { + t.Fatalf("chunk %d text mismatch:\nA=%q\nB=%q", i, ta, tb) + } + } + logf(t, lc.at(6), "PASS", "chunks_identical_in_order", map[string]any{"count": len(a)}) + }) + + // 1.47 streaming_non_final_chunks_have_no_cache_debug — only the final + // data chunk carries the cache_debug stamp (stampCacheDebugForMiss / + // stampCacheDebugForHit skip non-final chunks). All earlier chunks must + // have cache_debug absent on both A (miss) and B (hit). 
+ t.Run("1.47_streaming_non_final_chunks_no_cache_debug", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.47_streaming_non_final_chunks_no_cache_debug") + key := "phase1-k47" + req := simpleChat(cfg.OpenAIModel, "List two breakfast foods.") + + check := func(stage string, resp *streamResponse) { + data := resp.dataChunks() + if len(data) == 0 { + t.Fatalf("[%s] no data chunks received", stage) + } + for i := 0; i < len(data)-1; i++ { + if cd := data[i].cacheDebug(); cd != nil { + t.Fatalf("[%s] non-final chunk %d had cache_debug stamped: %+v", stage, i, cd) + } + } + finalCD := data[len(data)-1].cacheDebug() + if finalCD == nil { + t.Fatalf("[%s] final chunk missing cache_debug stamp", stage) + } + } + + respA := postChatStream(t, lc, 1, req, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + check("miss", respA) + waitForCacheWrite(t, lc, 3) + + respB := postChatStream(t, lc, 4, req, cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "direct") + check("hit", respB) + logf(t, lc.at(6), "PASS", "non_final_chunks_clean", map[string]any{ + "a_count": len(respA.dataChunks()), + "b_count": len(respB.dataChunks()), + }) + }) + + // 1.20 text_completion — /v1/completions with same prompt → hit. Plugin's + // metadata extractor handles TextCompletionRequest specifically. + t.Run("1.20_text_completion", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.20_text_completion") + key := "phase1-k20" + maxTok := 30 + req := textCompletionRequest{ + Model: "openai/gpt-3.5-turbo-instruct", + Prompt: "The capital of Japan is", + MaxTokens: &maxTok, + } + + respA := postTextCompletion(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postTextCompletion(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.21 responses_api — /v1/responses with identical input → hit. 
+ t.Run("1.21_responses_api", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.21_responses_api") + key := "phase1-k21" + req := responsesRequest{ + Model: cfg.OpenAIModel, + Input: "Name one type of cloud.", + } + + respA := postResponses(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postResponses(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.22 embedding_endpoint — /v1/embeddings with identical input → hit. + // Plugin's EmbeddingRequest path is direct-cache-only (semantic search is + // suppressed for embedding requests — see PreLLMHook semanticEligible check). + t.Run("1.22_embedding_endpoint", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.22_embedding_endpoint") + key := "phase1-k22" + req := embeddingRequest{ + Model: "openai/" + cfg.OpenAIEmbed, + Input: "The quick brown fox jumps over the lazy dog.", + } + + respA := postEmbedding(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postEmbedding(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.23 image_generation — /v1/images/generations with identical prompt → hit. + // Note: this case is expensive ($0.04/image on dall-e-3). Skip by setting + // SC_SKIP_IMAGE_GEN=1. 
+ t.Run("1.23_image_generation", func(t *testing.T) { + t.Parallel() + if os.Getenv("SC_SKIP_IMAGE_GEN") == "1" { + t.Skip("SC_SKIP_IMAGE_GEN=1") + } + lc := newLogCtx("direct", "1.23_image_generation") + key := "phase1-k23" + n := 1 + req := imageGenRequest{ + Model: "openai/dall-e-3", + Prompt: "A minimalist line drawing of a red teapot on a white background.", + N: &n, + Size: "1024x1024", + } + + respA := postImageGen(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postImageGen(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.53 responses_previous_response_id — different previous_response_id + // values must produce distinct cache_ids (it's in params_hash via utils.go:834). + // We use placeholder IDs since we only check params_hash isolation, not + // the actual conversation chain. + t.Run("1.53_responses_previous_response_id", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.53_responses_previous_response_id") + key := "phase1-k53" + + // Need a real previous_response_id for the provider to accept the call. + // Create one by first making a /v1/responses call and capturing its id. + seed := postResponses(t, lc, 1, responsesRequest{ + Model: cfg.OpenAIModel, + Input: "Say 'one'.", + }, cacheHeaders{Key: "phase1-k53-seed"}) + + var seedBody struct { + ID string `json:"id"` + } + if err := json.Unmarshal(seed.bodyRaw, &seedBody); err != nil || seedBody.ID == "" { + t.Skipf("could not extract response id to seed previous_response_id: %v", err) + } + + // Make a second seed call so we have two distinct previous_response_ids. 
+ seed2 := postResponses(t, lc, 2, responsesRequest{ + Model: cfg.OpenAIModel, + Input: "Say 'two'.", + }, cacheHeaders{Key: "phase1-k53-seed"}) + var seed2Body struct { + ID string `json:"id"` + } + if err := json.Unmarshal(seed2.bodyRaw, &seed2Body); err != nil || seed2Body.ID == "" { + t.Skipf("could not extract second response id: %v", err) + } + + input := "Continue." + reqA := responsesRequest{Model: cfg.OpenAIModel, Input: input, PreviousResponseID: &seedBody.ID} + reqB := responsesRequest{Model: cfg.OpenAIModel, Input: input, PreviousResponseID: &seed2Body.ID} + + respA := postResponses(t, lc, 3, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 4, respA) + + respB := postResponses(t, lc, 5, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 6, respB) + assertDifferentCacheID(t, lc, 7, idA, idB) + }) + + // 1.26 normalization_case — getNormalizedInputForCaching lowercases + trims + // (utils.go:122). "Hello" and "hello " hash identically. + t.Run("1.26_normalization_case", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.26_normalization_case") + key := "phase1-k26" + + reqA := simpleChat(cfg.OpenAIModel, "Hello, who wrote 1984?") + reqB := simpleChat(cfg.OpenAIModel, "hello, who wrote 1984? ") + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, reqB, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.27 normalization_whitespace — leading/trailing whitespace trimmed; inner + // whitespace preserved verbatim. + t.Run("1.27_normalization_whitespace", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.27_normalization_whitespace") + key := "phase1-k27" + + reqA := simpleChat(cfg.OpenAIModel, " Name one type of pasta. 
") + reqB := simpleChat(cfg.OpenAIModel, "Name one type of pasta.") + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, reqB, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.28 unicode_prompt — non-ASCII + emoji round-trips through hash + cache. + t.Run("1.28_unicode_prompt", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.28_unicode_prompt") + key := "phase1-k28" + body := "🚀 Quel est le sens de la vie? 寿司は美味しい。" + + respA := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.29 large_prompt — ~10KB prompt; the second call's wall-clock should be + // dominated by cache_hit_latency (~ms), not provider latency (~s). + t.Run("1.29_large_prompt", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.29_large_prompt") + key := "phase1-k29" + // Repeat a sentence to ~10KB. + body := strings.Repeat("In a region far away, beneath the silver moon, a curious traveler set out at dawn carrying a worn leather satchel and a heart full of questions. ", 70) + body += " Summarize the above in one sentence." + + respA := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + // cache_hit_latency is stamped at hit time — assert it's at least set. + // (Sanity check; provider latency would be much higher.) 
+ if cd := respB.cacheDebug(); cd == nil || cd.CacheHitLatency == nil { + t.Fatalf("expected cache_hit_latency stamped on large_prompt hit") + } + }) + + // 1.30 image_in_message — identical image_url block in both A and B → hit. + // Verifies extractAttachmentsForCaching contributes consistently to the hash. + t.Run("1.30_image_in_message", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.30_image_in_message") + key := "phase1-k30" + + reqA := chatWithImage(cfg.OpenAIModel, "What is shown in this image?", testImageURL1) + reqB := chatWithImage(cfg.OpenAIModel, "What is shown in this image?", testImageURL1) + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, reqB, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.31 image_attachment_diff — same text, different image URL → distinct cache_ids. + t.Run("1.31_image_attachment_diff", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.31_image_attachment_diff") + key := "phase1-k31" + prompt := "What is shown in this image?" + + reqA := chatWithImage(cfg.OpenAIModel, prompt, testImageURL1) + reqB := chatWithImage(cfg.OpenAIModel, prompt, testImageURL2) + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.42 nil_content_msg — a 3-message conversation including an assistant + // tool-call message with nil content (followed by a tool response). + // extractChatMessageContent handles nil content as empty string (utils.go:312) + // so the hash is stable across runs. 
+ t.Run("1.42_nil_content_msg", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.42_nil_content_msg") + key := "phase1-k42" + + mkReq := func() chatRequest { + return chatRequest{ + Model: cfg.OpenAIModel, + Messages: []chatMessage{ + {Role: "user", Content: textContent("What's the weather in NYC?")}, + { + Role: "assistant", + // Content intentionally omitted (nil) — assistant + // tool-call messages set content=null per OpenAI spec. + ToolCalls: []chatToolCall{{ + ID: "call_abc", + Type: "function", + Function: chatToolCallFunc{ + Name: "get_weather", + Arguments: `{"city":"NYC"}`, + }, + }}, + }, + {Role: "tool", ToolCallID: "call_abc", Content: textContent("Sunny, 72°F")}, + }, + } + } + + respA := postChat(t, lc, 1, mkReq(), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, mkReq(), cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.43 empty_messages — sending messages:[] should be rejected by the + // provider (or Bifrost validation) without crashing Bifrost. Accept any + // non-2xx response; the contract is "no crash, no orphan cache entry." 
+ t.Run("1.43_empty_messages", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.43_empty_messages") + key := "phase1-k43" + + req := chatRequest{Model: cfg.OpenAIModel, Messages: []chatMessage{}} + hdr := http.Header{} + (cacheHeaders{Key: key}).apply(&http.Request{Header: hdr}) + + status, body, _, err := doJSON(t, "POST", "/v1/chat/completions", req, hdr) + if err != nil { + t.Fatalf("empty_messages http error: %v", err) + } + logf(t, lc.at(1), "INFO", "response", map[string]any{ + "status": status, + "body_len": len(body), + }) + if status >= 200 && status < 300 { + t.Fatalf("expected non-success status for empty messages, got %d body=%s", + status, truncate(string(body), 200)) + } + // Subsequent identical request should also fail — and crucially + // shouldn't return a stale cache hit. + status2, body2, _, _ := doJSON(t, "POST", "/v1/chat/completions", req, hdr) + if status2 >= 200 && status2 < 300 { + t.Fatalf("expected non-success status on retry, got %d body=%s", + status2, truncate(string(body2), 200)) + } + logf(t, lc.at(2), "PASS", "no_crash_on_empty_messages", map[string]any{ + "status_a": status, "status_b": status2, + }) + }) + + // 1.44 plugin_get_status — GET /api/plugins/semantic_cache after the phase + // is warm. status should be "active" and config should round-trip what we PUT. 
+ t.Run("1.44_plugin_get_status", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.44_plugin_get_status") + p, exists := pluginGet(t, lc, 1) + if !exists { + t.Fatalf("plugin %q should exist mid-phase", pluginName) + } + if !p.Enabled { + t.Fatalf("expected plugin enabled=true, got %v", p.Enabled) + } + validStatuses := map[string]bool{"active": true, "ready": true, "Ready": true, "Initialized": true} + if got := p.Status.Status; !validStatuses[got] { + t.Fatalf("expected plugin status to be one of active/ready/Ready/Initialized, got %q", got) + } + // Config blob round-trip checks — backend may coerce numeric types + // when re-serializing from the DB. + gotDim, _ := p.Config["dimension"].(float64) + if int(gotDim) != 1 { + t.Fatalf("expected dimension=1 (direct-only), got %v", p.Config["dimension"]) + } + if got, _ := p.Config["default_cache_key"].(string); got != defaultKeyDirect { + t.Fatalf("expected default_cache_key=%q, got %q", defaultKeyDirect, got) + } + logf(t, lc.at(2), "PASS", "plugin_status_validated", map[string]any{ + "status": p.Status.Status, + "enabled": p.Enabled, + "dimension": p.Config["dimension"], + "default_cache_key": p.Config["default_cache_key"], + }) + }) + + // 1.32 params_temperature_isolation — temperature is part of params hash, + // so the same body with different temperatures produces distinct cache_ids. + t.Run("1.32_params_temperature_isolation", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.32_params_temperature_isolation") + key := "phase1-k32" + body := "Pick one number between 1 and 10." 
+ + reqA := simpleChat(cfg.OpenAIModel, body) + t1 := 0.3 + reqA.Temperature = &t1 + + reqB := simpleChat(cfg.OpenAIModel, body) + t2 := 0.7 + reqB.Temperature = &t2 + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.33 params_top_p_isolation — top_p in params hash. + t.Run("1.33_params_top_p_isolation", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.33_params_top_p_isolation") + key := "phase1-k33" + body := "Name a Greek philosopher." + + reqA := simpleChat(cfg.OpenAIModel, body) + tp1 := 0.5 + reqA.TopP = &tp1 + + reqB := simpleChat(cfg.OpenAIModel, body) + tp2 := 0.9 + reqB.TopP = &tp2 + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.34 params_seed_same — same seed, same body → hit. + t.Run("1.34_params_seed_same", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.34_params_seed_same") + key := "phase1-k34" + body := "Recommend one Latin saying." + seed := 42 + + reqA := simpleChat(cfg.OpenAIModel, body) + reqA.Seed = &seed + reqB := simpleChat(cfg.OpenAIModel, body) + reqB.Seed = &seed + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, reqB, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.35 params_seed_diff — different seeds → miss. + t.Run("1.35_params_seed_diff", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.35_params_seed_diff") + key := "phase1-k35" + body := "Recommend one quote about patience." 
+ + reqA := simpleChat(cfg.OpenAIModel, body) + s1 := 42 + reqA.Seed = &s1 + + reqB := simpleChat(cfg.OpenAIModel, body) + s2 := 99 + reqB.Seed = &s2 + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.36 params_max_tokens_isolation — max_tokens in params hash. + t.Run("1.36_params_max_tokens_isolation", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.36_params_max_tokens_isolation") + key := "phase1-k36" + body := "List two healthy snacks." + + reqA := simpleChat(cfg.OpenAIModel, body) + m1 := 60 + reqA.MaxTokens = &m1 + + reqB := simpleChat(cfg.OpenAIModel, body) + m2 := 120 + reqB.MaxTokens = &m2 + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.48 tools_order_independent — Tools is hashed as a sorted set (utils.go:801-813), + // so reordering identical tool definitions must NOT change the cache_id. + // This catches the MCP-randomized-map regression the docstring calls out. + t.Run("1.48_tools_order_independent", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.48_tools_order_independent") + key := "phase1-k48" + body := "Look up the current weather in Tokyo." 
+ + toolA := chatTool{Type: "function", Function: &toolFunction{ + Name: "get_weather", Description: "Get current weather", + Parameters: map[string]any{"type": "object", "properties": map[string]any{"city": map[string]any{"type": "string"}}, "required": []string{"city"}}, + }} + toolB := chatTool{Type: "function", Function: &toolFunction{ + Name: "search_web", Description: "Search the web", + Parameters: map[string]any{"type": "object", "properties": map[string]any{"query": map[string]any{"type": "string"}}, "required": []string{"query"}}, + }} + + reqA := simpleChat(cfg.OpenAIModel, body) + reqA.Tools = []chatTool{toolA, toolB} + reqB := simpleChat(cfg.OpenAIModel, body) + reqB.Tools = []chatTool{toolB, toolA} // swapped order + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, reqB, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 1.49 tools_function_name_change — different tool names → distinct params hash → miss. + t.Run("1.49_tools_function_name_change", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.49_tools_function_name_change") + key := "phase1-k49" + body := "Search for top hiking trails near Seattle." 
+ + mkTool := func(name string) chatTool { + return chatTool{Type: "function", Function: &toolFunction{ + Name: name, Description: "do a search", + Parameters: map[string]any{"type": "object", "properties": map[string]any{"q": map[string]any{"type": "string"}}, "required": []string{"q"}}, + }} + } + + reqA := simpleChat(cfg.OpenAIModel, body) + reqA.Tools = []chatTool{mkTool("search")} + + reqB := simpleChat(cfg.OpenAIModel, body) + reqB.Tools = []chatTool{mkTool("lookup")} + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.50 prompt_cache_key_in_metadata — params.PromptCacheKey is extracted + // into the metadata map (utils.go:781) so different values → different cache_ids. + t.Run("1.50_prompt_cache_key_in_metadata", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.50_prompt_cache_key_in_metadata") + key := "phase1-k50" + body := "Translate 'hello' to French." + + reqA := simpleChat(cfg.OpenAIModel, body) + pckA := "tenant-X" + reqA.PromptCacheKey = &pckA + + reqB := simpleChat(cfg.OpenAIModel, body) + pckB := "tenant-Y" + reqB.PromptCacheKey = &pckB + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.51 service_tier_in_metadata — service_tier is in params hash. + t.Run("1.51_service_tier_in_metadata", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.51_service_tier_in_metadata") + key := "phase1-k51" + body := "Define empathy in one sentence." + + // "auto" and "default" are both accepted by gpt-4o-mini ("flex" is gated + // on premium models). The point is to differ; the values matter only + // for params_hash isolation. 
+ reqA := simpleChat(cfg.OpenAIModel, body) + stA := "default" + reqA.ServiceTier = &stA + + reqB := simpleChat(cfg.OpenAIModel, body) + stB := "auto" + reqB.ServiceTier = &stB + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.52 store_flag_in_metadata — params.Store toggle changes params hash. + t.Run("1.52_store_flag_in_metadata", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.52_store_flag_in_metadata") + key := "phase1-k52" + body := "Name one chess opening." + + reqA := simpleChat(cfg.OpenAIModel, body) + storeA := true + reqA.Store = &storeA + + reqB := simpleChat(cfg.OpenAIModel, body) + storeB := false + reqB.Store = &storeB + + respA := postChat(t, lc, 1, reqA, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + + respB := postChat(t, lc, 3, reqB, cacheHeaders{Key: key}) + idB := assertMiss(t, lc, 4, respB) + assertDifferentCacheID(t, lc, 5, idA, idB) + }) + + // 1.54 ttl_zero_per_request — x-bf-cache-ttl=0s (or negative) falls back to + // the plugin default TTL. Without this contract, "0s" would yield + // expires_at=now and silently break caching for the affected request; + // instead the plugin treats non-positive values as "use default", matching + // Init's behavior for Config.TTL=0. + t.Run("1.54_ttl_zero_per_request", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.54_ttl_zero_per_request") + req := simpleChat(cfg.OpenAIModel, "Name a constellation.") + key := "phase1-k54" + + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key, TTL: "0s"}) + idA := assertMiss(t, lc, 2, respA) + + waitForCacheWrite(t, lc, 3) + + // B with TTL=0s should hit — the override is rejected as non-positive + // and the plugin's default (10s) keeps A's entry alive. 
+ respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, TTL: "0s"}) + idB := assertHit(t, lc, 5, respB, "direct") + assertSameCacheID(t, lc, 6, idB, idA) + + // Negative TTL should follow the same fallback path. + respC := postChat(t, lc, 7, req, cacheHeaders{Key: key, TTL: "-30s"}) + _ = assertHit(t, lc, 8, respC, "direct") + }) + + // 1.55 cache_debug_in_logs_endpoint — cross-check that the persisted log + // row's cache_debug column matches the in-flight response stamp. Guards + // against drift between PostLLMHook stamping and durable storage (same + // data path the UI Logs view reads). + t.Run("1.55_cache_debug_in_logs_endpoint", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("direct", "1.55_cache_debug_in_logs_endpoint") + key := "phase1-k55" + req := simpleChat(cfg.OpenAIModel, "Name one famous lighthouse.") + + // Generate a hit so cache_debug carries the full set of hit-only fields. + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, req, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key}) + respCD := assertHitAndReturnCacheDebug(t, lc, 5, respB, "direct") + + entry := findLogByCacheDebug(t, lc, 6, respCD) + assertLogMatchesResponseCacheDebug(t, lc, 7, respCD, entry.CacheDebug) + }) + + logf(t, newLogCtx("direct", "teardown").at(99), "TEARDOWN", "phase_end", nil) +} + +// assertHitAndReturnCacheDebug is the same as assertHit but also returns the +// full cacheDebug struct (the regular helper returns just the cache_id string). +// Used by the /api/logs cross-check cases that need to compare all fields. +// +// It calls t.Helper, so the testing package skips this wrapper when printing +// file/line information for a failure. The returned pointer is whatever +// resp.cacheDebug() yields after assertHit has run. +func assertHitAndReturnCacheDebug(t *testing.T, lc logCtx, step int, resp cacheDebugged, wantType string) *cacheDebug { + t.Helper() + // assertHit's own return value (the cache_id) is intentionally discarded here. + _ = assertHit(t, lc, step, resp, wantType) + return resp.cacheDebug() +} + +// restoreDirectBaseline PUTs the canonical direct-only config so cases that +// mutate via pluginUpdate leave a clean slate for the next subtest. 
+func restoreDirectBaseline(t *testing.T, lc logCtx, step int) { + t.Helper() + // NOTE(review): the literal true is presumably pluginUpdate's "enabled" flag — confirm against its signature. + pluginUpdate(t, lc, step, true, directOnlyConfig(ttlDirect, defaultKeyDirect)) +} + +// Defaults the phase 1 cases share. Kept narrow so a future case can tighten +// ttl (e.g. case 1.14) without colliding. +const ( + ttlDirect = "10s" + defaultKeyDirect = "phase1-default" +) + +// ttlDirectDuration is the same 10-second baseline as ttlDirect, expressed as +// a time.Duration; keep the two values in sync when changing either. +var ttlDirectDuration = 10 * time.Second + +// simpleChat builds a chatRequest for model whose only message is a single +// user-role message carrying content (wrapped with textContent). +func simpleChat(model, content string) chatRequest { + return chatRequest{ + Model: model, + Messages: []chatMessage{ + {Role: "user", Content: textContent(content)}, + }, + } +} + +// chatWithSystem builds a chatRequest for model with two messages, in order: +// a system-role message (system) followed by a user-role message (user). +func chatWithSystem(model, system, user string) chatRequest { + return chatRequest{ + Model: model, + Messages: []chatMessage{ + {Role: "system", Content: textContent(system)}, + {Role: "user", Content: textContent(user)}, + }, + } +} + +// chatWithImage builds a user message with an image_url + text block. Used to +// exercise the attachments path of buildRequestMetadataForCaching. +func chatWithImage(model, text, imageURL string) chatRequest { + return chatRequest{ + Model: model, + Messages: []chatMessage{{ + Role: "user", + Content: blocksContent([]map[string]any{ + {"type": "text", "text": text}, + {"type": "image_url", "image_url": map[string]any{"url": imageURL}}, + }), + }}, + } +} From 8f8e108fd205ba5278ae5d9ee224fde3078aa53b Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:15:45 +0530 Subject: [PATCH 16/81] test: add semantic cache e2e test suite (#3427) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds a comprehensive integration test suite for the semantic cache mode (Phase 2), covering the full lifecycle of semantic similarity-based cache hits and misses using Weaviate as the vector store and OpenAI's `text-embedding-3-small` as the embedding model. 
This suite validates that the semantic cache behaves correctly across a wide range of real-world scenarios, complementing the existing direct-mode (Phase 1) tests.

## Changes

- Added `TestParaphraseFixtures` to pre-flight all paraphrase pairs against the live embedding model, asserting cosine similarity thresholds before any semantic cache cases run. This prevents flaky downstream failures caused by borderline fixture pairs.
- Added `TestSemantic` containing 44 sub-cases (2.1–2.44) covering:
  - Semantic hit on paraphrase, miss on unrelated content
  - Per-request threshold overrides (relax, tighten, clamp above/below valid range)
  - `x-bf-cache-type` header forcing direct-only or semantic-only lookup paths
  - Cache key and model/provider isolation in semantic mode
  - `cache_by_model=false` and `cache_by_provider=false` cross-model/cross-provider hits
  - Streaming replay of semantic hits, including tool call preservation
  - TTL expiry, per-request TTL, TTL=0 fallback, and `no-store` header semantics
  - Namespace isolation and dimension-change silent miss behavior
  - Embedding endpoint bypass (semantic search skipped for `/v1/embeddings`)
  - Image generation and Responses API semantic hits
  - Text completion semantic hits
  - Gemini provider with OpenAI embedding provider
  - `params_hash` isolation (temperature, service tier, store flag, prompt cache key, previous response ID)
  - `exclude_system_prompt` flag effect on semantic matching
  - Conversation message threshold skipping semantic search
  - Attachment URL changes causing misses
  - `cache_debug` field presence and correctness on hits and misses, including log endpoint cross-check
  - Streaming chunk-level `cache_debug` placement (final chunk only)
- Serial (non-parallel) cases that mutate plugin config restore baseline via `t.Cleanup` to avoid test pollution.
- A dedicated Weaviate namespace (`cfg.Namespace + "Semantic"`) is used to avoid dimension conflicts with the Phase 1 direct-mode namespace.

## Type of change

- [ ] Bug fix
- [ ] Feature
- [ ] Refactor
- [ ] Documentation
- [x] Chore/CI

## Affected areas

- [ ] Core (Go)
- [ ] Transports (HTTP)
- [ ] Providers/Integrations
- [x] Plugins
- [ ] UI (React)
- [ ] Docs

## How to test

```sh
# Run fixture pre-flight (requires OpenAI embedding access)
go test ./tests/semanticcache/... -run TestParaphraseFixtures -v

# Run full semantic suite
go test ./tests/semanticcache/... -run TestSemantic -v -timeout 10m

# Skip fixture verification if embedding access is unavailable
SC_SKIP_FIXTURE_VERIFY=1 go test ./tests/semanticcache/... -run TestSemantic -v -timeout 10m

# Skip image generation cases if DALL-E is unavailable
SC_SKIP_IMAGE_GEN=1 go test ./tests/semanticcache/... -run TestSemantic -v -timeout 10m
```

Required environment/config:

- `cfg.OpenAIEmbed` — embedding model name (e.g. `text-embedding-3-small`)
- `cfg.OpenAIModel` / `cfg.OpenAIModelAlt` — chat models for isolation tests
- `cfg.AnthroModel` — optional; skipped if empty (case 2.13)
- `cfg.GeminiModel` — optional; skipped if empty (case 2.28)
- `cfg.Namespace` — base Weaviate namespace; suite appends `Semantic` suffix
- `SC_SKIP_FIXTURE_VERIFY=1` — skip embedding pre-flight
- `SC_SKIP_IMAGE_GEN=1` — skip DALL-E case

## Screenshots/Recordings

N/A

## Breaking changes

- [x] No

## Related issues

N/A

## Security considerations

No new auth, secrets, or PII handling introduced. Tests call live external APIs (OpenAI, optionally Anthropic/Gemini) and require valid credentials in the test environment; no credentials are hardcoded.
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [ ] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- tests/semanticcache/semantic_test.go | 1031 ++++++++++++++++++++++++++ 1 file changed, 1031 insertions(+) create mode 100644 tests/semanticcache/semantic_test.go diff --git a/tests/semanticcache/semantic_test.go b/tests/semanticcache/semantic_test.go new file mode 100644 index 0000000000..c3d02193ba --- /dev/null +++ b/tests/semanticcache/semantic_test.go @@ -0,0 +1,1031 @@ +package semanticcache + +import ( + "encoding/json" + "fmt" + "math" + "net/http" + "os" + "testing" + "time" +) + +// TestParaphraseFixtures pre-flights every pair in paraphrasePairs against +// the deployed embedding model. Fails early with the actual cosine values +// if a pair has drifted, so downstream semantic cases never debug a +// borderline-flaky pair. Costs ~10 embedding calls (cents). +// +// Set SC_SKIP_FIXTURE_VERIFY=1 to skip this when running semantic cases +// against an environment with no openai/text-embedding-3-small access. 
+func TestParaphraseFixtures(t *testing.T) { + if os.Getenv("SC_SKIP_FIXTURE_VERIFY") == "1" { + t.Skip("SC_SKIP_FIXTURE_VERIFY=1") + } + for _, pair := range paraphrasePairs { + p := pair + t.Run(p.Name, func(t *testing.T) { + t.Parallel() + lc := newLogCtx("fixtures", p.Name) + + ec := embedVector(t, lc, 1, p.Canonical) + ep := embedVector(t, lc, 2, p.Paraphrase) + eu := embedVector(t, lc, 3, p.Unrelated) + + simHit := cosine(ec, ep) + simMiss := cosine(ec, eu) + + logf(t, lc.at(4), "INFO", "cosine_check", map[string]any{ + "hit_cosine": fmt.Sprintf("%.4f", simHit), + "miss_cosine": fmt.Sprintf("%.4f", simMiss), + }) + + if simHit < 0.85 { + t.Errorf("HIT cosine %.4f < 0.85 — paraphrase too distant\n canonical=%q\n paraphrase=%q", + simHit, p.Canonical, p.Paraphrase) + } + if simMiss > 0.6 { + t.Errorf("MISS cosine %.4f > 0.6 — unrelated too close\n canonical=%q\n unrelated=%q", + simMiss, p.Canonical, p.Unrelated) + } + }) + } +} + +// embedVector hits /v1/embeddings and parses the float64 vector. Plugin +// state irrelevant — direct API call. 
+func embedVector(t *testing.T, lc logCtx, step int, text string) []float64 { + t.Helper() + req := embeddingRequest{Model: "openai/" + cfg.OpenAIEmbed, Input: text} + status, body, _, err := doJSON(t, "POST", "/v1/embeddings", req, nil) + if err != nil || status != http.StatusOK { + t.Fatalf("embedVector: status=%d err=%v body=%s", status, err, truncate(string(body), 300)) + } + var resp struct { + Data []struct { + Embedding []float64 `json:"embedding"` + } `json:"data"` + } + if err := json.Unmarshal(body, &resp); err != nil { + t.Fatalf("embedVector decode: %v", err) + } + if len(resp.Data) == 0 || len(resp.Data[0].Embedding) == 0 { + t.Fatalf("embedVector: empty data in response %s", truncate(string(body), 300)) + } + logf(t, lc.at(step), "INFO", "embedding_computed", map[string]any{ + "dim": len(resp.Data[0].Embedding), + "text_len": len(text), + }) + return resp.Data[0].Embedding +} + +func cosine(a, b []float64) float64 { + if len(a) != len(b) || len(a) == 0 { + return 0 + } + var dot, na, nb float64 + for i := range a { + dot += a[i] * b[i] + na += a[i] * a[i] + nb += b[i] * b[i] + } + if na == 0 || nb == 0 { + return 0 + } + return dot / (math.Sqrt(na) * math.Sqrt(nb)) +} + +// ----------------------------------------------------------------------------- +// Phase 2 — semantic mode +// ----------------------------------------------------------------------------- + +const ( + ttlSemantic = "30s" + defaultKeySemantic = "phase2-default" + thresholdSemantic = 0.85 +) + +// semanticNamespace is a dedicated Weaviate class for the semantic-mode suite. +// Phase 1 created cfg.Namespace with dimension=1 (direct-only); reusing that +// namespace for dim=1536 writes would error out with "vector dimensions do +// not match the index dimensions" — a Weaviate constraint, not a plugin bug. +// Real users switching modes face the same constraint and create a new +// namespace, so the suite mirrors that. 
+func semanticNamespace() string { return cfg.Namespace + "Semantic" } + +// semanticBaseline is the canonical Phase 2 plugin config — used by setup and +// by every t.Cleanup that restores baseline after a mutating case. +func semanticBaseline() map[string]any { + // Lock the embedding model: dimension=1536 is hard-coded and only + // text-embedding-3-small produces 1536-dim vectors. Any other model would + // cause confusing dimension-mismatch failures downstream rather than a + // clear prerequisite error here. + if cfg.OpenAIEmbed != "text-embedding-3-small" { + panic(fmt.Sprintf("semantic suite expects cfg.OpenAIEmbed=text-embedding-3-small, got %q", cfg.OpenAIEmbed)) + } + c := semanticConfig("openai", cfg.OpenAIEmbed, 1536, ttlSemantic, thresholdSemantic, defaultKeySemantic) + c["vector_store_namespace"] = semanticNamespace() + return c +} + +func restoreSemanticBaseline(t *testing.T, lc logCtx, step int) { + t.Helper() + pluginUpdate(t, lc, step, true, semanticBaseline()) +} + +// TestSemantic runs the semantic-mode cases (2.1–2.44). +// +// Parallelism rules (same as Phase 1): +// +// - Read-only cases call `t.Parallel()`. +// - Cases that mutate plugin config via `pluginUpdate` (2.12, 2.13, 2.21, +// 2.31, 2.32) MUST NOT call `t.Parallel()`. They run synchronously inside +// the parent loop, one at a time, restoring baseline via `t.Cleanup`. +// +// Plugin lifecycle: this test is self-contained — it upserts the plugin to +// semantic mode at setup regardless of whether Phase 1 ran. Existing entries +// in the namespace from prior runs are tolerated because each case uses a +// unique cache_key (phase2-kNN). +func TestSemantic(t *testing.T) { + lc := newLogCtx("semantic", "setup") + logf(t, lc.at(0), "SETUP", "phase_start", map[string]any{ + "mode": "semantic", + "ttl": ttlSemantic, + "threshold": thresholdSemantic, + "dimension": 1536, + }) + + // Upsert plugin to semantic mode. 
PUT creates with enabled:false if + // missing, then the same call's body sets enabled:true + config. + if _, exists := pluginGet(t, lc, 1); exists { + pluginUpdate(t, lc, 2, true, semanticBaseline()) + } else { + pluginCreate(t, lc, 2, true, semanticBaseline()) + } + + allKeys := []string{ + defaultKeySemantic, + "phase2-k1", "phase2-k2", "phase2-k3", "phase2-k4", "phase2-k5", + "phase2-k6", "phase2-k7", "phase2-k8", "phase2-k9", "phase2-k10", "phase2-k10-alt", + "phase2-k11", "phase2-k12", "phase2-k13", "phase2-k14", "phase2-k15", + "phase2-k16", "phase2-k17", "phase2-k18", "phase2-k19", "phase2-k20", + "phase2-k21", "phase2-k22", "phase2-k23", "phase2-k24", "phase2-k25", + "phase2-k26", "phase2-k27", "phase2-k28", "phase2-k29", + "phase2-k31a", "phase2-k32", + "phase2-k33", "phase2-k34", "phase2-k35", "phase2-k36", + "phase2-k37", "phase2-k38", "phase2-k39", + "phase2-k40", "phase2-k41", "phase2-k42", "phase2-k43", + "phase2-k39-seedA", "phase2-k39-seedB", + "phase2-k44", + } + t.Cleanup(func() { + // Surface unexpected cleanup failures so stale entries don't poison + // subsequent runs. 404 is fine — not every key in allKeys gets + // written by every run. + for _, k := range allKeys { + if got := clearByCacheKey(t, lc.at(99), 99, k); got != http.StatusOK && got != http.StatusNotFound { + t.Errorf("cleanup clearByCacheKey(%q): unexpected status %d", k, got) + } + } + }) + + // 2.1 direct_path_still_works — exact-match in semantic mode hits direct first. 
+ t.Run("2.1_direct_path_still_works", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.1_direct_path_still_works") + key := "phase2-k1" + req := simpleChat(cfg.OpenAIModel, "Name one common edible mushroom variety.") + respA := postChat(t, lc, 1, req, cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key}) + idB := assertHit(t, lc, 5, respB, "direct") // direct runs first in semantic mode + assertSameCacheID(t, lc, 6, idB, idA) + }) + + // 2.2 semantic_hit_paraphrase — distinct text but high semantic similarity. + t.Run("2.2_semantic_hit_paraphrase", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.2_semantic_hit_paraphrase") + key := "phase2-k2" + pair := pairByName(t, "capital_france") + + respA := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "semantic") + cd := respB.cacheDebug() + if cd.Similarity == nil || cd.Threshold == nil || cd.ProviderUsed == nil || cd.ModelUsed == nil || cd.InputTokens == nil { + t.Fatalf("expected similarity/threshold/provider_used/model_used/input_tokens stamped on semantic hit, got %+v", cd) + } + if *cd.Similarity < *cd.Threshold { + t.Fatalf("semantic hit but similarity %.4f < threshold %.4f", *cd.Similarity, *cd.Threshold) + } + }) + + // 2.3 below_threshold_miss — unrelated body misses, but cache_debug still + // stamped with provider_used/input_tokens (semantic search ran). 
+ t.Run("2.3_below_threshold_miss", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.3_below_threshold_miss") + key := "phase2-k3" + pair := pairByName(t, "boiling_water") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Unrelated), cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 5, respB) + cd := respB.cacheDebug() + if cd.ProviderUsed == nil || cd.InputTokens == nil { + t.Fatalf("expected provider_used + input_tokens stamped on semantic-search miss, got %+v", cd) + } + }) + + // 2.4 threshold_header_relax — low threshold accepts unrelated as hit. + t.Run("2.4_threshold_header_relax", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.4_threshold_header_relax") + key := "phase2-k4" + pair := pairByName(t, "vinaigrette") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + low := 0.1 + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Unrelated), cacheHeaders{Key: key, Threshold: &low}) + _ = assertHit(t, lc, 5, respB, "semantic") + }) + + // 2.5 threshold_header_tighten — high threshold rejects a normally-hit paraphrase. + t.Run("2.5_threshold_header_tighten", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.5_threshold_header_tighten") + key := "phase2-k5" + pair := pairByName(t, "opera_composer") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + high := 0.999 + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, Threshold: &high}) + _ = assertMiss(t, lc, 5, respB) + }) + + // 2.6 threshold_clamp_above — threshold > 1.0 clamps to 1.0 → miss. 
+ t.Run("2.6_threshold_clamp_above", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.6_threshold_clamp_above") + key := "phase2-k6" + pair := pairByName(t, "photosynthesis") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + over := 2.0 // clamps to 1.0 + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, Threshold: &over}) + _ = assertMiss(t, lc, 5, respB) + }) + + // 2.7 threshold_clamp_below — threshold < 0 clamps to 0 → hits anything. + t.Run("2.7_threshold_clamp_below", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.7_threshold_clamp_below") + key := "phase2-k7" + pair := pairByName(t, "capital_france") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + under := -1.0 // clamps to 0.0 + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Unrelated), cacheHeaders{Key: key, Threshold: &under}) + _ = assertHit(t, lc, 5, respB, "semantic") + }) + + // 2.8 cache_type_direct_in_semantic — x-bf-cache-type=direct on a paraphrase + // suppresses semantic search; B misses despite high similarity. + t.Run("2.8_cache_type_direct_in_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.8_cache_type_direct_in_semantic") + key := "phase2-k8" + pair := pairByName(t, "boiling_water") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, Type: "direct"}) + _ = assertMiss(t, lc, 5, respB) + }) + + // 2.9 cache_type_semantic_only_exact — x-bf-cache-type=semantic on identical + // body still produces a hit, but via the semantic path (direct suppressed). 
+ t.Run("2.9_cache_type_semantic_only_exact", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.9_cache_type_semantic_only_exact") + key := "phase2-k9" + req := simpleChat(cfg.OpenAIModel, "Recommend one short documentary film about science.") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, req, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, req, cacheHeaders{Key: key, Type: "semantic"}) + _ = assertHit(t, lc, 5, respB, "semantic") + }) + + // 2.10 cache_key_isolation_semantic — paraphrases under different keys → miss. + t.Run("2.10_cache_key_isolation_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.10_cache_key_isolation_semantic") + pair := pairByName(t, "vinaigrette") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: "phase2-k10"})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: "phase2-k10-alt"}) + _ = assertMiss(t, lc, 5, respB) + }) + + // 2.11 cache_by_model_isolation_semantic — different models, default flag → miss. + t.Run("2.11_cache_by_model_isolation_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.11_cache_by_model_isolation_semantic") + key := "phase2-k11" + pair := pairByName(t, "photosynthesis") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModelAlt, pair.Paraphrase), cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 5, respB) + }) + + // 2.12 cache_by_model_false_semantic — flip flag, paraphrase cross-model → hit. + t.Run("2.12_cache_by_model_false_semantic", func(t *testing.T) { + // Serial: mutates plugin config (cache_by_model=false). 
+ lc := newLogCtx("semantic", "2.12_cache_by_model_false_semantic") + + cfg2 := semanticBaseline() + cfg2["cache_by_model"] = false + pluginUpdate(t, lc, 1, true, cfg2) + t.Cleanup(func() { restoreSemanticBaseline(t, lc, 99) }) + + key := "phase2-k12" + pair := pairByName(t, "opera_composer") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModelAlt, pair.Paraphrase), cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "semantic") + }) + + // 2.13 cross_provider_semantic — both cache_by_* flags off; paraphrase across providers → hit. + t.Run("2.13_cross_provider_semantic", func(t *testing.T) { + // Serial: mutates plugin config (cache_by_provider/model=false). + if cfg.AnthroModel == "" { + t.Skip("anthropic model not configured") + } + lc := newLogCtx("semantic", "2.13_cross_provider_semantic") + + cfg2 := semanticBaseline() + cfg2["cache_by_model"] = false + cfg2["cache_by_provider"] = false + pluginUpdate(t, lc, 1, true, cfg2) + t.Cleanup(func() { restoreSemanticBaseline(t, lc, 99) }) + + key := "phase2-k13" + pair := pairByName(t, "capital_france") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + respB := postChat(t, lc, 4, simpleChat(cfg.AnthroModel, pair.Paraphrase), cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "semantic") + }) + + // 2.14 streaming_semantic_replay — paraphrase across two SSE streams → B replays. 
+ t.Run("2.14_streaming_semantic_replay", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.14_streaming_semantic_replay") + key := "phase2-k14" + pair := pairByName(t, "boiling_water") + + respA := postChatStream(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + + respB := postChatStream(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "semantic") + if len(respB.dataChunks()) != len(respA.dataChunks()) { + t.Fatalf("expected B chunk count %d to match A's %d", len(respB.dataChunks()), len(respA.dataChunks())) + } + }) + + // 2.15 semantic_then_direct_same_request — paraphrase stores; exact same body + // hits via direct (faster, embedding-cost fields absent on B). + t.Run("2.15_semantic_then_direct_same_request", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.15_semantic_then_direct_same_request") + key := "phase2-k15" + pair := pairByName(t, "vinaigrette") + + // A: canonical body — stores under direct ID. + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + // B: same canonical body — direct runs first and hits. + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "direct") + cd := respB.cacheDebug() + if cd.ProviderUsed != nil || cd.ModelUsed != nil || cd.InputTokens != nil { + t.Fatalf("expected provider_used/model_used/input_tokens NIL on direct hit (no embedding generated), got %+v", cd) + } + }) + + // 2.16 clear_cache_id_semantic — populate via semantic, delete by id, retry → miss. 
+ t.Run("2.16_clear_cache_id_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.16_clear_cache_id_semantic") + key := "phase2-k16" + pair := pairByName(t, "opera_composer") + + respA := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key}) + idA := assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + // Confirm paraphrase hits. + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key}), "semantic") + + if got := clearByCacheID(t, lc, 6, idA); got != http.StatusOK { + t.Fatalf("expected 200 from clear-by-id, got %d", got) + } + + // Paraphrase now misses. + _ = assertMiss(t, lc, 8, postChat(t, lc, 7, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key})) + }) + + // 2.17 clear_by_key_semantic — populate 2 paraphrases, clear-by-key, all miss. + t.Run("2.17_clear_by_key_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.17_clear_by_key_semantic") + key := "phase2-k17" + pair1 := pairByName(t, "capital_france") + pair2 := pairByName(t, "photosynthesis") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair1.Canonical), cacheHeaders{Key: key})) + _ = assertMiss(t, lc, 4, postChat(t, lc, 3, simpleChat(cfg.OpenAIModel, pair2.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 5) + + if got := clearByCacheKey(t, lc, 6, key); got != http.StatusOK { + t.Fatalf("expected 200, got %d", got) + } + + _ = assertMiss(t, lc, 8, postChat(t, lc, 7, simpleChat(cfg.OpenAIModel, pair1.Paraphrase), cacheHeaders{Key: key})) + _ = assertMiss(t, lc, 10, postChat(t, lc, 9, simpleChat(cfg.OpenAIModel, pair2.Paraphrase), cacheHeaders{Key: key})) + }) + + // 2.18 ttl_expiry_semantic — sleep past TTL, paraphrase misses. 
+ t.Run("2.18_ttl_expiry_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.18_ttl_expiry_semantic") + key := "phase2-k18" + pair := pairByName(t, "boiling_water") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key, TTL: "5s"})) + waitForCacheWrite(t, lc, 3) + + // Confirm hit within TTL. + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, TTL: "5s"}), "semantic") + + wait := 6 * time.Second + logf(t, lc.at(6), "INFO", "sleep_past_ttl", map[string]any{"seconds": wait.Seconds()}) + time.Sleep(wait) + + _ = assertMiss(t, lc, 8, postChat(t, lc, 7, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, TTL: "5s"})) + }) + + // 2.19 ttl_per_request_semantic — distinct shorter TTL applies. + t.Run("2.19_ttl_per_request_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.19_ttl_per_request_semantic") + key := "phase2-k19" + pair := pairByName(t, "vinaigrette") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key, TTL: "4s"})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, TTL: "4s"}), "semantic") + + time.Sleep(5 * time.Second) + _ = assertMiss(t, lc, 7, postChat(t, lc, 6, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, TTL: "4s"})) + }) + + // 2.20 no_store_semantic — A no-store; B paraphrase → miss (nothing stored). 
+ t.Run("2.20_no_store_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.20_no_store_semantic") + key := "phase2-k20" + pair := pairByName(t, "opera_composer") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key, NoStore: "true"})) + waitForCacheWrite(t, lc, 3) + + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key})) + }) + + // 2.21 exclude_system_prompt_semantic — flag flips system out of hash + embedding; + // paraphrase + different systems → semantic hit. + t.Run("2.21_exclude_system_prompt_semantic", func(t *testing.T) { + // Serial: mutates plugin config. + lc := newLogCtx("semantic", "2.21_exclude_system_prompt_semantic") + cfg2 := semanticBaseline() + cfg2["exclude_system_prompt"] = true + pluginUpdate(t, lc, 1, true, cfg2) + t.Cleanup(func() { restoreSemanticBaseline(t, lc, 99) }) + + key := "phase2-k21" + pair := pairByName(t, "capital_france") + userA := pair.Canonical + userB := pair.Paraphrase + + _ = assertMiss(t, lc, 3, postChat(t, lc, 2, chatWithSystem(cfg.OpenAIModel, "You are a geographer.", userA), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 4) + + _ = assertHit(t, lc, 6, postChat(t, lc, 5, chatWithSystem(cfg.OpenAIModel, "You are a poet.", userB), cacheHeaders{Key: key}), "semantic") + }) + + // 2.22 conversation_threshold_semantic — 4-message conversation skipped entirely. + t.Run("2.22_conversation_threshold_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.22_conversation_threshold_semantic") + key := "phase2-k22" + + msgs := []chatMessage{ + {Role: "user", Content: textContent("Hi.")}, + {Role: "assistant", Content: textContent("Hello! 
How can I help?")}, + {Role: "user", Content: textContent("Tell me about the boiling point of water.")}, + {Role: "user", Content: textContent("Actually, just give me the temperature in Celsius.")}, + } + req := chatRequest{Model: cfg.OpenAIModel, Messages: msgs} + + assertNoCacheDebug(t, lc, 2, postChat(t, lc, 1, req, cacheHeaders{Key: key})) + assertNoCacheDebug(t, lc, 4, postChat(t, lc, 3, req, cacheHeaders{Key: key})) + }) + + // 2.23 attachments_change_semantic — paraphrase + different image URL → miss + // (attachments part of params_hash, filter excludes). + t.Run("2.23_attachments_change_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.23_attachments_change_semantic") + key := "phase2-k23" + textA := "What's pictured in this image?" + textB := "Describe the contents of this image." + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, chatWithImage(cfg.OpenAIModel, textA, testImageURL1), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, chatWithImage(cfg.OpenAIModel, textB, testImageURL2), cacheHeaders{Key: key})) + }) + + // 2.24 embedding_endpoint_semantic_skip — embedding requests bypass semantic + // search entirely (PreLLMHook semanticEligible check). Exact match hits + // direct; different input misses (no paraphrase match attempt). + t.Run("2.24_embedding_endpoint_semantic_skip", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.24_embedding_endpoint_semantic_skip") + key := "phase2-k24" + req := embeddingRequest{Model: "openai/" + cfg.OpenAIEmbed, Input: "The cat sat on the mat."} + + _ = assertMiss(t, lc, 2, postEmbedding(t, lc, 1, req, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postEmbedding(t, lc, 4, req, cacheHeaders{Key: key}), "direct") + + // Different input — no semantic fallback, just direct miss. 
+ req2 := embeddingRequest{Model: "openai/" + cfg.OpenAIEmbed, Input: "The dog chased the ball."} + _ = assertMiss(t, lc, 7, postEmbedding(t, lc, 6, req2, cacheHeaders{Key: key})) + }) + + // 2.25 image_gen_semantic_paraphrase — image prompts paraphrase across two calls. + t.Run("2.25_image_gen_semantic_paraphrase", func(t *testing.T) { + t.Parallel() + if os.Getenv("SC_SKIP_IMAGE_GEN") == "1" { + t.Skip("SC_SKIP_IMAGE_GEN=1") + } + lc := newLogCtx("semantic", "2.25_image_gen_semantic_paraphrase") + key := "phase2-k25" + pair := imagePairByName(t, "red_apple") + n := 1 + reqA := imageGenRequest{Model: "openai/dall-e-3", Prompt: pair.Canonical, N: &n, Size: "1024x1024"} + reqB := imageGenRequest{Model: "openai/dall-e-3", Prompt: pair.Paraphrase, N: &n, Size: "1024x1024"} + + _ = assertMiss(t, lc, 2, postImageGen(t, lc, 1, reqA, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postImageGen(t, lc, 4, reqB, cacheHeaders{Key: key}), "semantic") + }) + + // 2.26 responses_api_semantic — paraphrase on /v1/responses → semantic hit. + t.Run("2.26_responses_api_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.26_responses_api_semantic") + key := "phase2-k26" + pair := pairByName(t, "photosynthesis") + reqA := responsesRequest{Model: cfg.OpenAIModel, Input: pair.Canonical} + reqB := responsesRequest{Model: cfg.OpenAIModel, Input: pair.Paraphrase} + + _ = assertMiss(t, lc, 2, postResponses(t, lc, 1, reqA, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postResponses(t, lc, 4, reqB, cacheHeaders{Key: key}), "semantic") + }) + + // 2.27 text_completion_semantic — paraphrase on /v1/completions → semantic hit. 
+ t.Run("2.27_text_completion_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.27_text_completion_semantic") + key := "phase2-k27" + maxTok := 40 + reqA := textCompletionRequest{Model: "openai/gpt-3.5-turbo-instruct", Prompt: "Briefly explain how photosynthesis works in green plants.", MaxTokens: &maxTok} + reqB := textCompletionRequest{Model: "openai/gpt-3.5-turbo-instruct", Prompt: "In a few sentences, describe how photosynthesis works in green plants.", MaxTokens: &maxTok} + + _ = assertMiss(t, lc, 2, postTextCompletion(t, lc, 1, reqA, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postTextCompletion(t, lc, 4, reqB, cacheHeaders{Key: key}), "semantic") + }) + + // 2.28 gemini_semantic_hit — chat provider != embedding provider. + t.Run("2.28_gemini_semantic_hit", func(t *testing.T) { + t.Parallel() + if cfg.GeminiModel == "" { + t.Skip("gemini model not configured") + } + lc := newLogCtx("semantic", "2.28_gemini_semantic_hit") + key := "phase2-k28" + pair := pairByName(t, "capital_france") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.GeminiModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.GeminiModel, pair.Paraphrase), cacheHeaders{Key: key}), "semantic") + }) + + // 2.29 params_hash_isolates_semantic — paraphrases with different temperatures → miss. 
+ t.Run("2.29_params_hash_isolates_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.29_params_hash_isolates_semantic") + key := "phase2-k29" + pair := pairByName(t, "boiling_water") + + reqA := simpleChat(cfg.OpenAIModel, pair.Canonical) + t1 := 0.2 + reqA.Temperature = &t1 + reqB := simpleChat(cfg.OpenAIModel, pair.Paraphrase) + t2 := 0.9 + reqB.Temperature = &t2 + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, reqA, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, reqB, cacheHeaders{Key: key})) + }) + + // 2.30 plugin_status_semantic — GET shows status active + semantic config. + t.Run("2.30_plugin_status_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.30_plugin_status_semantic") + p, exists := pluginGet(t, lc, 1) + if !exists { + t.Fatalf("plugin should exist mid-phase") + } + if !p.Enabled || p.Status.Status != "active" { + t.Fatalf("expected enabled+active, got enabled=%v status=%q", p.Enabled, p.Status.Status) + } + gotProvider, _ := p.Config["provider"].(string) + if gotProvider != "openai" { + t.Fatalf("expected provider=openai, got %q", gotProvider) + } + gotDim, _ := p.Config["dimension"].(float64) + if int(gotDim) != 1536 { + t.Fatalf("expected dimension=1536, got %v", p.Config["dimension"]) + } + }) + + // 2.31 namespace_change_isolates — entries scoped to namespace; flipping + // the namespace makes prior entries unreachable, flipping back restores. + t.Run("2.31_namespace_change_isolates", func(t *testing.T) { + // Serial: mutates plugin config (vector_store_namespace). + lc := newLogCtx("semantic", "2.31_namespace_change_isolates") + // Use a known body for direct-cache reproducibility. + body := "What is the boiling point of pure water at standard pressure?" + key := "phase2-k31a" + altNS := cfg.Namespace + "Alt" + // Step 7 will store an entry in altNS. 
The outer t.Cleanup at the + // suite level iterates allKeys against whatever namespace the plugin + // currently points at — once we restore baseline below, the altNS + // entry becomes unreachable from there. Flip back to altNS, clear, + // then restore baseline. + t.Cleanup(func() { + altCfg := semanticBaseline() + altCfg["vector_store_namespace"] = altNS + pluginUpdate(t, lc, 97, true, altCfg) + _ = clearByCacheKey(t, lc.at(98), 98, key) + restoreSemanticBaseline(t, lc, 99) + }) + + // Phase 2 baseline is namespace=cfg.Namespace. Populate an entry. + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + // Confirm hit under baseline namespace. + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}), "direct") + + // Flip to alternate namespace; same body should miss. + cfg2 := semanticBaseline() + cfg2["vector_store_namespace"] = altNS + pluginUpdate(t, lc, 6, true, cfg2) + _ = assertMiss(t, lc, 8, postChat(t, lc, 7, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key})) + + // Flip back to baseline; entry should resurface. + pluginUpdate(t, lc, 9, true, semanticBaseline()) + _ = assertHit(t, lc, 11, postChat(t, lc, 10, simpleChat(cfg.OpenAIModel, body), cacheHeaders{Key: key}), "direct") + }) + + // 2.32 dimension_change_silent_miss — write at dim 1536, switch model to + // dim 3072 same namespace; reads should miss (UI banner warns about this). + // Documents actual behavior — error vs silent miss vs warn. + t.Run("2.32_dimension_change_silent_miss", func(t *testing.T) { + // Serial: mutates plugin config (embedding_model + dimension). + lc := newLogCtx("semantic", "2.32_dimension_change_silent_miss") + t.Cleanup(func() { restoreSemanticBaseline(t, lc, 99) }) + + key := "phase2-k32" + pair := pairByName(t, "opera_composer") + + // Write under dim=1536. 
+ _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + + // Switch to text-embedding-3-large (dim 3072) on the SAME namespace. + cfg2 := semanticConfig("openai", "text-embedding-3-large", 3072, ttlSemantic, thresholdSemantic, defaultKeySemantic) + cfg2["vector_store_namespace"] = semanticNamespace() + pluginUpdate(t, lc, 4, true, cfg2) + + // Read paraphrase. Expected: miss (UI warns: "reads will silently miss"). + // If Bifrost errors instead, the test will fail at postChat with status!=200 + // — that surfaces a different actual behavior worth documenting. + respB := postChat(t, lc, 5, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key}) + if cd := respB.cacheDebug(); cd != nil && cd.CacheHit { + t.Fatalf("expected miss (UI banner: dim change makes reads silently miss); got hit cache_id=%s", deref(cd.CacheID)) + } + logf(t, lc.at(6), "PASS", "dimension_change_silent_miss_documented", map[string]any{ + "behavior": "miss", + }) + }) + + // 2.33 streaming_tool_calls_replay — paraphrase preserves tool_calls in replay. 
+ t.Run("2.33_streaming_tool_calls_replay", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.33_streaming_tool_calls_replay") + key := "phase2-k33" + toolDef := chatTool{Type: "function", Function: &toolFunction{ + Name: "get_weather", Description: "Get the current weather in a city", + Parameters: map[string]any{"type": "object", "properties": map[string]any{"city": map[string]any{"type": "string"}}, "required": []string{"city"}}, + }} + + reqA := simpleChat(cfg.OpenAIModel, "What's the current weather in Tokyo right now?") + reqA.Tools = []chatTool{toolDef} + reqB := simpleChat(cfg.OpenAIModel, "Tell me the present weather in Tokyo right now.") + reqB.Tools = []chatTool{toolDef} + + respA := postChatStream(t, lc, 1, reqA, cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + waitForCacheWrite(t, lc, 3) + respB := postChatStream(t, lc, 4, reqB, cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "semantic") + if len(respB.dataChunks()) != len(respA.dataChunks()) { + t.Fatalf("chunk count mismatch: A=%d B=%d", len(respA.dataChunks()), len(respB.dataChunks())) + } + }) + + // 2.34 tools_order_independent_semantic — paraphrase with reordered tools → hit. 
+ t.Run("2.34_tools_order_independent_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.34_tools_order_independent_semantic") + key := "phase2-k34" + toolA := chatTool{Type: "function", Function: &toolFunction{Name: "get_weather", Parameters: map[string]any{"type": "object", "properties": map[string]any{"city": map[string]any{"type": "string"}}}}} + toolB := chatTool{Type: "function", Function: &toolFunction{Name: "search_web", Parameters: map[string]any{"type": "object", "properties": map[string]any{"query": map[string]any{"type": "string"}}}}} + + reqA := simpleChat(cfg.OpenAIModel, "What is the capital city of France in modern times?") + reqA.Tools = []chatTool{toolA, toolB} + reqB := simpleChat(cfg.OpenAIModel, "Tell me the capital city of France in modern times.") + reqB.Tools = []chatTool{toolB, toolA} + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, reqA, cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postChat(t, lc, 4, reqB, cacheHeaders{Key: key}), "semantic") + }) + + // 2.35 tools_function_name_change_semantic — different tool names → miss. 
+ t.Run("2.35_tools_function_name_change_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.35_tools_function_name_change_semantic") + key := "phase2-k35" + mkTool := func(name string) chatTool { + return chatTool{Type: "function", Function: &toolFunction{Name: name, Parameters: map[string]any{"type": "object", "properties": map[string]any{"q": map[string]any{"type": "string"}}}}} + } + reqA := simpleChat(cfg.OpenAIModel, "Briefly explain how photosynthesis works in green plants.") + reqA.Tools = []chatTool{mkTool("search")} + reqB := simpleChat(cfg.OpenAIModel, "In a few sentences, describe how photosynthesis works in green plants.") + reqB.Tools = []chatTool{mkTool("lookup")} + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, reqA, cacheHeaders{Key: key})) + // Wait so reqA's write commits; otherwise reqB misses for trivial + // reasons (empty cache) rather than tool-name isolation. + waitForCacheWrite(t, lc, 3) + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, reqB, cacheHeaders{Key: key})) + }) + + // 2.36 prompt_cache_key_semantic — different prompt_cache_key → miss. + t.Run("2.36_prompt_cache_key_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.36_prompt_cache_key_semantic") + key := "phase2-k36" + pair := pairByName(t, "vinaigrette") + + reqA := simpleChat(cfg.OpenAIModel, pair.Canonical) + pckA := "tenant-X" + reqA.PromptCacheKey = &pckA + reqB := simpleChat(cfg.OpenAIModel, pair.Paraphrase) + pckB := "tenant-Y" + reqB.PromptCacheKey = &pckB + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, reqA, cacheHeaders{Key: key})) + // Wait so reqA's write commits; otherwise reqB misses for trivial + // reasons (empty cache) rather than prompt_cache_key isolation. + waitForCacheWrite(t, lc, 3) + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, reqB, cacheHeaders{Key: key})) + }) + + // 2.37 service_tier_semantic — different service_tier → miss. 
+ t.Run("2.37_service_tier_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.37_service_tier_semantic") + key := "phase2-k37" + pair := pairByName(t, "capital_france") + + reqA := simpleChat(cfg.OpenAIModel, pair.Canonical) + stA := "default" + reqA.ServiceTier = &stA + reqB := simpleChat(cfg.OpenAIModel, pair.Paraphrase) + stB := "auto" + reqB.ServiceTier = &stB + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, reqA, cacheHeaders{Key: key})) + // Wait so reqA's write commits; otherwise reqB misses for trivial + // reasons (empty cache) rather than service_tier isolation. + waitForCacheWrite(t, lc, 3) + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, reqB, cacheHeaders{Key: key})) + }) + + // 2.38 store_flag_semantic — different store flag → miss. + t.Run("2.38_store_flag_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.38_store_flag_semantic") + key := "phase2-k38" + pair := pairByName(t, "boiling_water") + + reqA := simpleChat(cfg.OpenAIModel, pair.Canonical) + storeA := true + reqA.Store = &storeA + reqB := simpleChat(cfg.OpenAIModel, pair.Paraphrase) + storeB := false + reqB.Store = &storeB + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, reqA, cacheHeaders{Key: key})) + // Wait so reqA's write commits; otherwise reqB misses for trivial + // reasons (empty cache) rather than store-flag isolation. + waitForCacheWrite(t, lc, 3) + _ = assertMiss(t, lc, 5, postChat(t, lc, 4, reqB, cacheHeaders{Key: key})) + }) + + // 2.39 responses_previous_response_id_semantic — different previous_response_id → miss. + t.Run("2.39_responses_previous_response_id_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.39_responses_previous_response_id_semantic") + key := "phase2-k39" + + // Seed two response IDs. Distinct cache keys are essential — sharing one + // key would cause the second seed to semantic-hit the first and return + // the SAME response id, defeating the isolation test. 
+ seed1 := postResponses(t, lc, 1, responsesRequest{Model: cfg.OpenAIModel, Input: "Recite the first digit of pi."}, cacheHeaders{Key: "phase2-k39-seedA", NoStore: "true"}) + var s1 struct { + ID string `json:"id"` + } + if err := json.Unmarshal(seed1.bodyRaw, &s1); err != nil || s1.ID == "" { + t.Skipf("could not seed response id: %v", err) + } + seed2 := postResponses(t, lc, 2, responsesRequest{Model: cfg.OpenAIModel, Input: "Name the largest moon of Jupiter."}, cacheHeaders{Key: "phase2-k39-seedB", NoStore: "true"}) + var s2 struct { + ID string `json:"id"` + } + if err := json.Unmarshal(seed2.bodyRaw, &s2); err != nil || s2.ID == "" { + t.Skipf("could not seed second response id: %v", err) + } + if s1.ID == s2.ID { + t.Skipf("seed response ids collided (%s); test prerequisite not met", s1.ID) + } + + reqA := responsesRequest{Model: cfg.OpenAIModel, Input: "Continue from before.", PreviousResponseID: &s1.ID} + reqB := responsesRequest{Model: cfg.OpenAIModel, Input: "Continue from prior.", PreviousResponseID: &s2.ID} + + _ = assertMiss(t, lc, 4, postResponses(t, lc, 3, reqA, cacheHeaders{Key: key})) + // Wait so reqA's write commits; otherwise reqB misses for trivial + // reasons (empty cache) rather than previous_response_id isolation. + waitForCacheWrite(t, lc, 5) + _ = assertMiss(t, lc, 7, postResponses(t, lc, 6, reqB, cacheHeaders{Key: key})) + }) + + // 2.40 no_store_explicit_false_semantic — header value "false" doesn't toggle. 
+ t.Run("2.40_no_store_explicit_false_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.40_no_store_explicit_false_semantic") + key := "phase2-k40" + pair := pairByName(t, "opera_composer") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key, NoStore: "false"})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, NoStore: "false"}), "semantic") + }) + + // 2.41 no_store_uppercase_true_semantic — case-sensitive match; "TRUE" does NOT block. + t.Run("2.41_no_store_uppercase_true_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.41_no_store_uppercase_true_semantic") + key := "phase2-k41" + pair := pairByName(t, "photosynthesis") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key, NoStore: "TRUE"})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, NoStore: "TRUE"}), "semantic") + }) + + // 2.42 streaming_non_final_chunks_no_cache_debug_semantic — only final + // chunk has cache_debug, both on miss (semantic search ran) and hit (semantic replay). 
+ t.Run("2.42_streaming_non_final_chunks_no_cache_debug_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.42_streaming_non_final_chunks_no_cache_debug_semantic") + key := "phase2-k42" + pair := pairByName(t, "vinaigrette") + + check := func(stage string, resp *streamResponse) { + data := resp.dataChunks() + if len(data) == 0 { + t.Fatalf("[%s] no data chunks", stage) + } + for i := 0; i < len(data)-1; i++ { + if cd := data[i].cacheDebug(); cd != nil { + t.Fatalf("[%s] non-final chunk %d had cache_debug: %+v", stage, i, cd) + } + } + if data[len(data)-1].cacheDebug() == nil { + t.Fatalf("[%s] final chunk missing cache_debug", stage) + } + } + + respA := postChatStream(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key}) + _ = assertMiss(t, lc, 2, respA) + check("miss-with-semantic-search", respA) + waitForCacheWrite(t, lc, 3) + respB := postChatStream(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key}) + _ = assertHit(t, lc, 5, respB, "semantic") + check("hit-semantic", respB) + }) + + // 2.43 ttl_zero_per_request_semantic — TTL=0s falls back to default; B paraphrase hits. + t.Run("2.43_ttl_zero_per_request_semantic", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.43_ttl_zero_per_request_semantic") + key := "phase2-k43" + pair := pairByName(t, "capital_france") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key, TTL: "0s"})) + waitForCacheWrite(t, lc, 3) + _ = assertHit(t, lc, 5, postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key, TTL: "0s"}), "semantic") + }) + + // 2.44 cache_debug_in_logs_endpoint — cross-check persisted log row's + // cache_debug column against the in-flight semantic hit. 
In semantic mode + // cache_debug carries the richest field set (provider_used, model_used, + // input_tokens, threshold, similarity), making this a high-value drift + // check. + t.Run("2.44_cache_debug_in_logs_endpoint", func(t *testing.T) { + t.Parallel() + lc := newLogCtx("semantic", "2.44_cache_debug_in_logs_endpoint") + key := "phase2-k44" + pair := pairByName(t, "vinaigrette") + + _ = assertMiss(t, lc, 2, postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, pair.Canonical), cacheHeaders{Key: key})) + waitForCacheWrite(t, lc, 3) + respB := postChat(t, lc, 4, simpleChat(cfg.OpenAIModel, pair.Paraphrase), cacheHeaders{Key: key}) + respCD := assertHitAndReturnCacheDebug(t, lc, 5, respB, "semantic") + + entry := findLogByCacheDebug(t, lc, 6, respCD) + assertLogMatchesResponseCacheDebug(t, lc, 7, respCD, entry.CacheDebug) + }) + + logf(t, newLogCtx("semantic", "teardown").at(99), "TEARDOWN", "phase_end", nil) +} From 335be6a063fc8f63507fe9458c324f6b76be42e3 Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:16:42 +0530 Subject: [PATCH 17/81] test: add semantic cache plugin lifecycle tests (#3428) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds an end-to-end lifecycle test for the semantic cache plugin, covering the full disable → re-enable → delete → recreate flow and asserting that namespace data persists across each state transition. 
## Changes - Introduces `TestLifecycle` in `tests/semanticcache/lifecycle_test.go`, which runs 10 serial subtests (3.1–3.10) exercising the plugin's lifecycle state machine: - **3.1** – Disabling the plugin via PUT sets `enabled=false` and `status=disabled` - **3.2** – Requests while disabled bypass the cache pipeline entirely (no `cache_debug` header) - **3.3 / 3.4** – Cache-clear endpoints (`/api/cache/clear/{id}` and `/api/cache/clear-by-key/{k}`) return HTTP 400 when the plugin is not loaded - **3.5** – Re-enabling via PUT restores `enabled=true` and `status=active` - **3.6** – Entries written before disable are still queryable after re-enable - **3.7** – DELETE removes both the DB row and the in-memory plugin instance - **3.8** – Requests after delete bypass the cache pipeline (no `cache_debug` header) - **3.9** – Recreating the plugin with the same config succeeds and surfaces `status=active` - **3.10** – Entries written before delete are still queryable after recreate, validating the namespace-persistence contract introduced by the removal of `CleanUpOnShutdown` - Tests are intentionally serial (no `t.Parallel()`) because each subtest mutates globally shared plugin lifecycle state - A `t.Cleanup` handler performs best-effort key clearing regardless of which lifecycle state the plugin is left in at teardown ## Type of change - [ ] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [x] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [ ] Docs ## How to test ```sh go test ./tests/semanticcache/... -run TestLifecycle -v ``` Expected outcome: all 10 subtests (3.1–3.10) pass, with structured log output at each step confirming correct status transitions and cache hit/miss behaviour. ## Breaking changes - [x] No ## Related issues ## Security considerations None. Tests run against a local Bifrost instance and do not introduce new auth paths, secrets handling, or PII exposure. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [x] I added/updated tests where appropriate - [ ] I updated documentation where needed - [x] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- tests/semanticcache/lifecycle_test.go | 187 ++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 tests/semanticcache/lifecycle_test.go diff --git a/tests/semanticcache/lifecycle_test.go b/tests/semanticcache/lifecycle_test.go new file mode 100644 index 0000000000..b00ccac8df --- /dev/null +++ b/tests/semanticcache/lifecycle_test.go @@ -0,0 +1,187 @@ +package semanticcache + +import ( + "net/http" + "testing" +) + +const ( + ttlLifecycle = "30s" + defaultKeyLifecycle = "phase3-default" +) + +// TestLifecycle exercises plugin disable / enable / delete lifecycle. +// +// Unlike TestDirect / TestSemantic, every subtest runs SERIALLY by design — +// each case mutates plugin lifecycle state (enabled flag, existence) which +// is fundamentally global and not parallelizable. No `t.Parallel()` calls +// in this file. 
+// +// Test flow (linear timeline): +// +// Setup → seed entry under direct-only plugin +// 3.1 → PUT {enabled:false} +// 3.2 → request after disable, no cache_debug stamped +// 3.3 → DELETE /api/cache/clear/{id} → expect 400 +// 3.4 → DELETE /api/cache/clear-by-key/{k} → expect 400 +// 3.5 → PUT {enabled:true} +// 3.6 → seed entry STILL hits (disable preserves namespace data) +// 3.7 → DELETE /api/plugins/semantic_cache +// 3.8 → request after delete, no cache_debug +// 3.9 → POST /api/plugins to recreate (same namespace) +// 3.10 → seed entry STILL hits (delete+recreate preserves namespace data — +// contract from commit a7c611e2e removing CleanUpOnShutdown) +func TestLifecycle(t *testing.T) { + lc := newLogCtx("lifecycle", "setup") + logf(t, lc.at(0), "SETUP", "phase_start", map[string]any{ + "mode": "direct-only", + "ttl": ttlLifecycle, + }) + + // Clean state — Phase 2 may have left a plugin in semantic mode; tear it + // down so we can create from scratch in direct-only. + if _, exists := pluginGet(t, lc, 1); exists { + pluginDelete(t, lc, 2) + } + + // Create plugin in direct-only mode. + created := pluginCreate(t, lc, 3, true, directOnlyConfig(ttlLifecycle, defaultKeyLifecycle)) + if !created.Enabled || created.Status.Status != "active" { + t.Fatalf("setup: expected enabled+active, got enabled=%v status=%q", + created.Enabled, created.Status.Status) + } + + // Populate the seed entry. We'll reference seedCacheID and seedReq across + // disable / re-enable / delete / recreate to assert namespace persistence. + seedKey := "phase3-seed" + seedReq := simpleChat(cfg.OpenAIModel, "Name the largest planet in our solar system.") + respA := postChat(t, lc, 4, seedReq, cacheHeaders{Key: seedKey}) + seedCacheID := assertMiss(t, lc, 5, respA) + waitForCacheWrite(t, lc, 6) + // Confirm the seed entry is queryable before we start disrupting state. 
+ _ = assertHit(t, lc, 8, postChat(t, lc, 7, seedReq, cacheHeaders{Key: seedKey}), "direct") + logf(t, lc.at(9), "SETUP", "seed_entry_ready", map[string]any{"cache_id": seedCacheID}) + + allKeys := []string{seedKey, "phase3-k2", "phase3-k8"} + teardownLc := newLogCtx("lifecycle", "teardown") + t.Cleanup(func() { + // Best-effort: clear keys if the plugin is loaded at teardown time. + // If a case left it disabled/deleted, the 400 is informational. + for _, k := range allKeys { + _ = clearByCacheKey(t, teardownLc.at(99), 99, k) + } + }) + + // 3.1 disable_via_update — PUT {enabled:false, config:<current>}. + // Per UI wire parity (PLAN §3.5), we re-send the current config along + // with enabled=false — never PUT bare {enabled:false} which would wipe + // the saved config blob. + t.Run("3.1_disable_via_update", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.1_disable_via_update") + updated := pluginUpdate(t, lc, 1, false, directOnlyConfig(ttlLifecycle, defaultKeyLifecycle)) + if updated.Enabled { + t.Fatalf("expected enabled=false in update response, got true") + } + // Confirm via GET that the disabled state is reflected. + p, exists := pluginGet(t, lc, 2) + if !exists { + t.Fatalf("plugin row should persist after disable (only memory unloaded)") + } + if p.Enabled { + t.Fatalf("GET expected enabled=false, got true") + } + if p.Status.Status != "disabled" { + t.Fatalf("expected status=disabled, got %q", p.Status.Status) + } + }) + + // 3.2 request_after_disable_no_cache_debug — plugin removed from + // in-memory pipeline; PreLLMHook never runs; no cache_debug stamped.
+ t.Run("3.2_request_after_disable_no_cache_debug", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.2_request_after_disable_no_cache_debug") + resp := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, "What's 2+2?"), cacheHeaders{Key: "phase3-k2"}) + assertNoCacheDebug(t, lc, 2, resp) + }) + + // 3.3 clear_endpoints_when_plugin_disabled — the cache-clear handler must + // return HTTP 400 with "plugin is not loaded" when the resolver returns + // nil. Pre-fix this returned 405; bug surfaced + fixed earlier this run. + t.Run("3.3_clear_endpoints_when_plugin_disabled", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.3_clear_endpoints_when_plugin_disabled") + status := clearByCacheID(t, lc, 1, "00000000-0000-0000-0000-000000000000") + if status != http.StatusBadRequest { + t.Fatalf("expected 400 (plugin not loaded), got %d", status) + } + }) + + // 3.4 clear_by_key_endpoints_when_disabled — same contract for clear-by-key. + t.Run("3.4_clear_by_key_endpoints_when_disabled", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.4_clear_by_key_endpoints_when_disabled") + status := clearByCacheKey(t, lc, 1, "phase3-disabled-test") + if status != http.StatusBadRequest { + t.Fatalf("expected 400 (plugin not loaded), got %d", status) + } + }) + + // 3.5 re_enable_via_update — flip back to enabled; status flips to active. + t.Run("3.5_re_enable_via_update", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.5_re_enable_via_update") + updated := pluginUpdate(t, lc, 1, true, directOnlyConfig(ttlLifecycle, defaultKeyLifecycle)) + if !updated.Enabled { + t.Fatalf("expected enabled=true after re-enable, got false") + } + if updated.Status.Status != "active" { + t.Fatalf("expected status=active after re-enable, got %q", updated.Status.Status) + } + }) + + // 3.6 replay_previous_entries_after_reenable — entries written before + // disable must still be queryable. Namespace data is independent of + // plugin in-memory lifecycle. 
+ t.Run("3.6_replay_previous_entries_after_reenable", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.6_replay_previous_entries_after_reenable") + resp := postChat(t, lc, 1, seedReq, cacheHeaders{Key: seedKey}) + gotID := assertHit(t, lc, 2, resp, "direct") + assertSameCacheID(t, lc, 3, gotID, seedCacheID) + }) + + // 3.7 delete_plugin — DELETE removes both DB row and in-memory plugin. + t.Run("3.7_delete_plugin", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.7_delete_plugin") + pluginDelete(t, lc, 1) + if _, exists := pluginGet(t, lc, 2); exists { + t.Fatalf("plugin should be 404 after delete") + } + }) + + // 3.8 request_after_delete — no plugin instance, no cache_debug. + t.Run("3.8_request_after_delete", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.8_request_after_delete") + resp := postChat(t, lc, 1, simpleChat(cfg.OpenAIModel, "What's 3+3?"), cacheHeaders{Key: "phase3-k8"}) + assertNoCacheDebug(t, lc, 2, resp) + }) + + // 3.9 re_create_clean — POST with the SAME config (and therefore the + // same namespace). Recreate must succeed and surface status=active. + t.Run("3.9_re_create_clean", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.9_re_create_clean") + created := pluginCreate(t, lc, 1, true, directOnlyConfig(ttlLifecycle, defaultKeyLifecycle)) + if !created.Enabled || created.Status.Status != "active" { + t.Fatalf("recreate: expected enabled+active, got enabled=%v status=%q", + created.Enabled, created.Status.Status) + } + }) + + // 3.10 namespace_persists_across_delete_recreate — the contract that + // commit a7c611e2e (removing CleanUpOnShutdown) enabled: entries written + // under a namespace must survive plugin delete + recreate. Without this, + // any production restart of Bifrost would wipe the cache. 
+ t.Run("3.10_namespace_persists_across_delete_recreate", func(t *testing.T) { + lc := newLogCtx("lifecycle", "3.10_namespace_persists_across_delete_recreate") + resp := postChat(t, lc, 1, seedReq, cacheHeaders{Key: seedKey}) + gotID := assertHit(t, lc, 2, resp, "direct") + assertSameCacheID(t, lc, 3, gotID, seedCacheID) + }) + + logf(t, teardownLc.at(99), "TEARDOWN", "phase_end", nil) +} From f9cfe366205be5d7c22571881424271b2b0bd222 Mon Sep 17 00:00:00 2001 From: Pratham Mishra <99235987+Pratham-Mishra04@users.noreply.github.com> Date: Wed, 13 May 2026 01:17:40 +0530 Subject: [PATCH 18/81] feat: add `test-semantic-cache` and `test-semantic-cache-complete` Makefile targets (#3429) ## Summary Adds Makefile targets for running `semantic_cache` plugin unit tests and end-to-end tests, with optional integration of the `trail` CLI for capture-based debugging sessions. ## Changes - Added `test-semantic-cache` target that runs e2e tests from `tests/semanticcache`, supporting a `CACHE_TYPE` variable (`direct` or `semantic`) to filter which test phases are executed. Automatically wraps the run in `trail run` if the `trail` binary is available on `PATH`. - Added `test-semantic-cache-complete` target that runs both the plugin unit tests (`plugins/semanticcache`) and the e2e tests in sequence, optionally wrapping the entire session in a single `trail run` invocation. - Added `_test-semantic-cache-complete-inner` as an internal helper target that performs the actual sequential execution of unit and e2e tests with formatted output banners. - Registered all three new targets in the `.PHONY` declaration. 
## Type of change - [ ] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [x] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [x] Plugins - [ ] UI (React) - [ ] Docs ## How to test ```sh # Run all semantic_cache e2e tests make test-semantic-cache # Run only direct cache tests CACHE_TYPE=direct make test-semantic-cache # Run only semantic cache tests CACHE_TYPE=semantic make test-semantic-cache # Run both unit and e2e tests together make test-semantic-cache-complete # Force e2e run regardless of preconditions RUN_FORCE=1 make test-semantic-cache-complete ``` If `trail` is installed and on `PATH`, all commands will automatically wrap execution in a `trail run` session for capture-based debugging. ## Breaking changes - [ ] Yes - [x] No ## Related issues ## Security considerations None. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- Makefile | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 950dad247b..b1acabbb7d 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ define EXPOSE_ENV fi endef -.PHONY: all help dev dev-pulse build-ui build build-cli run run-cli install-air install-pulse clean test test-cli install-ui setup-workspace work-init work-clean docs docker-image docker-run cleanup-enterprise mod-tidy test-integrations-py test-integrations-ts install-playwright run-e2e run-e2e-ui run-e2e-headed format ui install-newman run-provider-harness-test run-cli-harness-test +.PHONY: all help dev dev-pulse build-ui build build-cli run run-cli install-air install-pulse clean test test-cli install-ui setup-workspace work-init work-clean docs docker-image docker-run cleanup-enterprise 
mod-tidy test-integrations-py test-integrations-ts install-playwright run-e2e run-e2e-ui run-e2e-headed format ui install-newman run-provider-harness-test run-cli-harness-test test-semantic-cache test-semantic-cache-complete _test-semantic-cache-complete-inner all: help @@ -1077,6 +1077,71 @@ test-all: test-core test-framework test-plugins test-http-transport test test-cl $(ECHO) ""; \ fi +test-semantic-cache: ## Run semantic_cache e2e tests (Usage: [CACHE_TYPE=direct|semantic] [RUN_FORCE=0] make test-semantic-cache). RUN_FORCE defaults to 1. Auto-detects trail CLI and wraps the run when present. + @cd tests/semanticcache && \ + case "$$CACHE_TYPE" in \ + direct) \ + filter='^(TestPreconditions|TestDirect|TestLifecycle)$$'; \ + $(ECHO) "$(CYAN)CACHE_TYPE=direct → running preconditions + direct + lifecycle$(NC)"; \ + ;; \ + semantic) \ + filter='^(TestPreconditions|TestParaphraseFixtures|TestSemantic|TestLifecycle)$$'; \ + $(ECHO) "$(CYAN)CACHE_TYPE=semantic → running preconditions + fixtures + semantic + lifecycle$(NC)"; \ + ;; \ + '') \ + filter=''; \ + $(ECHO) "$(CYAN)CACHE_TYPE unset → running all phases$(NC)"; \ + ;; \ + *) \ + $(ECHO) "$(RED)CACHE_TYPE=$$CACHE_TYPE invalid; expected 'direct', 'semantic', or unset$(NC)"; \ + exit 1; \ + ;; \ + esac; \ + if command -v trail >/dev/null 2>&1; then \ + $(ECHO) "$(GREEN)trail detected — wrapping run in 'trail run' (session id will be printed by trail)$(NC)"; \ + if [ -n "$$filter" ]; then \ + exec trail run -- env RUN_FORCE=$${RUN_FORCE:-1} GOWORK=off go test -v -run "$$filter" ./...; \ + else \ + exec trail run -- env RUN_FORCE=$${RUN_FORCE:-1} GOWORK=off go test -v ./...; \ + fi; \ + else \ + $(ECHO) "$(YELLOW)trail not on PATH — falling back to direct go test (install 'trail' for capture-based debugging)$(NC)"; \ + if [ -n "$$filter" ]; then \ + exec env RUN_FORCE=$${RUN_FORCE:-1} GOWORK=off go test -v -run "$$filter" ./...; \ + else \ + exec env RUN_FORCE=$${RUN_FORCE:-1} GOWORK=off go test -v ./...; \ + fi; \ + 
fi + +test-semantic-cache-complete: ## Run BOTH plugin unit tests + e2e tests for semantic_cache. RUN_FORCE defaults to 1. Wraps everything in trail if available. + @if command -v trail >/dev/null 2>&1; then \ + $(ECHO) "$(GREEN)trail detected — wrapping unit + e2e tests in a single trail session (id printed by trail)$(NC)"; \ + exec trail run -- $(MAKE) _test-semantic-cache-complete-inner; \ + else \ + $(ECHO) "$(YELLOW)trail not on PATH — running tests directly (install 'trail' for capture-based debugging)$(NC)"; \ + $(MAKE) _test-semantic-cache-complete-inner; \ + fi + +_test-semantic-cache-complete-inner: + @$(ECHO) "" + @$(ECHO) "$(CYAN)═══════════════════════════════════════════════════════════$(NC)" + @$(ECHO) "$(CYAN) Running semantic_cache plugin UNIT tests $(NC)" + @$(ECHO) "$(CYAN)═══════════════════════════════════════════════════════════$(NC)" + @cd plugins/semanticcache && go test -v ./... + @$(ECHO) "" + @$(ECHO) "$(GREEN)═══════════════════════════════════════════════════════════$(NC)" + @$(ECHO) "$(GREEN) Unit tests completed $(NC)" + @$(ECHO) "$(GREEN)═══════════════════════════════════════════════════════════$(NC)" + @$(ECHO) "" + @$(ECHO) "$(CYAN)═══════════════════════════════════════════════════════════$(NC)" + @$(ECHO) "$(CYAN) Running semantic_cache E2E tests $(NC)" + @$(ECHO) "$(CYAN)═══════════════════════════════════════════════════════════$(NC)" + @cd tests/semanticcache && RUN_FORCE=$${RUN_FORCE:-1} GOWORK=off go test -v ./... 
+ @$(ECHO) "" + @$(ECHO) "$(GREEN)═══════════════════════════════════════════════════════════$(NC)" + @$(ECHO) "$(GREEN) E2E tests completed $(NC)" + @$(ECHO) "$(GREEN)═══════════════════════════════════════════════════════════$(NC)" + test-chatbot: ## Run interactive chatbot integration test (Usage: RUN_CHATBOT_TEST=1 make test-chatbot) @$(EXPOSE_ENV); \ $(ECHO) "$(GREEN)Running interactive chatbot integration test...$(NC)"; \ From 06eb28942a4e013402a8f3958483f194f1f7dfe6 Mon Sep 17 00:00:00 2001 From: Akshay Deo Date: Wed, 13 May 2026 16:34:53 +0530 Subject: [PATCH 19/81] harness improvements (#3457) --- Makefile | 62 ++- tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md | 30 +- .../e2e/api/collections/provider-harness.json | 390 +++++++++++++++++- tests/e2e/api/runners/filter-collection.mjs | 64 ++- 4 files changed, 501 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index b1acabbb7d..04a43b3ee4 100644 --- a/Makefile +++ b/Makefile @@ -153,6 +153,20 @@ install-junit-viewer: ## Install junit-viewer for HTML report generation (if not dev: install-ui install-air setup-workspace $(if $(DEBUG),install-delve) ## Start complete development environment (UI + API with proxy) @$(EXPOSE_ENV); \ + set -m; \ + cleanup() { \ + trap - EXIT INT TERM HUP; \ + kill %1 %2 2>/dev/null || true; \ + sleep 1; \ + kill -KILL %1 %2 2>/dev/null || true; \ + wait 2>/dev/null || true; \ + }; \ + stop_dev() { \ + cleanup; \ + exit 130; \ + }; \ + trap cleanup EXIT; \ + trap stop_dev INT TERM HUP; \ $(ECHO) "$(GREEN)Starting Bifrost complete development environment...$(NC)"; \ $(ECHO) "$(YELLOW)This will start:$(NC)"; \ $(ECHO) " 1. 
UI development server (localhost:3000)"; \ @@ -186,7 +200,7 @@ dev: install-ui install-air setup-workspace $(if $(DEBUG),install-delve) ## Star -log-style "$(LOG_STYLE)" \ -log-level "$(LOG_LEVEL)" \ $(if $(PROMETHEUS_LABELS),-prometheus-labels "$(PROMETHEUS_LABELS)") \ - $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))"); \ + $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))") & \ else \ cd transports/bifrost-http && BIFROST_UI_DEV=true air -c .air.toml -- \ -host "$(HOST)" \ @@ -194,11 +208,28 @@ dev: install-ui install-air setup-workspace $(if $(DEBUG),install-delve) ## Star -log-style "$(LOG_STYLE)" \ -log-level "$(LOG_LEVEL)" \ $(if $(PROMETHEUS_LABELS),-prometheus-labels "$(PROMETHEUS_LABELS)") \ - $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))"); \ - fi + $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))") & \ + fi; \ + while [ "$$(jobs -r | wc -l | tr -d ' ')" -eq 2 ]; do sleep 1; done; \ + cleanup; \ + exit 1 dev-pulse: install-ui install-pulse setup-workspace $(if $(DEBUG),install-delve) ## Start complete development environment using pulse for hot reloading @$(EXPOSE_ENV); \ + set -m; \ + cleanup() { \ + trap - EXIT INT TERM HUP; \ + kill %1 %2 2>/dev/null || true; \ + sleep 1; \ + kill -KILL %1 %2 2>/dev/null || true; \ + wait 2>/dev/null || true; \ + }; \ + stop_dev() { \ + cleanup; \ + exit 130; \ + }; \ + trap cleanup EXIT; \ + trap stop_dev INT TERM HUP; \ $(ECHO) "$(GREEN)Starting Bifrost complete development environment (pulse)...$(NC)"; \ $(ECHO) "$(YELLOW)This will start:$(NC)"; \ $(ECHO) " 1. 
UI development server (localhost:3000)"; \ @@ -232,7 +263,7 @@ dev-pulse: install-ui install-pulse setup-workspace $(if $(DEBUG),install-delve) -log-style "$(LOG_STYLE)" \ -log-level "$(LOG_LEVEL)" \ $(if $(PROMETHEUS_LABELS),-prometheus-labels "$(PROMETHEUS_LABELS)") \ - $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))"); \ + $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))") & \ else \ PORT="$(PORT)" BIFROST_UI_DEV=true pulse -- \ -host "$(HOST)" \ @@ -240,8 +271,11 @@ dev-pulse: install-ui install-pulse setup-workspace $(if $(DEBUG),install-delve) -log-style "$(LOG_STYLE)" \ -log-level "$(LOG_LEVEL)" \ $(if $(PROMETHEUS_LABELS),-prometheus-labels "$(PROMETHEUS_LABELS)") \ - $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))"); \ - fi + $(if $(APP_DIR),-app-dir "$(abspath $(APP_DIR))") & \ + fi; \ + while [ "$$(jobs -r | wc -l | tr -d ' ')" -eq 2 ]; do sleep 1; done; \ + cleanup; \ + exit 1 build-ui: install-ui ## Build ui @$(ECHO) "$(GREEN)Building ui...$(NC)" @@ -1602,7 +1636,11 @@ install-newman: ## Install newman + htmlextra reporter if not already installed @$(USE_NODE); npm list -g newman-reporter-htmlextra > /dev/null 2>&1 || ($(ECHO) "$(YELLOW)Installing newman-reporter-htmlextra...$(NC)" && npm install -g newman-reporter-htmlextra) @$(ECHO) "$(GREEN)Newman + htmlextra are ready$(NC)" +<<<<<<< HEAD run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost provider-harness Postman collection. HELP=1 prints full parameter docs. Per-provider parallelism is ON by default (~3-4× speedup); set PARALLEL=0 for sequential. Filter via PROVIDER=openai|anthropic|bedrock|gemini|vertex|azure|passthrough, FEATURE="" (matches request name/body), RERUN_FAILED=1 (re-run only items that failed last run). INCLUDE_PREVIEW=1 to run [PREVIEW]-tagged account/region-scoped cases. INCLUDE_SKIP=1 to run [SKIP]-tagged criss-cross cells for known-unsupported provider+modality pairs. 
USE_INFISICAL=1 to source from Infisical (Usage: make run-provider-harness-test [HELP=1] [PARALLEL=0] [PROVIDER=anthropic] [FEATURE="web search"] [RERUN_FAILED=1] [INCLUDE_PREVIEW=1] [INCLUDE_SKIP=1] [BASE_URL=...] [FOLDER="..."] [ENV_FILE=...] [VIEWER_PORT=8090] [CI=1]) +======= +run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost provider-harness Postman collection. HELP=1 prints full parameter docs. Filter via PROVIDER=openai|anthropic|bedrock|gemini|vertex|azure|passthrough, FEATURE="" or FEATURE="," (AND across substrings; matches request name/URL/body), RERUN_FAILED=1 (re-run only items that failed last run). INCLUDE_PREVIEW=1 to run [PREVIEW]-tagged account/region-scoped cases. USE_INFISICAL=1 to source from Infisical (Usage: make run-provider-harness-test [HELP=1] [PROVIDER=anthropic] [FEATURE="web search"] [FEATURE="cross-cut,structured output"] [RERUN_FAILED=1] [INCLUDE_PREVIEW=1] [BASE_URL=...] [FOLDER="..."] [ENV_FILE=...] [VIEWER_PORT=8090] [CI=1]) +>>>>>>> f09185ec1 (harness improvements) @if [ -n "$(HELP)" ]; then \ printf '\n%s\n' "$(CYAN)run-provider-harness-test - Bifrost provider harness runner$(NC)"; \ printf '%s\n\n' "Runs the Bifrost provider-harness Postman collection through newman, with optional filtering."; \ @@ -1611,8 +1649,9 @@ run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost prov printf ' %-18s %s\n' "HELP=1" "Print this help and exit (no Bifrost or network activity)."; \ printf ' %-18s %s\n' "PROVIDER=" "Filter requests by provider. 
One of: openai, anthropic, bedrock, gemini, vertex, azure, passthrough."; \ printf ' %-18s %s\n' "" " Matches via PROVIDER_KEYWORDS in tests/e2e/api/runners/filter-collection.mjs (loose name/body substring)."; \ - printf ' %-18s %s\n' "FEATURE=\"\"" "Filter by case-insensitive keyword against the full request JSON (name + URL + body)."; \ - printf ' %-18s %s\n' "" " Examples: FEATURE=\"web search\", FEATURE=\"streaming\", FEATURE=\"prompt caching\"."; \ + printf ' %-18s %s\n' "FEATURE=\"\"" "Filter by case-insensitive keyword(s) against the full request JSON (name + URL + body + ancestor folder names)."; \ + printf ' %-18s %s\n' "" " Single: FEATURE=\"web search\". Multi-keyword AND (comma-separated): FEATURE=\"cross-cut,structured output\"."; \ + printf ' %-18s %s\n' "" " \"cross-cut\" is a structural keyword - matches any row routed through unified /v1/chat/completions with a provider/model body, regardless of name."; \ printf ' %-18s %s\n' "RERUN_FAILED=1" "Re-run only requests that failed in the prior run (reads tmp/newman-report.json)."; \ printf ' %-18s %s\n' "" " Composes with PROVIDER and FEATURE (predicates AND together)."; \ printf ' %-18s %s\n' "BASE_URL=" "Bifrost gateway URL (default: http://localhost:8080). Skips auto-start if /health responds."; \ @@ -1627,6 +1666,7 @@ run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost prov printf ' %-18s %s\n' "USE_INFISICAL=1" "Source secrets from Infisical CLI ('infisical export --path /local --format dotenv') instead of .env."; \ printf '\n%s\n' "$(YELLOW)EXAMPLES$(NC)"; \ printf ' %s\n' "make run-provider-harness-test HELP=1"; \ +<<<<<<< HEAD printf ' %s\n' "make run-provider-harness-test # full sweep, 6 providers concurrently (default ~3-4× speedup)"; \ printf ' %s\n' "make run-provider-harness-test PARALLEL=0 # sequential mode (ordered output, htmlextra report)"; \ printf ' %s\n' "make run-provider-harness-test FOLDER=\"8. 
Criss-Cross\" # criss-cross matrix only (endpoint × provider × modality)"; \ @@ -1634,6 +1674,12 @@ run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost prov printf ' %s\n' "make run-provider-harness-test PROVIDER=bedrock # bedrock-only (includes bedrock-model cells across §8)"; \ printf ' %s\n' "make run-provider-harness-test FEATURE=\"web search\" # all providers, web-search entries"; \ printf ' %s\n' "make run-provider-harness-test INCLUDE_SKIP=1 # also run [SKIP] cells (capability-gap matrix)"; \ +======= + printf ' %s\n' "make run-provider-harness-test # full 339-request sweep"; \ + printf ' %s\n' "make run-provider-harness-test PROVIDER=bedrock # bedrock-only"; \ + printf ' %s\n' "make run-provider-harness-test FEATURE=\"web search\" # all providers, web-search entries"; \ + printf ' %s\n' "make run-provider-harness-test FEATURE=\"cross-cut,structured output\" # AND of substrings"; \ +>>>>>>> f09185ec1 (harness improvements) printf ' %s\n' "make run-provider-harness-test RERUN_FAILED=1 # triage iteration loop"; \ printf ' %s\n' "make run-provider-harness-test PROVIDER=anthropic RERUN_FAILED=1 # anthropic failures only"; \ printf ' %s\n' "make run-provider-harness-test PROVIDER=passthrough # passthrough sweep (incl. 
Bedrock SigV4)"; \ diff --git a/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md b/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md index 63b179dba6..09fd031343 100644 --- a/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md +++ b/tests/e2e/api/HARNESS_COVERAGE_BACKLOG.md @@ -301,7 +301,7 @@ Vertex's API surface for Gemini largely mirrors AI Studio's generateContent — ### Anthropic-on-Vertex specific - [x] Claude Opus 4.7 in user's region (`global` / `us-east5` / `europe-west1`) -- [ ] **Claude Sonnet 4.6 / 4.5 / Haiku 4.5** (regional gating — must use `global` or `us-east5`) +- [~] **Claude Sonnet 4.6 / 4.5 / Haiku 4.5** (regional gating - must use `global` or `us-east5`; Sonnet 4.6 cross-cut variants added in Cross-Cut Round 4 covering structured output, function calling, streaming, vision, tool_choice, stop sequences, multi-turn, system message, web search, PDF, sampling-params; Haiku 4.5 + Sonnet 4.5 still uncovered) - [ ] **`anthropic_version: "vertex-2023-10-16"` in body** (Vertex-specific replacement for the header) - [ ] **Vertex `:streamRawPredict` endpoint** for SSE streaming - [ ] **Beta headers via body field** (`anthropic_beta` instead of HTTP header) @@ -358,20 +358,20 @@ These exercise Bifrost's translation layer between provider shapes — every che `POST /v1/chat/completions` endpoint with `provider/model` prefix routing. 
- [x] OpenAI / Anthropic / Bedrock / Gemini / Vertex Basic Chat (50 cross-model entries) -- [~] Function calling cross-cut (4 providers tested; Vertex partially) -- [~] Structured output cross-cut (OpenAI + Gemini + Vertex; **Anthropic + Bedrock missing**) -- [~] Streaming cross-cut (4 providers tested; Vertex/Azure missing) -- [~] Vision cross-cut (OpenAI + Anthropic + Gemini; **Bedrock + Vertex + Azure missing**) -- [~] Web search cross-cut (3 providers; **Bedrock + Vertex + Azure missing**) -- [ ] **Code execution cross-cut** (Anthropic + Gemini) -- [ ] **Tool choice forced cross-cut** (multi-provider) -- [ ] **Computer use via cross-model** (`anthropic/claude-...` with computer_2025x tools — verifies Bifrost's translation; currently only tested via /anthropic drop-in) -- [ ] **Extended/adaptive thinking via cross-model** -- [ ] **Prompt caching via cross-model** -- [ ] **System message cross-cut** (every provider via `/v1/chat/completions`) -- [ ] **Multi-turn conversation cross-cut** (provider-specific role normalization) -- [ ] **Stop sequences cross-cut** (each provider has different stop semantics) -- [ ] **Sampling-params normalization** (Bifrost should silently drop temperature for Opus 4.7+) +- [x] Function calling cross-cut (OpenAI + Anthropic + Bedrock + Gemini + Vertex Claude + Vertex Gemini via Cross-Cut Round 4; Azure via Cross-Cut Round 4) +- [x] Structured output cross-cut (OpenAI + Anthropic + Bedrock + Gemini + Vertex Gemini + Vertex Claude via Cross-Cut Round 4; Azure via Cross-Cut Round 4) +- [x] Streaming cross-cut (OpenAI + Anthropic + Bedrock + Gemini + Vertex Claude + Vertex Gemini + Azure via Cross-Cut Round 4) +- [x] Vision cross-cut (OpenAI + Anthropic + Bedrock + Gemini + Vertex Gemini + Vertex Claude + Azure via Cross-Cut Round 4) +- [~] Web search cross-cut (Anthropic + Bedrock + Vertex Claude (sonnet) + Vertex Gemini (google_search) via Cross-Cut Round 4; **OpenAI Responses-style web_search via /v1/chat still missing**) +- [~] 
**Code execution cross-cut** (Anthropic + Gemini + Bedrock + Vertex Claude (opus); **Vertex Gemini code execution via /v1/chat untested**) +- [~] **Tool choice forced cross-cut** (OpenAI + Bedrock + Vertex Claude via Cross-Cut Round 4; **Anthropic + Gemini + Azure still missing**) +- [ ] **Computer use via cross-model** (`anthropic/claude-...` with computer_2025x tools - verifies Bifrost's translation; currently only tested via /anthropic drop-in and `vertex/claude-opus-4-7` preview at L1279) +- [~] **Extended/adaptive thinking via cross-model** (Anthropic enabled + Bedrock enabled/adaptive + Vertex Claude enabled/adaptive covered; **anthropic-direct adaptive Opus 4.7 still missing**) +- [x] **Prompt caching via cross-model** (Anthropic + Bedrock 1h + Vertex Claude 1h covered) +- [~] **System message cross-cut** (Vertex Claude added in Round 4; Azure added in Round 4; **other providers were already implicit via cross-cut entries** - if explicit test needed, file a ticket) +- [~] **Multi-turn conversation cross-cut** (Vertex Claude added in Round 4; remaining providers still cross-cut-implicit only) +- [x] **Stop sequences cross-cut** (OpenAI + Anthropic + Gemini already; + Bedrock + Vertex Claude + Vertex Gemini added in Cross-Cut Round 4) +- [~] **Sampling-params normalization** (Bifrost should silently drop temperature for Opus 4.7+; Anthropic-direct + Vertex Claude Opus 4.7 covered; **Bedrock Opus 4.7 via cross-model still missing**) - [ ] **Failover scenarios** (request to provider X falls back to provider Y on 5xx) - [ ] **Virtual keys / governance** (`X-Bifrost-VK` header with allowed_models) - [ ] **Rate limit propagation** (provider 429 → Bifrost 429 with Retry-After preserved) diff --git a/tests/e2e/api/collections/provider-harness.json b/tests/e2e/api/collections/provider-harness.json index 665ed30482..17fe42952e 100644 --- a/tests/e2e/api/collections/provider-harness.json +++ b/tests/e2e/api/collections/provider-harness.json @@ -1318,6 +1318,7 @@ "item": 
[ { "name": "openai/gpt-4o-mini (json_schema)", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], @@ -1327,6 +1328,7 @@ }, { "name": "anthropic/claude-haiku (forced tool)", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Forced tool: emit_city invoked with schema-compliant arguments', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('emit_city'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); pm.expect(a).to.have.property('country').that.is.a('string'); pm.expect(a).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], @@ -1336,6 +1338,7 @@ }, { "name": "gemini/gemini-2.5-flash (responseSchema)", + "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (pp) { return pp && pp.text; }); var c = t ? t.text : ''; pm.expect(c, 'candidates[0].content.parts[*].text empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('parts.text not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], @@ -1345,6 +1348,7 @@ }, { "name": "vertex/gemini-2.5-pro (responseSchema)", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (pp) { return pp && pp.text; }); var c = t ? 
t.text : ''; pm.expect(c, 'candidates[0].content.parts[*].text empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('parts.text not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], @@ -1391,6 +1395,7 @@ "item": [ { "name": "openai/gpt-4o-mini", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city argument', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], @@ -1400,6 +1405,7 @@ }, { "name": "anthropic/claude-haiku-4-5", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city argument', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in 
response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], @@ -1409,6 +1415,7 @@ }, { "name": "bedrock/global.anthropic.claude-sonnet-4-6", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city argument', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], @@ -1418,6 +1425,7 @@ }, { "name": "gemini/gemini-2.5-flash", + "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city argument', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { 
pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], @@ -1617,13 +1625,13 @@ { "name": "Cross-cut: code execution Gemini", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Compute 50!\"}],\n \"tools\": [{\"type\":\"code_execution\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: extended thinking via cross-model", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Plan a trip\"}],\n \"thinking\": {\"type\":\"enabled\",\"budget_tokens\":2000},\n \"max_tokens\": 4096\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: prompt caching via cross-model", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"system\",\"content\":[{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}]},{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, - { "name": "Cross-cut: stop sequences (OpenAI)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, 
four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, - { "name": "Cross-cut: stop sequences (Anthropic)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, - { "name": "Cross-cut: stop sequences (Gemini)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: stop sequences (OpenAI)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked into content').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter'], 'unexpected finish_reason: ' + fr).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": 
{"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: stop sequences (Anthropic)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked into content').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter'], 'unexpected finish_reason: ' + fr).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: stop sequences (Gemini)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked into content').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter'], 'unexpected finish_reason: ' + fr).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: 
one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: tool_choice forced (OpenAI)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"f\",\"parameters\":{\"type\":\"object\"}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: tool_choice forced (Bedrock)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"f\",\"parameters\":{\"type\":\"object\"}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, - { "name": "Cross-cut: structured output Anthropic", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, - { "name": "Cross-cut: structured output Bedrock", "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: structured output Anthropic", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: structured output Bedrock", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: vision Bedrock", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: vision Vertex (Gemini)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Cross-cut: web search Bedrock 
(Anthropic)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, @@ -1660,7 +1668,7 @@ { "name": "Vertex Gemini: vision", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{vertexModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{vertexModel}}:generateContent"]}}}, { "name": "Vertex Gemini: code execution", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Compute fib(20)\"}]}],\n \"tools\": [{\"codeExecution\":{}}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{vertexModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{vertexModel}}:generateContent"]}}}, { "name": "Vertex Gemini: thinking budget", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Solve 17*23\"}]}],\n \"generationConfig\": {\"thinkingConfig\":{\"thinkingBudget\":4000}}\n}"}, "url": 
{"raw":"{{baseUrl}}/genai/v1beta/models/{{vertexModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{vertexModel}}:generateContent"]}}}, - { "name": "Vertex Gemini: structured output json_schema (via /v1/chat)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Gemini: structured output json_schema (via /v1/chat)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Vertex Claude: extended thinking", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"adaptive\"},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Vertex Claude: web search", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "Vertex Claude: prompt caching", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": 
\"vertex/claude-opus-4-7\",\n \"max_tokens\": 256,\n \"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, @@ -1712,7 +1720,7 @@ { "name": "Anthropic: allowed_callers + advanced beta", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"},{"key":"anthropic-beta","value":"advanced-tool-use-2025-09-15"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-opus-4-7\",\n \"max_tokens\": 256,\n \"tools\": [{\"name\":\"f\",\"input_schema\":{\"type\":\"object\"},\"allowed_callers\":[\"direct\"]}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}}, { "name": "Anthropic: skills/container", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"},{"key":"anthropic-beta","value":"skills-2025-10-29"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"container\": {\"skills\":[{\"skill_id\":\"data-analysis\",\"type\":\"anthropic\"}]},\n \"messages\": [{\"role\":\"user\",\"content\":\"Analyze\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic","v1","messages"]}}}, { "name": "Gemini: parallel function calls", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Weather in NYC and SF?\"}]}],\n \"tools\": 
[{\"functionDeclarations\":[{\"name\":\"get_weather\",\"parameters\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"}}}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{genaiModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{genaiModel}}:generateContent"]}}}, - { "name": "Gemini: structured output via /v1/chat (json_schema)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Gemini: structured output via /v1/chat (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Pick a city\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}}\n}"}, "url": 
{"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, { "name": "[PREVIEW] Gemini: cached content reference", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Use cache\"}]}],\n \"cachedContent\": \"cachedContents/REPLACE_ME\"\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{genaiModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{genaiModel}}:generateContent"]}}}, { "name": "Gemini: audio input (inline)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Transcribe\"},{\"inlineData\":{\"mimeType\":\"audio/wav\",\"data\":\"UklGRkQDAABXQVZFZm10IBAAAAABAAEAQB8AAIA+AAACABAAZGF0YSADAAAAAJUK6xPlGrIe3R5iG6oUgAv7AFj22eye5YHhAOEo5Jzql/MK/rcIXhLYGUQeHB9GHBgWTg3wAjv4cO645v7h0OBS4znp0fEW/NEGvhCxGLgdPB8OHXAXDg/jBCX6GPDs55niwOCZ4uznGPAl+uMEDg9wFw4dPB+4HbEYvhDRBhb80fE56VLj0OD+4bjmcO47+PACTg0YFkYcHB9EHtgZXhK3CAr+l/Oc6ijkAOGB4Z7l2exY9vsAgAuqFGIb3R6yHuUa6xOVCgAAa/UV7BvlTuEj4Z7kVuuA9AX/qAknE2Iafx4AH9gbZBVpDPYBSfei7SjmvOHk4Lrj6Omy8hD9xQeQEUgZAh4wH64cxxYvDuoDL/lC70/nSOLE4PLikOjy8B372wXoDxQYZx1AH2cdFBjoD9sFHfvy8JDo8uLE4EjiT+dC7y/56gMvDscWrhwwHwIeSBmQEcUHEP2y8ujpuuPk4LzhKOai7Un39gFpDGQV2BsAH38eYhonE6gJBf+A9FbrnuQj4U7hG+UV7Gv1AACVCusT5RqyHt0eYhuqFIAL+wBY9tnsnuWB4QDhKOSc6pfzCv63CF4S2BlEHhwfRhwYFk4N8AI7+HDuuOb+4dDgUuM56dHxFvzRBr4QsRi4HTwfDh1wFw4P4wQl+hjw7OeZ4sDgmeLs5xjwJfrjBA4PcBcOHTwfuB2xGL4Q0QYW/NHxOelS49Dg/uG45nDuO/jwAk4NGBZGHBwfRB7YGV4StwgK/pfznOoo5ADhgeGe5dnsWPb7AIALqhRiG90esh7lGusTlQoAAGv1Fewb5U7hI+Ge5FbrgPQF/6gJJxNiGn8eAB/YG2QVaQz2AUn3ou0o5rzh5OC64+jpsvIQ/cUHkBFIGQIeMB+uHMcWLw7qAy/5Qu9P50jixODy4pDo8vAd+9sF6A8UGGcdQB9nHRQY6A/bBR378vCQ6PLixOBI4k/nQu8v+eoDLw7HFq4cMB8CHkgZkBHFBx
D9svLo6brj5OC84Sjmou1J9/YBaQxkFdgbAB9/HmIaJxOoCQX/gPRW657kI+FO4RvlFexr9Q==\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/{{genaiModel}}:generateContent","host":["{{baseUrl}}"],"path":["genai","v1beta","models","{{genaiModel}}:generateContent"]}}}, { "name": "Gemini: list cached contents", "request": { "method": "GET", "header": [{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "url": {"raw":"{{baseUrl}}/genai/v1beta/cachedContents","host":["{{baseUrl}}"],"path":["genai","v1beta","cachedContents"]}}}, @@ -1782,7 +1790,375 @@ { "name": "[PREVIEW] Azure: skills/container", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"api-key","value":"{{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"input\": \"Use skills\",\n \"tools\": [{\"type\":\"code_interpreter\",\"container\":{\"type\":\"auto\"}}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/openai/deployments/{{azureDeployment}}/responses?api-version=2025-04-01-preview","host":["{{baseUrl}}"],"path":["openai","openai","deployments","{{azureDeployment}}","responses"],"query":[{"key":"api-version","value":"2025-04-01-preview"}]}}}, { "name": "Azure: service_tier scale", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"api-key","value":"{{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"service_tier\": \"flex\"\n}"}, "url": {"raw":"{{baseUrl}}/openai/openai/deployments/{{azureDeployment}}/chat/completions?api-version={{azureApiVersion}}","host":["{{baseUrl}}"],"path":["openai","openai","deployments","{{azureDeployment}}","chat","completions"],"query":[{"key":"api-version","value":"{{azureApiVersion}}"}]}}} ] + }, + { + "name": "Cross-Cut Round 4 (Vertex Claude + Vertex Gemini + Azure cross-cut)", + "item": [ + { "name": "Vertex Claude: structured output (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if 
(pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (CityInfo)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('name').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p.name.toLowerCase(), 'expected Paris').to.include('paris'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"system\",\"content\":\"Extract the city information from the user's message.\"},{\"role\":\"user\",\"content\":\"I visited Paris, France last summer.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"CityInfo\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"}},\"required\":[\"name\",\"country\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: function calling cross-cut", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city argument', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = 
j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: streaming", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: vision", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: tool_choice forced", "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"f\",\"parameters\":{\"type\":\"object\"}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: stop sequences", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: multi-turn cross-cut", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: system message cross-cut", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: web search via /v1/chat (sonnet)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": 
{"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: code execution via /v1/chat (opus)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Compute 50!\"}],\n \"tools\": [{\"type\":\"code_execution_20250522\",\"name\":\"code_execution\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: PDF input via /v1/chat (sonnet)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"document\",\"source\":{\"type\":\"url\",\"url\":\"https://www.berkshirehathaway.com/letters/2024ltr.pdf\"}},{\"type\":\"text\",\"text\":\"Summarize\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Claude: sampling-params dropped for Opus 4.7", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"temperature\": 0.7,\n \"top_p\": 0.9\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Gemini: function calling cross-cut", "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city argument', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls in response').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('arguments not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Vertex Gemini: streaming cross-cut", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: web search Vertex Gemini (google_search)", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n 
\"tools\": [{\"type\":\"google_search\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: stop sequences (Bedrock)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked into content').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter'], 'unexpected finish_reason: ' + fr).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: stop sequences (Vertex Gemini)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked into content').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter'], 'unexpected finish_reason: ' + fr).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": 
\"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: Azure basic chat", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: Azure tools", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: Azure structured output (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country for Paris\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"}},\"required\":[\"city\",\"country\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: Azure streaming", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: Azure vision", "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut 
Round 5: Structured Output Matrix (response_format json_schema via /v1/chat across providers/models)", + "item": [ + { "name": "Cross-cut: openai/gpt-5 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-5-mini (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = 
''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4.1 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4.1\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/o3-mini (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/o3-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-opus-4-7 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-haiku-4-5 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "[PREVIEW] Cross-cut: bedrock/nova-pro (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/us.amazon.nova-pro-v1:0\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "[PREVIEW] Cross-cut: bedrock/nova-lite (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/us.amazon.nova-lite-v1:0\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-flash (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: schema-compliant JSON (city/country/pop)', function () { var j = pm.response.json(); var c = ''; if (j.choices && j.choices[0] && j.choices[0].message) { c = j.choices[0].message.content || ''; } if (!c && Array.isArray(j.content)) { var tb = j.content.find(function (b) { return b.type === 'text' && b.text; }); c = tb ? 
tb.text : ''; } pm.expect(c, 'content was empty').to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('content not JSON: ' + e.message + ' (got: ' + c.slice(0,120) + ')'); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 6: Function Calling Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", 
"header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-5-mini function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: 
get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4.1 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": 
{"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4.1\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/o3-mini function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/o3-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = 
pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n 
\"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', 
function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "[PREVIEW] Cross-cut: bedrock/us.amazon.nova-pro-v1:0 function calling", 
"event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/us.amazon.nova-pro-v1:0\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } 
pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-pro function 
calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-flash function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } 
pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-flash function 
calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: azure/{{azureDeployment}} function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked with city', function () { var j = pm.response.json(); var tc = null; if (j.choices && j.choices[0] && j.choices[0].message && Array.isArray(j.choices[0].message.tool_calls) && j.choices[0].message.tool_calls.length) { tc = j.choices[0].message.tool_calls[0]; } pm.expect(tc, 'no tool_calls').to.not.be.null; pm.expect(tc.function && tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } 
pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 7: Streaming Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-5-mini streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": 
{"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4.1 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4.1\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/o3-mini streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": 
\"openai/o3-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + 
ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: 
bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n 
\"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-pro streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-flash streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { 
"method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-flash streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: azure/{{azureDeployment}} streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: response is SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected SSE, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 8: Vision Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if 
(pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4.1 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] 
&& j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4.1\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; 
pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; 
pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); 
}"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-pro vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], 
"body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-flash vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": 
[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-flash vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: azure/{{azureDeployment}} vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": 
[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 9: Tool Choice Forced Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-5-mini tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5-mini\",\n \"messages\": 
[{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4.1 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4.1\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": 
[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": 
[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": 
[{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n 
\"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": 
[{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: azure/{{azureDeployment}} tool_choice forced", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Tool choice forced: tool_calls present', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls; pm.expect(Array.isArray(tc) && tc.length > 0).to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"azure/{{azureDeployment}}\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}],\n \"tool_choice\": \"required\"\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 10: Stop Sequences Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); 
pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); 
pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" 
leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; 
pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && 
j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-flash stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-flash stop sequences", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Stop sequence: halted before stop token', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; var fr = (j.choices && j.choices[0] && 
j.choices[0].finish_reason) || ''; pm.expect(c.toLowerCase(), 'stop token \"three\" leaked').to.not.include('three'); pm.expect(['stop','stop_sequence','length','content_filter']).to.include(fr); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count: one, two, three, four\"}],\n \"stop\": [\"three\"]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 11: Multi-turn Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": 
{"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": 
{"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { 
pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-pro multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": 
{"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 12: System Message Matrix", + "item": [ + { "name": "Cross-cut: openai/gpt-5 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-5\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: openai/gpt-4o system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"openai/gpt-4o\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-opus-4-7 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); 
var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"messages\": 
[{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-opus-4-7 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/global.anthropic.claude-sonnet-4-6 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 system message", "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-pro system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-pro system message", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-pro\",\n \"messages\": [{\"role\":\"system\",\"content\":\"You are a pirate.\"},{\"role\":\"user\",\"content\":\"Greet me\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 13: Web Search Matrix", + "item": [ + { "name": "Cross-cut: anthropic/claude-opus-4-7 web_search", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Web search: response non-empty', function () { var raw = JSON.stringify(pm.response.json()); pm.expect(raw.length).to.be.greaterThan(100); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": 
{"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 web_search", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Web search: response non-empty', function () { var raw = JSON.stringify(pm.response.json()); pm.expect(raw.length).to.be.greaterThan(100); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-opus-4-7 web_search", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Web search: response non-empty', function () { var raw = JSON.stringify(pm.response.json()); pm.expect(raw.length).to.be.greaterThan(100); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 web_search", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Web search: response non-empty', function () { var raw = JSON.stringify(pm.response.json()); 
pm.expect(raw.length).to.be.greaterThan(100); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"web_search_20250305\",\"name\":\"web_search\",\"max_uses\":2}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: gemini/gemini-2.5-flash google_search", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Web search: response non-empty', function () { var raw = JSON.stringify(pm.response.json()); pm.expect(raw.length).to.be.greaterThan(100); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gemini/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"google_search\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/gemini-2.5-flash google_search", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Web search: response non-empty', function () { var raw = JSON.stringify(pm.response.json()); pm.expect(raw.length).to.be.greaterThan(100); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/gemini-2.5-flash\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Latest news\"}],\n \"tools\": [{\"type\":\"google_search\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 
14: Code Execution Matrix", + "item": [ + { "name": "Cross-cut: anthropic/claude-opus-4-7 code_execution", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Compute 50!\"}],\n \"tools\": [{\"type\":\"code_execution_20250522\",\"name\":\"code_execution\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-opus-4-7 code_execution", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Compute 50!\"}],\n \"tools\": [{\"type\":\"code_execution_20250522\",\"name\":\"code_execution\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 code_execution", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var 
j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Compute 50!\"}],\n \"tools\": [{\"type\":\"code_execution_20250522\",\"name\":\"code_execution\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 15: Extended/Adaptive Thinking Matrix", + "item": [ + { "name": "Cross-cut: anthropic/claude-opus-4-7 adaptive thinking", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"adaptive\"},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 enabled thinking", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", 
"header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"enabled\",\"budget_tokens\":2000},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-opus-4-7 adaptive thinking", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"adaptive\"},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-sonnet-4-6 enabled thinking", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-sonnet-4-6\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"enabled\",\"budget_tokens\":2000},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in 
steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 adaptive thinking", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"adaptive\"},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 enabled thinking", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"max_tokens\": 4096,\n \"thinking\": {\"type\":\"enabled\",\"budget_tokens\":2000},\n \"messages\": [{\"role\":\"user\",\"content\":\"Solve in steps\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 16: Prompt Caching Matrix", + "item": [ + { "name": "Cross-cut: anthropic/claude-opus-4-7 prompt caching", "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Prompt caching: usage present', function () { var j = pm.response.json(); pm.expect(j.usage || {}).to.be.an('object'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"max_tokens\": 256,\n \"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-haiku-4-5 prompt caching", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Prompt caching: usage present', function () { var j = pm.response.json(); pm.expect(j.usage || {}).to.be.an('object'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-opus-4-7 prompt caching", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Prompt caching: usage present', function () { var j = pm.response.json(); pm.expect(j.usage || {}).to.be.an('object'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"max_tokens\": 256,\n 
\"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-haiku-4-5 prompt caching", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Prompt caching: usage present', function () { var j = pm.response.json(); pm.expect(j.usage || {}).to.be.an('object'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0\",\n \"max_tokens\": 256,\n \"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 prompt caching", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Prompt caching: usage present', function () { var j = pm.response.json(); pm.expect(j.usage || {}).to.be.an('object'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 256,\n \"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-sonnet-4-6 prompt caching", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if 
(pm.response.code < 400) { pm.test('Prompt caching: usage present', function () { var j = pm.response.json(); pm.expect(j.usage || {}).to.be.an('object'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-sonnet-4-6\",\n \"max_tokens\": 256,\n \"system\": [{\"type\":\"text\",\"text\":\"Long ctx\",\"cache_control\":{\"type\":\"ephemeral\"}}],\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 17: PDF Input Matrix", + "item": [ + { "name": "Cross-cut: anthropic/claude-opus-4-7 PDF input", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-opus-4-7\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"document\",\"source\":{\"type\":\"url\",\"url\":\"https://www.berkshirehathaway.com/letters/2024ltr.pdf\"}},{\"type\":\"text\",\"text\":\"Summarize\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: anthropic/claude-sonnet-4-6 PDF input", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; 
pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"anthropic/claude-sonnet-4-6\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"document\",\"source\":{\"type\":\"url\",\"url\":\"https://www.berkshirehathaway.com/letters/2024ltr.pdf\"}},{\"type\":\"text\",\"text\":\"Summarize\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: bedrock/claude-opus-4-7 PDF input", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"bedrock/global.anthropic.claude-opus-4-7\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"document\",\"source\":{\"type\":\"url\",\"url\":\"https://www.berkshirehathaway.com/letters/2024ltr.pdf\"}},{\"type\":\"text\",\"text\":\"Summarize\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}}, + { "name": "Cross-cut: vertex/claude-opus-4-7 PDF input", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"vertex/claude-opus-4-7\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"document\",\"source\":{\"type\":\"url\",\"url\":\"https://www.berkshirehathaway.com/letters/2024ltr.pdf\"}},{\"type\":\"text\",\"text\":\"Summarize\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/v1/chat/completions","host":["{{baseUrl}}"],"path":["v1","chat","completions"]}}} + ] + }, + { + "name": "Cross-Cut Round 18: Cohere Drop-in Smoke", + "item": [ + { "name": "Cohere drop-in: basic chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Cohere shape: message.content non-empty', function () { var j = pm.response.json(); var c = j.message && j.message.content; pm.expect(Array.isArray(c) ? c.length > 0 : (typeof c === 'string' && c.length > 0), 'expected non-empty cohere message content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/cohere/v2/chat","host":["{{baseUrl}}"],"path":["cohere", "v2", "chat"]} } }, + { "name": "Cohere drop-in: streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var raw = JSON.stringify(j); pm.expect(raw.length, 'body too small').to.be.greaterThan(50); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}]\n,\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/cohere/v2/chat","host":["{{baseUrl}}"],"path":["cohere", "v2", "chat"]} } }, + { 
"name": "Cohere drop-in: multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Cohere shape: message.content non-empty', function () { var j = pm.response.json(); var c = j.message && j.message.content; pm.expect(Array.isArray(c) ? c.length > 0 : (typeof c === 'string' && c.length > 0), 'expected non-empty cohere message content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/cohere/v2/chat","host":["{{baseUrl}}"],"path":["cohere", "v2", "chat"]} } }, + { "name": "Cohere drop-in: tools", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Cohere shape: message.content non-empty', function () { var j = pm.response.json(); var c = j.message && j.message.content; pm.expect(Array.isArray(c) ? 
c.length > 0 : (typeof c === 'string' && c.length > 0), 'expected non-empty cohere message content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/cohere/v2/chat","host":["{{baseUrl}}"],"path":["cohere", "v2", "chat"]} } }, + { "name": "Cohere drop-in: list models", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Response content non-empty', function () { var j = pm.response.json(); var raw = JSON.stringify(j); pm.expect(raw.length, 'body too small').to.be.greaterThan(50); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":""}, "url": {"raw":"{{baseUrl}}/cohere/v1/models","host":["{{baseUrl}}"],"path":["cohere", "v1", "models"]} } } + ] + }, + { + "name": "Cross-Cut Round 19: LangChain Drop-in Smoke", + "item": [ + { "name": "LangChain drop-in: OpenAI shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('OpenAI shape: choices[0].message.content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && (j.choices[0].message.content || (j.choices[0].message.tool_calls && j.choices[0].message.tool_calls.length))) || ''; pm.expect(c, 'no content or tool_calls').to.be.ok; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": 
[{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v1/chat/completions","host":["{{baseUrl}}"],"path":["langchain", "v1", "chat", "completions"]} } }, + { "name": "LangChain drop-in: Anthropic shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic shape: content array non-empty', function () { var j = pm.response.json(); pm.expect(Array.isArray(j.content) && j.content.length > 0, 'expected non-empty content array').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v1/messages","host":["{{baseUrl}}"],"path":["langchain", "v1", "messages"]} } }, + { "name": "LangChain drop-in: Gemini shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini shape: candidates[0].content.parts non-empty', function () { var j = pm.response.json(); var parts = j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts; pm.expect(Array.isArray(parts) && parts.length > 0, 'expected non-empty parts').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Hello\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["langchain", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "LangChain drop-in: Bedrock shape converse", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock Converse 
shape: output.message.content non-empty', function () { var j = pm.response.json(); var content = j.output && j.output.message && j.output.message.content; pm.expect(Array.isArray(content) && content.length > 0, 'expected non-empty content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Hello\"}]}],\n \"inferenceConfig\": {\"maxTokens\": 256}\n}"}, "url": {"raw":"{{baseUrl}}/langchain/model/global.anthropic.claude-haiku-4-5-20251001-v1:0/converse","host":["{{baseUrl}}"],"path":["langchain", "model", "global.anthropic.claude-haiku-4-5-20251001-v1:0", "converse"]} } }, + { "name": "LangChain drop-in: Cohere shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Cohere shape: message.content non-empty', function () { var j = pm.response.json(); var c = j.message && j.message.content; pm.expect(Array.isArray(c) ? 
c.length > 0 : (typeof c === 'string' && c.length > 0), 'expected non-empty cohere message content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v2/chat","host":["{{baseUrl}}"],"path":["langchain", "v2", "chat"]} } } + ] + }, + { + "name": "Cross-Cut Round 20: LiteLLM Drop-in Smoke", + "item": [ + { "name": "LiteLLM drop-in: OpenAI shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('OpenAI shape: choices[0].message.content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && (j.choices[0].message.content || (j.choices[0].message.tool_calls && j.choices[0].message.tool_calls.length))) || ''; pm.expect(c, 'no content or tool_calls').to.be.ok; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v1/chat/completions","host":["{{baseUrl}}"],"path":["litellm", "v1", "chat", "completions"]} } }, + { "name": "LiteLLM drop-in: Anthropic shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic shape: content array non-empty', function () { var j = pm.response.json(); pm.expect(Array.isArray(j.content) && j.content.length > 0, 'expected non-empty content array').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n 
\"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v1/messages","host":["{{baseUrl}}"],"path":["litellm", "v1", "messages"]} } }, + { "name": "LiteLLM drop-in: Gemini shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini shape: candidates[0].content.parts non-empty', function () { var j = pm.response.json(); var parts = j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts; pm.expect(Array.isArray(parts) && parts.length > 0, 'expected non-empty parts').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Hello\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["litellm", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "LiteLLM drop-in: Bedrock shape converse", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock Converse shape: output.message.content non-empty', function () { var j = pm.response.json(); var content = j.output && j.output.message && j.output.message.content; pm.expect(Array.isArray(content) && content.length > 0, 'expected non-empty content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Hello\"}]}],\n \"inferenceConfig\": {\"maxTokens\": 256}\n}"}, "url": {"raw":"{{baseUrl}}/litellm/model/global.anthropic.claude-haiku-4-5-20251001-v1:0/converse","host":["{{baseUrl}}"],"path":["litellm", "model", "global.anthropic.claude-haiku-4-5-20251001-v1:0", "converse"]} } }, + { "name": "LiteLLM drop-in: Cohere shape chat", "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Cohere shape: message.content non-empty', function () { var j = pm.response.json(); var c = j.message && j.message.content; pm.expect(Array.isArray(c) ? c.length > 0 : (typeof c === 'string' && c.length > 0), 'expected non-empty cohere message content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v2/chat","host":["{{baseUrl}}"],"path":["litellm", "v2", "chat"]} } } + ] + }, + { + "name": "Cross-Cut Round 21: PydanticAI Drop-in Smoke", + "item": [ + { "name": "PydanticAI drop-in: OpenAI shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('OpenAI shape: choices[0].message.content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && (j.choices[0].message.content || (j.choices[0].message.tool_calls && j.choices[0].message.tool_calls.length))) || ''; pm.expect(c, 'no content or tool_calls').to.be.ok; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v1/chat/completions","host":["{{baseUrl}}"],"path":["pydanticai", "v1", "chat", "completions"]} } }, + { "name": "PydanticAI drop-in: Anthropic shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic shape: content array non-empty', function () { var j = pm.response.json(); pm.expect(Array.isArray(j.content) && j.content.length > 0, 'expected non-empty content 
array').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v1/messages","host":["{{baseUrl}}"],"path":["pydanticai", "v1", "messages"]} } }, + { "name": "PydanticAI drop-in: Gemini shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini shape: candidates[0].content.parts non-empty', function () { var j = pm.response.json(); var parts = j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts; pm.expect(Array.isArray(parts) && parts.length > 0, 'expected non-empty parts').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Hello\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["pydanticai", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "PydanticAI drop-in: Bedrock shape converse", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock Converse shape: output.message.content non-empty', function () { var j = pm.response.json(); var content = j.output && j.output.message && j.output.message.content; pm.expect(Array.isArray(content) && content.length > 0, 'expected non-empty content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Hello\"}]}],\n \"inferenceConfig\": {\"maxTokens\": 256}\n}"}, 
"url": {"raw":"{{baseUrl}}/pydanticai/model/global.anthropic.claude-haiku-4-5-20251001-v1:0/converse","host":["{{baseUrl}}"],"path":["pydanticai", "model", "global.anthropic.claude-haiku-4-5-20251001-v1:0", "converse"]} } }, + { "name": "PydanticAI drop-in: Cohere shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Cohere shape: message.content non-empty', function () { var j = pm.response.json(); var c = j.message && j.message.content; pm.expect(Array.isArray(c) ? c.length > 0 : (typeof c === 'string' && c.length > 0), 'expected non-empty cohere message content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"command-r-plus\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v2/chat","host":["{{baseUrl}}"],"path":["pydanticai", "v2", "chat"]} } } + ] + }, + { + "name": "Cross-Cut Round 22: Cursor Drop-in Smoke", + "item": [ + { "name": "Cursor drop-in: OpenAI shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('OpenAI shape: choices[0].message.content non-empty', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && (j.choices[0].message.content || (j.choices[0].message.tool_calls && j.choices[0].message.tool_calls.length))) || ''; pm.expect(c, 'no content or tool_calls').to.be.ok; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/cursor/v1/chat/completions","host":["{{baseUrl}}"],"path":["cursor", "v1", "chat", "completions"]} } }, + { "name": "Cursor drop-in: Anthropic shape chat", "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic shape: content array non-empty', function () { var j = pm.response.json(); pm.expect(Array.isArray(j.content) && j.content.length > 0, 'expected non-empty content array').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Hello\"}]\n}"}, "url": {"raw":"{{baseUrl}}/cursor/v1/messages","host":["{{baseUrl}}"],"path":["cursor", "v1", "messages"]} } }, + { "name": "Cursor drop-in: Gemini shape chat", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini shape: candidates[0].content.parts non-empty', function () { var j = pm.response.json(); var parts = j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts; pm.expect(Array.isArray(parts) && parts.length > 0, 'expected non-empty parts').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Hello\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/cursor/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["cursor", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Cursor drop-in: Bedrock shape converse", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock Converse shape: output.message.content non-empty', function () { var j = pm.response.json(); var content = j.output && j.output.message && j.output.message.content; pm.expect(Array.isArray(content) && content.length > 0, 'expected non-empty content').to.be.true; }); }"]}}], 
"request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Hello\"}]}],\n \"inferenceConfig\": {\"maxTokens\": 256}\n}"}, "url": {"raw":"{{baseUrl}}/cursor/model/global.anthropic.claude-haiku-4-5-20251001-v1:0/converse","host":["{{baseUrl}}"],"path":["cursor", "model", "global.anthropic.claude-haiku-4-5-20251001-v1:0", "converse"]} } } + ] + }, + { + "name": "Cross-Cut Round 23: Drop-in Structured Output Matrix (native shapes via /openai, /anthropic, /bedrock, /genai)", + "item": [ + { "name": "Drop-in /openai: gpt-5 (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: JSON with city/country/pop', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('not JSON: ' + e.message); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": 
{"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai: gpt-4o (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: JSON with city/country/pop', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('not JSON: ' + e.message); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai: gpt-4o-mini (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: JSON with city/country/pop', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; var p; try { p = 
JSON.parse(c); } catch (e) { pm.expect.fail('not JSON: ' + e.message); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /anthropic: claude-haiku-4-5 (forced tool emit_city)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic shape: content array non-empty', function () { var j = pm.response.json(); pm.expect(Array.isArray(j.content) && j.content.length > 0, 'expected non-empty content array').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"tools\": 
[{\"name\":\"emit_city\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"]}}],\n \"tool_choice\": {\"type\":\"tool\",\"name\":\"emit_city\"}\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /anthropic: claude-sonnet-4-6 (forced tool emit_city)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic shape: content array non-empty', function () { var j = pm.response.json(); pm.expect(Array.isArray(j.content) && j.content.length > 0, 'expected non-empty content array').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-sonnet-4-6\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"tools\": [{\"name\":\"emit_city\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"]}}],\n \"tool_choice\": {\"type\":\"tool\",\"name\":\"emit_city\"}\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /bedrock: claude-haiku Converse (toolChoice emit_city)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock Converse shape: output.message.content non-empty', function () { var j = pm.response.json(); var content = j.output && j.output.message && j.output.message.content; pm.expect(Array.isArray(content) && 
content.length > 0, 'expected non-empty content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Extract city/country/pop for Paris.\"}]}],\n \"toolConfig\": {\"tools\":[{\"toolSpec\":{\"name\":\"emit_city\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"]}}}}],\"toolChoice\":{\"tool\":{\"name\":\"emit_city\"}}}\n}"}, "url": {"raw":"{{baseUrl}}/bedrock/model/global.anthropic.claude-haiku-4-5-20251001-v1:0/converse","host":["{{baseUrl}}"],"path":["bedrock", "model", "global.anthropic.claude-haiku-4-5-20251001-v1:0", "converse"]} } }, + { "name": "Drop-in /genai: gemini-2.5-flash (responseSchema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini shape: candidates[0].content.parts non-empty', function () { var j = pm.response.json(); var parts = j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts; pm.expect(Array.isArray(parts) && parts.length > 0, 'expected non-empty parts').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Extract city/country/pop for Paris.\"}]}],\n \"generationConfig\": {\"responseMimeType\":\"application/json\",\"responseSchema\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"},\"country\":{\"type\":\"STRING\"},\"pop\":{\"type\":\"NUMBER\"}}}}\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } 
}, + { "name": "Drop-in /genai: gemini-2.5-pro (responseSchema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini shape: candidates[0].content.parts non-empty', function () { var j = pm.response.json(); var parts = j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts; pm.expect(Array.isArray(parts) && parts.length > 0, 'expected non-empty parts').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Extract city/country/pop for Paris.\"}]}],\n \"generationConfig\": {\"responseMimeType\":\"application/json\",\"responseSchema\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"},\"country\":{\"type\":\"STRING\"},\"pop\":{\"type\":\"NUMBER\"}}}}\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-pro:generateContent","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-pro:generateContent"]} } } + ] + }, + { + "name": "Cross-Cut Round 24: Drop-in Function Calling Matrix (native shapes)", + "item": [ + { "name": "Drop-in /openai: gpt-5 function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls && j.choices[0].message.tool_calls[0]; pm.expect(tc, 'no tool_calls').to.be.ok; pm.expect(tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai: gpt-4o function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls && j.choices[0].message.tool_calls[0]; pm.expect(tc, 'no tool_calls').to.be.ok; pm.expect(tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai: gpt-4o-mini function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { 
pm.test('Function call: get_weather invoked', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls && j.choices[0].message.tool_calls[0]; pm.expect(tc, 'no tool_calls').to.be.ok; pm.expect(tc.function.name).to.equal('get_weather'); var a; try { a = JSON.parse(tc.function.arguments); } catch (e) { pm.expect.fail('args not JSON: ' + e.message); return; } pm.expect(a).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /anthropic: claude-opus-4-7 tool_use", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic: tool_use block with get_weather', function () { var j = pm.response.json(); var tu = (j.content || []).find(function (b) { return b.type === 'tool_use'; }); pm.expect(tu, 'no tool_use block').to.be.ok; pm.expect(tu.name).to.equal('get_weather'); pm.expect(tu.input).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-opus-4-7\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": 
[{\"name\":\"get_weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /anthropic: claude-sonnet-4-6 tool_use", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic: tool_use block with get_weather', function () { var j = pm.response.json(); var tu = (j.content || []).find(function (b) { return b.type === 'tool_use'; }); pm.expect(tu, 'no tool_use block').to.be.ok; pm.expect(tu.name).to.equal('get_weather'); pm.expect(tu.input).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-sonnet-4-6\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"name\":\"get_weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /anthropic: claude-haiku-4-5 tool_use", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic: tool_use block with get_weather', function () { var j = pm.response.json(); var tu = (j.content || []).find(function (b) { return b.type === 'tool_use'; }); pm.expect(tu, 'no tool_use block').to.be.ok; pm.expect(tu.name).to.equal('get_weather'); pm.expect(tu.input).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"name\":\"get_weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /bedrock: claude-sonnet-4-6 Converse tool_use", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock Converse shape: output.message.content non-empty', function () { var j = pm.response.json(); var content = j.output && j.output.message && j.output.message.content; pm.expect(Array.isArray(content) && content.length > 0, 'expected non-empty content').to.be.true; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Weather in Lagos?\"}]}],\n \"toolConfig\": {\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}}]}\n}"}, "url": {"raw":"{{baseUrl}}/bedrock/model/global.anthropic.claude-sonnet-4-6/converse","host":["{{baseUrl}}"],"path":["bedrock", "model", "global.anthropic.claude-sonnet-4-6", "converse"]} } }, + { "name": "Drop-in /genai: gemini-2.5-flash functionDeclarations", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini: functionCall in parts', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && 
j.candidates[0].content.parts) || []; var fc = parts.find(function (p) { return p && p.functionCall; }); pm.expect(fc, 'no functionCall').to.be.ok; pm.expect(fc.functionCall.name).to.equal('get_weather'); pm.expect(fc.functionCall.args).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Weather in Lagos?\"}]}],\n \"tools\": [{\"functionDeclarations\":[{\"name\":\"get_weather\",\"parameters\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"}},\"required\":[\"city\"]}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Drop-in /genai: gemini-2.5-pro functionDeclarations", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini: functionCall in parts', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var fc = parts.find(function (p) { return p && p.functionCall; }); pm.expect(fc, 'no functionCall').to.be.ok; pm.expect(fc.functionCall.name).to.equal('get_weather'); pm.expect(fc.functionCall.args).to.have.property('city').that.is.a('string'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Weather in Lagos?\"}]}],\n \"tools\": [{\"functionDeclarations\":[{\"name\":\"get_weather\",\"parameters\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"}},\"required\":[\"city\"]}}]}]\n}"}, "url": 
{"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-pro:generateContent","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-pro:generateContent"]} } } + ] + }, + { + "name": "Cross-Cut Round 25: Drop-in Vision Matrix (native shapes)", + "item": [ + { "name": "Drop-in /openai: gpt-5 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai: gpt-4o vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": 
[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai: gpt-4.1 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4.1\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /anthropic: claude-opus-4-7 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": 
{"mode":"raw","raw":"{\n \"model\": \"claude-opus-4-7\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /anthropic: claude-sonnet-4-6 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-sonnet-4-6\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /anthropic: claude-haiku-4-5 vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /genai: gemini-2.5-flash vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Drop-in /genai: gemini-2.5-pro vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && 
j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-pro:generateContent","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-pro:generateContent"]} } } + ] + }, + { + "name": "Cross-Cut Round 26: Drop-in Streaming Matrix (native shapes)", + "item": [ + { "name": "Drop-in /openai stream: gpt-4o", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai stream: gpt-4o-mini", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /openai stream: gpt-5", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-5\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/openai/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /anthropic stream: claude-opus-4-7", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-opus-4-7\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, 
+ { "name": "Drop-in /anthropic stream: claude-sonnet-4-6", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-sonnet-4-6\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /anthropic stream: claude-haiku-4-5", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/anthropic/v1/messages","host":["{{baseUrl}}"],"path":["anthropic", "v1", "messages"]} } }, + { "name": "Drop-in /bedrock stream: claude-haiku Converse", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Bedrock stream: AWS event-stream or SSE', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected 
stream content-type, got ' + ct).to.match(/event-stream|vnd\\.amazon\\.eventstream/); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"text\":\"Count 1-5.\"}]}],\n \"inferenceConfig\": {\"maxTokens\": 256}\n}"}, "url": {"raw":"{{baseUrl}}/bedrock/model/global.anthropic.claude-haiku-4-5-20251001-v1:0/converse-stream","host":["{{baseUrl}}"],"path":["bedrock", "model", "global.anthropic.claude-haiku-4-5-20251001-v1:0", "converse-stream"]} } }, + { "name": "Drop-in /genai stream: gemini-2.5-flash", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Count 1-5.\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-flash:streamGenerateContent"],"query":[{"key":"alt","value":"sse"}]} } }, + { "name": "Drop-in /genai stream: gemini-2.5-pro", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Count 1-5.\"}]}]\n}"}, 
"url": {"raw":"{{baseUrl}}/genai/v1beta/models/gemini-2.5-pro:streamGenerateContent?alt=sse","host":["{{baseUrl}}"],"path":["genai", "v1beta", "models", "gemini-2.5-pro:streamGenerateContent"],"query":[{"key":"alt","value":"sse"}]} } } + ] + }, + { + "name": "Cross-Cut Round 27: Drop-in Umbrella Vision Matrix (vision via /langchain, /litellm, /pydanticai, /cursor)", + "item": [ + { "name": "Drop-in /langchain vision: OpenAI shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v1/chat/completions","host":["{{baseUrl}}"],"path":["langchain", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /langchain vision: Anthropic shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 
512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v1/messages","host":["{{baseUrl}}"],"path":["langchain", "v1", "messages"]} } }, + { "name": "Drop-in /langchain vision: Gemini shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/langchain/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["langchain", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Drop-in /litellm vision: OpenAI shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": 
[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v1/chat/completions","host":["{{baseUrl}}"],"path":["litellm", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /litellm vision: Anthropic shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v1/messages","host":["{{baseUrl}}"],"path":["litellm", "v1", "messages"]} } }, + { "name": "Drop-in /litellm vision: Gemini shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n 
\"contents\": [{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/litellm/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["litellm", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Drop-in /pydanticai vision: OpenAI shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v1/chat/completions","host":["{{baseUrl}}"],"path":["pydanticai", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /pydanticai vision: Anthropic shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 
512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v1/messages","host":["{{baseUrl}}"],"path":["pydanticai", "v1", "messages"]} } }, + { "name": "Drop-in /pydanticai vision: Gemini shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/pydanticai/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["pydanticai", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Drop-in /cursor vision: OpenAI shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o\",\n \"messages\": 
[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/cursor/v1/chat/completions","host":["{{baseUrl}}"],"path":["cursor", "v1", "chat", "completions"]} } }, + { "name": "Drop-in /cursor vision: Anthropic shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/cursor/v1/messages","host":["{{baseUrl}}"],"path":["cursor", "v1", "messages"]} } }, + { "name": "Drop-in /cursor vision: Gemini shape", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"}], "body": {"mode":"raw","raw":"{\n \"contents\": 
[{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/cursor/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["cursor", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } } + ] + }, + { + "name": "Cross-Cut Round 28: Passthrough Advanced Matrix (features via *_passthrough byte-for-byte routes)", + "item": [ + { "name": "Passthrough /openai: structured output (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: JSON with city/country/pop', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('not JSON: ' + e.message); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/openai_passthrough/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai_passthrough", "v1", "chat", "completions"]} } }, + { "name": 
"Passthrough /openai: function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Function call: get_weather invoked', function () { var j = pm.response.json(); var tc = j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.tool_calls && j.choices[0].message.tool_calls[0]; pm.expect(tc, 'no tool_calls').to.be.ok; pm.expect(tc.function.name).to.equal('get_weather'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}}]\n}"}, "url": {"raw":"{{baseUrl}}/openai_passthrough/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai_passthrough", "v1", "chat", "completions"]} } }, + { "name": "Passthrough /openai: streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"Authorization","value":"Bearer {{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"model\": \"gpt-4o-mini\",\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/openai_passthrough/v1/chat/completions","host":["{{baseUrl}}"],"path":["openai_passthrough", "v1", "chat", "completions"]} } }, + { "name": "Passthrough /anthropic: function calling (tool_use)", "event": 
[{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Anthropic: tool_use block', function () { var j = pm.response.json(); var tu = (j.content || []).find(function (b) { return b.type === 'tool_use'; }); pm.expect(tu, 'no tool_use').to.be.ok; pm.expect(tu.name).to.equal('get_weather'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 1024,\n \"messages\": [{\"role\":\"user\",\"content\":\"Weather in Lagos?\"}],\n \"tools\": [{\"name\":\"get_weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"]}}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic_passthrough/v1/messages","host":["{{baseUrl}}"],"path":["anthropic_passthrough", "v1", "messages"]} } }, + { "name": "Passthrough /anthropic: vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text block describes image', function () { var j = pm.response.json(); var t = (j.content || []).find(function (b) { return b.type === 'text' && b.text; }); pm.expect(t, 'no text block').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 512,\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"image\",\"source\":{\"type\":\"url\",\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}},{\"type\":\"text\",\"text\":\"Describe\"}]}]\n}"}, "url": 
{"raw":"{{baseUrl}}/anthropic_passthrough/v1/messages","host":["{{baseUrl}}"],"path":["anthropic_passthrough", "v1", "messages"]} } }, + { "name": "Passthrough /anthropic: multi-turn", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Multi-turn: response present', function () { var j = pm.response.json(); var c = (j.content && j.content.length > 0) || (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content); pm.expect(c, 'no content').to.be.ok; }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Hi\"},{\"role\":\"assistant\",\"content\":\"Hello\"},{\"role\":\"user\",\"content\":\"How are you?\"}]\n}"}, "url": {"raw":"{{baseUrl}}/anthropic_passthrough/v1/messages","host":["{{baseUrl}}"],"path":["anthropic_passthrough", "v1", "messages"]} } }, + { "name": "Passthrough /anthropic: streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-api-key","value":"{{anthropicKey}}"},{"key":"anthropic-version","value":"2023-06-01"}], "body": {"mode":"raw","raw":"{\n \"model\": \"claude-haiku-4-5\",\n \"max_tokens\": 256,\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/anthropic_passthrough/v1/messages","host":["{{baseUrl}}"],"path":["anthropic_passthrough", "v1", "messages"]} } }, + { "name": 
"Passthrough /genai: function calling", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Gemini: functionCall present', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var fc = parts.find(function (p) { return p && p.functionCall; }); pm.expect(fc, 'no functionCall').to.be.ok; pm.expect(fc.functionCall.name).to.equal('get_weather'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Weather in Lagos?\"}]}],\n \"tools\": [{\"functionDeclarations\":[{\"name\":\"get_weather\",\"parameters\":{\"type\":\"OBJECT\",\"properties\":{\"city\":{\"type\":\"STRING\"}},\"required\":[\"city\"]}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai_passthrough/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["genai_passthrough", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Passthrough /genai: vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: text part describes image', function () { var j = pm.response.json(); var parts = (j.candidates && j.candidates[0] && j.candidates[0].content && j.candidates[0].content.parts) || []; var t = parts.find(function (p) { return p && p.text; }); pm.expect(t, 'no text part').to.be.ok; pm.expect(t.text.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": 
[{\"parts\":[{\"text\":\"Describe\"},{\"fileData\":{\"mimeType\":\"image/jpeg\",\"fileUri\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai_passthrough/v1beta/models/gemini-2.5-flash:generateContent","host":["{{baseUrl}}"],"path":["genai_passthrough", "v1beta", "models", "gemini-2.5-flash:generateContent"]} } }, + { "name": "Passthrough /genai: streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"x-goog-api-key","value":"{{genaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"contents\": [{\"parts\":[{\"text\":\"Count 1-5.\"}]}]\n}"}, "url": {"raw":"{{baseUrl}}/genai_passthrough/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse","host":["{{baseUrl}}"],"path":["genai_passthrough", "v1beta", "models", "gemini-2.5-flash:streamGenerateContent"],"query":[{"key":"alt","value":"sse"}]} } }, + { "name": "Passthrough /azure: structured output (json_schema)", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Structured output: JSON with city/country/pop', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; var p; try { p = JSON.parse(c); } catch (e) { pm.expect.fail('not JSON: ' + e.message); return; } pm.expect(p).to.have.property('city').that.is.a('string'); pm.expect(p).to.have.property('country').that.is.a('string'); pm.expect(p).to.have.property('pop').that.is.a('number'); }); }"]}}], "request": { "method": "POST", "header": 
[{"key":"Content-Type","value":"application/json"},{"key":"api-key","value":"{{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":\"Extract city/country/pop for Paris.\"}],\n \"response_format\": {\"type\":\"json_schema\",\"json_schema\":{\"name\":\"city\",\"strict\":true,\"schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"},\"country\":{\"type\":\"string\"},\"pop\":{\"type\":\"number\"}},\"required\":[\"city\",\"country\",\"pop\"],\"additionalProperties\":false}}}\n}"}, "url": {"raw":"{{baseUrl}}/azure_passthrough/openai/deployments/{{azureDeployment}}/chat/completions?api-version={{azureApiVersion}}","host":["{{baseUrl}}"],"path":["azure_passthrough", "openai", "deployments", "{{azureDeployment}}", "chat", "completions"],"query":[{"key":"api-version","value":"{{azureApiVersion}}"}]} } }, + { "name": "Passthrough /azure: vision", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Vision: response describes image', function () { var j = pm.response.json(); var c = (j.choices && j.choices[0] && j.choices[0].message && j.choices[0].message.content) || ''; pm.expect(c).to.be.a('string').and.not.empty; pm.expect(c.length).to.be.greaterThan(20); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"api-key","value":"{{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Describe\"},{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://storage.googleapis.com/generativeai-downloads/images/scones.jpg\"}}]}]\n}"}, "url": {"raw":"{{baseUrl}}/azure_passthrough/openai/deployments/{{azureDeployment}}/chat/completions?api-version={{azureApiVersion}}","host":["{{baseUrl}}"],"path":["azure_passthrough", "openai", "deployments", "{{azureDeployment}}", "chat", 
"completions"],"query":[{"key":"api-version","value":"{{azureApiVersion}}"}]} } }, + { "name": "Passthrough /azure: streaming", "event": [{"listen":"test","script":{"type":"text/javascript","exec":["if (pm.response.code < 400) { pm.test('Streaming: SSE response', function () { var ct = pm.response.headers.get('content-type') || ''; pm.expect(ct, 'expected event-stream, got ' + ct).to.include('event-stream'); }); }"]}}], "request": { "method": "POST", "header": [{"key":"Content-Type","value":"application/json"},{"key":"api-key","value":"{{openaiKey}}"}], "body": {"mode":"raw","raw":"{\n \"messages\": [{\"role\":\"user\",\"content\":\"Count 1-5.\"}],\n \"stream\": true\n}"}, "url": {"raw":"{{baseUrl}}/azure_passthrough/openai/deployments/{{azureDeployment}}/chat/completions?api-version={{azureApiVersion}}","host":["{{baseUrl}}"],"path":["azure_passthrough", "openai", "deployments", "{{azureDeployment}}", "chat", "completions"],"query":[{"key":"api-version","value":"{{azureApiVersion}}"}]} } } + ] } + ] } ] diff --git a/tests/e2e/api/runners/filter-collection.mjs b/tests/e2e/api/runners/filter-collection.mjs index ba37475fe6..562c34f04a 100644 --- a/tests/e2e/api/runners/filter-collection.mjs +++ b/tests/e2e/api/runners/filter-collection.mjs @@ -1,10 +1,15 @@ #!/usr/bin/env node -// Filters a Postman collection by provider, feature keyword, or "rerun failed" +// Filters a Postman collection by provider, feature keyword(s), or "rerun failed" // from a prior newman report. Writes the filtered collection to --out. 
// // Usage: // node filter-collection.mjs --source path.json --out /tmp/x.json --provider anthropic // node filter-collection.mjs --source path.json --out /tmp/x.json --feature "web search" +// node filter-collection.mjs --source path.json --out /tmp/x.json --feature "cross-cut,structured output" # multi-keyword AND +// +// Structural keyword: "cross-cut" matches by route shape (unified /v1/chat/completions +// with a provider/model body), not just by name substring. Lets the AND filter find +// every cross-cut row without renaming 100+ items to add a literal "Cross-cut:" prefix. // node filter-collection.mjs --source path.json --out /tmp/x.json --rerun-failed --report tmp/newman-report.json import { readFileSync, writeFileSync, existsSync } from "node:fs"; @@ -23,7 +28,7 @@ const args = Object.fromEntries( const SOURCE = args.source; const OUT = args.out; const PROVIDER = (args.provider || "").toLowerCase(); -const FEATURE = (args.feature || "").toLowerCase(); +const FEATURE_PARTS = (args.feature || "").toLowerCase().split(",").map((s) => s.trim()).filter(Boolean); const RERUN_FAILED = args["rerun-failed"] === "true"; const REPORT = args.report || "tmp/newman-report.json"; @@ -31,7 +36,7 @@ if (!SOURCE || !OUT) { console.error("[filter-collection] --source and --out are required"); process.exit(2); } -if (!PROVIDER && !FEATURE && !RERUN_FAILED) { +if (!PROVIDER && !FEATURE_PARTS.length && !RERUN_FAILED) { console.error("[filter-collection] need at least one of: --provider, --feature, --rerun-failed"); process.exit(2); } @@ -46,17 +51,46 @@ const PROVIDER_KEYWORDS = { passthrough: ["_passthrough"], }; -const itemMatchesProvider = (item) => { +// Haystack = item JSON + ancestor folder names. Folder names encode the harness +// taxonomy ("Structured Output cross-cut", "Vertex Features", ...) 
so PROVIDER and +// FEATURE filters need to see them, otherwise a row named "openai/gpt-4o-mini" inside +// folder "Structured Output cross-cut" is invisible to FEATURE="cross-cut". +const buildHaystack = (item, ancestorNames) => + (JSON.stringify(item) + " " + ancestorNames.join(" ")).toLowerCase(); + +// Structural keywords - matched against route shape, not name substring. Lets users +// say FEATURE="cross-cut,structured output" and have it work for every row routed via +// unified /v1/chat/completions with a provider/model prefix, regardless of how the +// row is named or which folder it lives in. +const STRUCTURAL_KEYWORDS = { + "cross-cut": (item) => { + const req = item.request || {}; + const url = (typeof req.url === "string" ? req.url : req.url?.raw) || ""; + const body = req.body?.raw || ""; + const isUnified = /\/v1\/chat\/completions(\?|$)/.test(url) && + !/\/(openai|anthropic|bedrock|genai|azure)\/v1/.test(url) && + !/_passthrough/.test(url); + const hasProviderPrefix = /"model"\s*:\s*"(openai|anthropic|bedrock|gemini|vertex|azure)\//.test(body); + return isUnified && hasProviderPrefix; + }, + crosscut: (item) => STRUCTURAL_KEYWORDS["cross-cut"](item), +}; + +const itemMatchesProvider = (item, ancestorNames) => { if (!PROVIDER) return true; const keywords = PROVIDER_KEYWORDS[PROVIDER] || [PROVIDER]; - const haystack = JSON.stringify(item).toLowerCase(); + const haystack = buildHaystack(item, ancestorNames); return keywords.some((k) => haystack.includes(k)); }; -const itemMatchesFeature = (item) => { - if (!FEATURE) return true; - const haystack = JSON.stringify(item).toLowerCase(); - return haystack.includes(FEATURE); +const itemMatchesFeature = (item, ancestorNames) => { + if (!FEATURE_PARTS.length) return true; + const haystack = buildHaystack(item, ancestorNames); + return FEATURE_PARTS.every((p) => { + const structural = STRUCTURAL_KEYWORDS[p]; + if (structural) return structural(item) || haystack.includes(p); + return haystack.includes(p); + }); 
}; let failedNames = null; @@ -79,18 +113,18 @@ const itemMatchesRerunFailed = (item) => { return failedNames.has(item.name); }; -const passes = (item) => { +const passes = (item, ancestorNames) => { if (!item.request) return true; // folders pass; we filter their items below - return itemMatchesProvider(item) && itemMatchesFeature(item) && itemMatchesRerunFailed(item); + return itemMatchesProvider(item, ancestorNames) && itemMatchesFeature(item, ancestorNames) && itemMatchesRerunFailed(item); }; -const filterTree = (items) => { +const filterTree = (items, ancestorNames = []) => { const out = []; for (const item of items) { if (Array.isArray(item.item)) { - const kids = filterTree(item.item); + const kids = filterTree(item.item, [...ancestorNames, item.name || ""]); if (kids.length > 0) out.push({ ...item, item: kids }); - } else if (passes(item)) { + } else if (passes(item, ancestorNames)) { out.push(item); } } @@ -101,4 +135,4 @@ const collection = JSON.parse(readFileSync(SOURCE, "utf8")); const filtered = { ...collection, item: filterTree(collection.item || []) }; const totalAfter = JSON.stringify(filtered).match(/"request":/g)?.length || 0; writeFileSync(OUT, JSON.stringify(filtered, null, 2)); -console.error(`[filter-collection] wrote ${OUT} with ${totalAfter} requests after filter (provider=${PROVIDER || "-"}, feature=${FEATURE || "-"}, rerun-failed=${RERUN_FAILED})`); +console.error(`[filter-collection] wrote ${OUT} with ${totalAfter} requests after filter (provider=${PROVIDER || "-"}, feature=${FEATURE_PARTS.join("+") || "-"}, rerun-failed=${RERUN_FAILED})`); From 4fccacbf99ab8f6950a58f5d94667cc536f49c80 Mon Sep 17 00:00:00 2001 From: Akshay Deo Date: Wed, 13 May 2026 17:55:29 +0530 Subject: [PATCH 20/81] makefile diff fixes (#3462) ## Summary Briefly explain the purpose of this PR and the problem it solves. 
## Changes - What was changed and why - Any notable design decisions or trade-offs ## Type of change - [ ] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [ ] UI (React) - [ ] Docs ## How to test Describe the steps to validate this change. Include commands and expected outcomes. ```sh # Core/Transports go version go test ./... # UI cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` If adding new configs or environment variables, document them here. ## Screenshots/Recordings If UI changes, add before/after screenshots or short clips. ## Breaking changes - [ ] Yes - [ ] No If yes, describe impact and migration instructions. ## Related issues Link related issues and discussions. Example: Closes #123 ## Security considerations Note any security implications (auth, secrets, PII, sandboxing, etc.). ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- Makefile | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/Makefile b/Makefile index 04a43b3ee4..767e8c91c8 100644 --- a/Makefile +++ b/Makefile @@ -1636,11 +1636,7 @@ install-newman: ## Install newman + htmlextra reporter if not already installed @$(USE_NODE); npm list -g newman-reporter-htmlextra > /dev/null 2>&1 || ($(ECHO) "$(YELLOW)Installing newman-reporter-htmlextra...$(NC)" && npm install -g newman-reporter-htmlextra) @$(ECHO) "$(GREEN)Newman + htmlextra are ready$(NC)" -<<<<<<< HEAD -run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost provider-harness Postman collection. HELP=1 prints full parameter docs. Per-provider parallelism is ON by default (~3-4× speedup); set PARALLEL=0 for sequential. 
Filter via PROVIDER=openai|anthropic|bedrock|gemini|vertex|azure|passthrough, FEATURE="" (matches request name/body), RERUN_FAILED=1 (re-run only items that failed last run). INCLUDE_PREVIEW=1 to run [PREVIEW]-tagged account/region-scoped cases. INCLUDE_SKIP=1 to run [SKIP]-tagged criss-cross cells for known-unsupported provider+modality pairs. USE_INFISICAL=1 to source from Infisical (Usage: make run-provider-harness-test [HELP=1] [PARALLEL=0] [PROVIDER=anthropic] [FEATURE="web search"] [RERUN_FAILED=1] [INCLUDE_PREVIEW=1] [INCLUDE_SKIP=1] [BASE_URL=...] [FOLDER="..."] [ENV_FILE=...] [VIEWER_PORT=8090] [CI=1]) -======= run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost provider-harness Postman collection. HELP=1 prints full parameter docs. Filter via PROVIDER=openai|anthropic|bedrock|gemini|vertex|azure|passthrough, FEATURE="" or FEATURE="," (AND across substrings; matches request name/URL/body), RERUN_FAILED=1 (re-run only items that failed last run). INCLUDE_PREVIEW=1 to run [PREVIEW]-tagged account/region-scoped cases. USE_INFISICAL=1 to source from Infisical (Usage: make run-provider-harness-test [HELP=1] [PROVIDER=anthropic] [FEATURE="web search"] [FEATURE="cross-cut,structured output"] [RERUN_FAILED=1] [INCLUDE_PREVIEW=1] [BASE_URL=...] [FOLDER="..."] [ENV_FILE=...] 
[VIEWER_PORT=8090] [CI=1]) ->>>>>>> f09185ec1 (harness improvements) @if [ -n "$(HELP)" ]; then \ printf '\n%s\n' "$(CYAN)run-provider-harness-test - Bifrost provider harness runner$(NC)"; \ printf '%s\n\n' "Runs the Bifrost provider-harness Postman collection through newman, with optional filtering."; \ @@ -1666,20 +1662,10 @@ run-provider-harness-test: $(if $(HELP),,install-newman) ## Run the Bifrost prov printf ' %-18s %s\n' "USE_INFISICAL=1" "Source secrets from Infisical CLI ('infisical export --path /local --format dotenv') instead of .env."; \ printf '\n%s\n' "$(YELLOW)EXAMPLES$(NC)"; \ printf ' %s\n' "make run-provider-harness-test HELP=1"; \ -<<<<<<< HEAD - printf ' %s\n' "make run-provider-harness-test # full sweep, 6 providers concurrently (default ~3-4× speedup)"; \ - printf ' %s\n' "make run-provider-harness-test PARALLEL=0 # sequential mode (ordered output, htmlextra report)"; \ - printf ' %s\n' "make run-provider-harness-test FOLDER=\"8. Criss-Cross\" # criss-cross matrix only (endpoint × provider × modality)"; \ - printf ' %s\n' "make run-provider-harness-test FOLDER=\"8.2 Text Chat (streaming)\" # criss-cross streaming sub-folder"; \ - printf ' %s\n' "make run-provider-harness-test PROVIDER=bedrock # bedrock-only (includes bedrock-model cells across §8)"; \ - printf ' %s\n' "make run-provider-harness-test FEATURE=\"web search\" # all providers, web-search entries"; \ - printf ' %s\n' "make run-provider-harness-test INCLUDE_SKIP=1 # also run [SKIP] cells (capability-gap matrix)"; \ -======= printf ' %s\n' "make run-provider-harness-test # full 339-request sweep"; \ printf ' %s\n' "make run-provider-harness-test PROVIDER=bedrock # bedrock-only"; \ printf ' %s\n' "make run-provider-harness-test FEATURE=\"web search\" # all providers, web-search entries"; \ printf ' %s\n' "make run-provider-harness-test FEATURE=\"cross-cut,structured output\" # AND of substrings"; \ ->>>>>>> f09185ec1 (harness improvements) printf ' %s\n' "make 
run-provider-harness-test RERUN_FAILED=1 # triage iteration loop"; \ printf ' %s\n' "make run-provider-harness-test PROVIDER=anthropic RERUN_FAILED=1 # anthropic failures only"; \ printf ' %s\n' "make run-provider-harness-test PROVIDER=passthrough # passthrough sweep (incl. Bedrock SigV4)"; \ From c4a01bc9757d30ecc1b1a02c6c207d32e5000fe8 Mon Sep 17 00:00:00 2001 From: Javier Torres Date: Wed, 13 May 2026 13:44:28 -0500 Subject: [PATCH 21/81] Preserve Anthropic output schema refs (#3449) --- core/providers/anthropic/utils.go | 8 ++ core/providers/anthropic/utils_test.go | 137 +++++++++++++++++++++++++ 2 files changed, 145 insertions(+) diff --git a/core/providers/anthropic/utils.go b/core/providers/anthropic/utils.go index d26be4bef5..80b609f002 100644 --- a/core/providers/anthropic/utils.go +++ b/core/providers/anthropic/utils.go @@ -2654,6 +2654,14 @@ func convertResponsesTextConfigToAnthropicOutputFormat(textConfig *schemas.Respo schema["required"] = format.JSONSchema.Required } + if format.JSONSchema.Defs != nil { + schema["$defs"] = *format.JSONSchema.Defs + } + + if format.JSONSchema.Definitions != nil { + schema["definitions"] = *format.JSONSchema.Definitions + } + if format.JSONSchema.Type != nil && *format.JSONSchema.Type == "object" { schema["additionalProperties"] = false } else if format.JSONSchema.AdditionalProperties != nil { diff --git a/core/providers/anthropic/utils_test.go b/core/providers/anthropic/utils_test.go index 9117e1f9d3..a5f1728d6c 100644 --- a/core/providers/anthropic/utils_test.go +++ b/core/providers/anthropic/utils_test.go @@ -577,6 +577,143 @@ func TestConvertChatResponseFormatToAnthropicOutputFormat(t *testing.T) { } } +func TestConvertResponsesTextConfigToAnthropicOutputFormatPreservesSchemaRefs(t *testing.T) { + schemaType := "object" + properties := map[string]interface{}{ + "record": map[string]interface{}{ + "$ref": "#/$defs/Document", + }, + } + defs := map[string]interface{}{ + "Document": map[string]interface{}{ + "type": 
"object", + "properties": map[string]interface{}{ + "title": map[string]interface{}{"type": "string"}, + "authors": map[string]interface{}{ + "type": "array", + "items": map[string]interface{}{ + "$ref": "#/$defs/Person", + }, + }, + }, + "required": []interface{}{"title", "authors"}, + }, + "Person": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "name": map[string]interface{}{"type": "string"}, + "email": map[string]interface{}{"type": []interface{}{"string", "null"}}, + }, + "required": []interface{}{"name", "email"}, + }, + } + + result := convertResponsesTextConfigToAnthropicOutputFormat(&schemas.ResponsesTextConfig{ + Format: &schemas.ResponsesTextConfigFormat{ + Type: "json_schema", + JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{ + Type: &schemaType, + Properties: &properties, + Required: []string{"record"}, + Defs: &defs, + }, + }, + }) + if result == nil { + t.Fatal("expected output format") + } + + var output map[string]interface{} + if err := sonic.Unmarshal(result, &output); err != nil { + t.Fatalf("failed to unmarshal output format: %v", err) + } + + if output["type"] != "json_schema" { + t.Fatalf("expected json_schema type, got %v", output["type"]) + } + + schema, ok := output["schema"].(map[string]interface{}) + if !ok { + t.Fatalf("expected schema map, got %T", output["schema"]) + } + if schema["additionalProperties"] != false { + t.Fatalf("expected additionalProperties=false, got %v", schema["additionalProperties"]) + } + if _, ok := schema["$defs"].(map[string]interface{}); !ok { + t.Fatalf("expected $defs to be preserved, got %v", schema["$defs"]) + } + + outputProperties, ok := schema["properties"].(map[string]interface{}) + if !ok { + t.Fatalf("expected properties map, got %T", schema["properties"]) + } + recordSchema, ok := outputProperties["record"].(map[string]interface{}) + if !ok { + t.Fatalf("expected record schema map, got %T", outputProperties["record"]) + } + if recordSchema["$ref"] != 
"#/$defs/Document" { + t.Fatalf("expected record $ref to be preserved, got %v", recordSchema["$ref"]) + } +} + +func TestConvertResponsesTextConfigToAnthropicOutputFormatPreservesLegacyDefinitions(t *testing.T) { + schemaType := "object" + properties := map[string]interface{}{ + "record": map[string]interface{}{ + "$ref": "#/definitions/Document", + }, + } + definitions := map[string]interface{}{ + "Document": map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "title": map[string]interface{}{"type": "string"}, + }, + "required": []interface{}{"title"}, + }, + } + + result := convertResponsesTextConfigToAnthropicOutputFormat(&schemas.ResponsesTextConfig{ + Format: &schemas.ResponsesTextConfigFormat{ + Type: "json_schema", + JSONSchema: &schemas.ResponsesTextConfigFormatJSONSchema{ + Type: &schemaType, + Properties: &properties, + Required: []string{"record"}, + Definitions: &definitions, + }, + }, + }) + if result == nil { + t.Fatal("expected output format") + } + + var output map[string]interface{} + if err := sonic.Unmarshal(result, &output); err != nil { + t.Fatalf("failed to unmarshal output format: %v", err) + } + + schema, ok := output["schema"].(map[string]interface{}) + if !ok { + t.Fatalf("expected schema map, got %T", output["schema"]) + } + if _, ok := schema["definitions"].(map[string]interface{}); !ok { + t.Fatalf("expected definitions to be preserved, got %v", schema["definitions"]) + } + + outputProperties, ok := schema["properties"].(map[string]interface{}) + if !ok { + t.Fatalf("expected properties map, got %T", schema["properties"]) + } + recordSchema, ok := outputProperties["record"].(map[string]interface{}) + if !ok { + t.Fatalf("expected record schema map, got %T", outputProperties["record"]) + } + if recordSchema["$ref"] != "#/definitions/Document" { + t.Fatalf("expected record $ref to be preserved, got %v", recordSchema["$ref"]) + } +} + func TestValidateToolsForProvider(t *testing.T) { tests := []struct { 
name string From c3cb27a00895d0325f4c2712b0d65013571256a5 Mon Sep 17 00:00:00 2001 From: Anuj Parihar Date: Thu, 14 May 2026 15:14:17 +0530 Subject: [PATCH 22/81] feat: use the new parameter json schema compliant to json schema spec (#3444) --- core/providers/gemini/chat.go | 5 ++++- core/providers/gemini/responses.go | 34 +++++++++++++++++++++--------- core/providers/gemini/utils.go | 12 +++++++---- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/core/providers/gemini/chat.go b/core/providers/gemini/chat.go index a0b399d034..2a398301b0 100644 --- a/core/providers/gemini/chat.go +++ b/core/providers/gemini/chat.go @@ -31,7 +31,10 @@ func ToGeminiChatCompletionRequest(bifrostReq *schemas.BifrostChatRequest) (*Gem } // Handle tool-related parameters if len(bifrostReq.Params.Tools) > 0 { - geminiReq.Tools = convertBifrostToolsToGemini(bifrostReq.Params.Tools) + geminiReq.Tools, err = convertBifrostToolsToGemini(bifrostReq.Params.Tools) + if err != nil { + return nil, err + } // Convert tool choice to tool config if bifrostReq.Params.ToolChoice != nil { diff --git a/core/providers/gemini/responses.go b/core/providers/gemini/responses.go index 95fead5661..bcef38f810 100644 --- a/core/providers/gemini/responses.go +++ b/core/providers/gemini/responses.go @@ -93,7 +93,10 @@ func ToGeminiResponsesRequest(bifrostReq *schemas.BifrostResponsesRequest) (*Gem geminiReq.ExtraParams = bifrostReq.Params.ExtraParams // Handle tool-related parameters if len(bifrostReq.Params.Tools) > 0 { - geminiReq.Tools = convertResponsesToolsToGemini(bifrostReq.Params.Tools) + geminiReq.Tools, err = convertResponsesToolsToGemini(bifrostReq.Params.Tools) + if err != nil { + return nil, err + } // Convert tool choice if present if bifrostReq.Params.ToolChoice != nil { @@ -2191,6 +2194,16 @@ func convertGeminiToolsToResponsesTools(tools []Tool) []schemas.ResponsesTool { if fn.Parameters != nil { params := convertSchemaToFunctionParameters(fn.Parameters) 
responsesTool.ResponsesToolFunction.Parameters = &params + } else if fn.ParametersJSONSchema != nil { + raw, err := providerUtils.MarshalSorted(fn.ParametersJSONSchema) + if err != nil { + continue + } + var params schemas.ToolFunctionParameters + if err := json.Unmarshal(raw, &params); err != nil { + continue + } + responsesTool.ResponsesToolFunction.Parameters = &params } responsesTools = append(responsesTools, responsesTool) } @@ -2779,7 +2792,7 @@ func (r *GeminiGenerationRequest) convertParamsToGenerationConfigResponses(param } // convertResponsesToolsToGemini converts Responses tools to Gemini tools -func convertResponsesToolsToGemini(tools []schemas.ResponsesTool) []Tool { +func convertResponsesToolsToGemini(tools []schemas.ResponsesTool) ([]Tool, error) { geminiTool := Tool{} hasWebSearchTool := false @@ -2805,12 +2818,13 @@ func convertResponsesToolsToGemini(tools []schemas.ResponsesTool) []Tool { } return "" }(), - Parameters: func() *Schema { - if tool.ResponsesToolFunction.Parameters != nil { - return convertFunctionParametersToSchema(*tool.ResponsesToolFunction.Parameters) - } - return nil - }(), + } + if tool.ResponsesToolFunction.Parameters != nil { + raw, err := providerUtils.MarshalSorted(tool.ResponsesToolFunction.Parameters) + if err != nil { + return []Tool{}, fmt.Errorf("marshal tool %q parameters: %w", *tool.Name, err) + } + funcDecl.ParametersJSONSchema = json.RawMessage(raw) } geminiTool.FunctionDeclarations = append(geminiTool.FunctionDeclarations, funcDecl) } @@ -2833,9 +2847,9 @@ func convertResponsesToolsToGemini(tools []schemas.ResponsesTool) []Tool { } if len(geminiTool.FunctionDeclarations) > 0 || geminiTool.GoogleSearch != nil { - return []Tool{geminiTool} + return []Tool{geminiTool}, nil } - return []Tool{} + return []Tool{}, nil } // convertResponsesToolChoiceToGemini converts Responses tool choice to Gemini tool config diff --git a/core/providers/gemini/utils.go b/core/providers/gemini/utils.go index 43708fd2a2..0e7c3cdc3a 100644 --- 
a/core/providers/gemini/utils.go +++ b/core/providers/gemini/utils.go @@ -1229,7 +1229,7 @@ func convertParamsToGenerationConfig(params *schemas.ChatParameters, responseMod } // convertBifrostToolsToGemini converts Bifrost tools to Gemini format -func convertBifrostToolsToGemini(bifrostTools []schemas.ChatTool) []Tool { +func convertBifrostToolsToGemini(bifrostTools []schemas.ChatTool) ([]Tool, error) { geminiTool := Tool{} for _, tool := range bifrostTools { @@ -1241,7 +1241,11 @@ func convertBifrostToolsToGemini(bifrostTools []schemas.ChatTool) []Tool { Name: tool.Function.Name, } if tool.Function.Parameters != nil { - fd.Parameters = convertFunctionParametersToSchema(*tool.Function.Parameters) + raw, err := providerUtils.MarshalSorted(tool.Function.Parameters) + if err != nil { + return nil, fmt.Errorf("marshal tool %q parameters: %w", tool.Function.Name, err) + } + fd.ParametersJSONSchema = json.RawMessage(raw) } if tool.Function.Description != nil { fd.Description = *tool.Function.Description @@ -1251,9 +1255,9 @@ func convertBifrostToolsToGemini(bifrostTools []schemas.ChatTool) []Tool { } if len(geminiTool.FunctionDeclarations) > 0 { - return []Tool{geminiTool} + return []Tool{geminiTool}, nil } - return []Tool{} + return []Tool{}, nil } // convertFunctionParametersToSchema converts Bifrost function parameters to Gemini Schema From 9b9895932c50f21cc8622141a29110d48e8801b7 Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 15:49:47 +0530 Subject: [PATCH 23/81] feat: replace log delete button with actions dropdown menu and pin actions column (#3480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replaces the direct delete button in the logs and MCP logs action columns with a dropdown menu triggered by a `MoreHorizontal` icon. This improves the UI by providing a more scalable actions pattern while keeping the delete functionality accessible. 
The actions column is also now properly pinned to the right side of the table when the user has delete access. ## Changes - Replaced the inline destructive `Trash2` button with a `DropdownMenu` containing a "Delete" item for both logs and MCP logs tables - The actions column trigger is now a ghost `MoreHorizontal` icon button, reducing visual noise in the table - The actions column is pinned to the right only when `hasDeleteAccess` is true; otherwise no fixed columns are configured - Fixed `fixedColumnIds` to include `"actions"` so the column receives correct sticky positioning behavior - Removed `overflow-hidden` from pinned cells in the MCP logs table to prevent the dropdown from being clipped - Reduced the actions column size from 72 to 56px ## Type of change - [ ] Bug fix - [x] Feature - [ ] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test 1. Navigate to the Logs page as a user with delete access. 2. Confirm the actions column is pinned to the right of the table. 3. Click the `⋯` icon on any row and verify the dropdown appears with a "Delete" option. 4. Click "Delete" and confirm the log is deleted without the row click handler firing. 5. Repeat on the MCP Logs page. 6. Log in as a user without delete access and confirm the actions column is not present. ```sh cd ui pnpm i pnpm build ``` ## Screenshots/Recordings _Add before/after screenshots showing the old delete button vs. the new dropdown._ ## Breaking changes - [ ] Yes - [x] No ## Related issues ## Security considerations No new security implications. Delete access gating remains unchanged. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- ui/app/workspace/logs/page.tsx | 8 ++- ui/app/workspace/logs/views/columns.tsx | 50 ++++++++++++++----- ui/app/workspace/logs/views/logsTable.tsx | 4 +- ui/app/workspace/mcp-logs/page.tsx | 32 ++++++------ ui/app/workspace/mcp-logs/views/columns.tsx | 39 ++++++++++----- .../workspace/mcp-logs/views/mcpLogsTable.tsx | 6 +-- 6 files changed, 92 insertions(+), 47 deletions(-) diff --git a/ui/app/workspace/logs/page.tsx b/ui/app/workspace/logs/page.tsx index 17c3404b02..46363a4636 100644 --- a/ui/app/workspace/logs/page.tsx +++ b/ui/app/workspace/logs/page.tsx @@ -490,7 +490,11 @@ export default function LogsPage() { togglePin: toggleColumnPin, reorder: reorderColumns, reset: resetColumns, - } = useColumnConfig({ columnIds, paramName: "cols" }); + } = useColumnConfig({ + columnIds, + paramName: "cols", + fixedColumns: hasDeleteAccess ? { right: ["actions"] } : undefined, + }); // Navigation for log detail sheet const logs = logsData?.logs ?? []; @@ -730,4 +734,4 @@ export default function LogsPage() { )}
); -} \ No newline at end of file +} diff --git a/ui/app/workspace/logs/views/columns.tsx b/ui/app/workspace/logs/views/columns.tsx index 824733fd98..e887e144c1 100644 --- a/ui/app/workspace/logs/views/columns.tsx +++ b/ui/app/workspace/logs/views/columns.tsx @@ -5,6 +5,12 @@ import { } from "@/app/workspace/dashboard/utils/chartUtils"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdownMenu"; import { ProviderIconType, RenderProviderIcon } from "@/lib/constants/icons"; import { getProviderLabel, @@ -22,7 +28,7 @@ import { import { cn } from "@/lib/utils"; import { ColumnDef } from "@tanstack/react-table"; import { format, formatDistanceToNow } from "date-fns"; -import { ArrowUpDown, Trash2 } from "lucide-react"; +import { ArrowUpDown, MoreHorizontal, Trash2 } from "lucide-react"; function getAssistantToolCallSummary(log?: LogEntry): string { const toolCalls = log?.output_message?.tool_calls || []; @@ -476,20 +482,40 @@ export const createColumns = ( ? [ { id: "actions", - size: 72, + header: "", + size: 56, cell: ({ row }) => { const log = row.original; return ( - +
+ + event.stopPropagation()}> + + + + { + event.stopPropagation(); + onDelete(log); + }} + > + + Delete + + + +
); }, }, diff --git a/ui/app/workspace/logs/views/logsTable.tsx b/ui/app/workspace/logs/views/logsTable.tsx index eeccc26c31..08c5b1e04e 100644 --- a/ui/app/workspace/logs/views/logsTable.tsx +++ b/ui/app/workspace/logs/views/logsTable.tsx @@ -59,7 +59,7 @@ export function LogsDataTable({ const tableContainerRef = useRef(null); const calculatedPageSize = useTablePageSize(tableContainerRef); - const fixedColumnIds = useMemo(() => new Set([]), []); + const fixedColumnIds = useMemo(() => new Set(["actions"]), []); // Measure actual header cell widths for pixel-perfect pin offsets const { headerCellRefs, setHeaderCellRef } = useHeaderCellRefs(); @@ -279,4 +279,4 @@ export function LogsDataTable({
); -} \ No newline at end of file +} diff --git a/ui/app/workspace/mcp-logs/page.tsx b/ui/app/workspace/mcp-logs/page.tsx index b9370bfd09..03112b8d04 100644 --- a/ui/app/workspace/mcp-logs/page.tsx +++ b/ui/app/workspace/mcp-logs/page.tsx @@ -1,3 +1,4 @@ +import { LogsVolumeChart } from "@/app/workspace/logs/views/logsVolumeChart"; import { MCPFilterSidebar } from "@/components/filters/mcpFilterSidebar"; import FullPageLoader from "@/components/fullPageLoader"; import { useColumnConfig } from "@/components/table"; @@ -10,7 +11,6 @@ import { useGetMCPLogsQuery, useGetMCPLogsStatsQuery, } from "@/lib/store"; -import { LogsVolumeChart } from "@/app/workspace/logs/views/logsVolumeChart"; import { useLazyGetMCPLogsQuery } from "@/lib/store/apis/mcpLogsApi"; import type { MCPToolLogEntry, @@ -108,9 +108,9 @@ export default function MCPLogsPage() { ...(urlState.period ? { period: urlState.period } : { - start_time: dateUtils.toISOString(urlState.start_time), - end_time: dateUtils.toISOString(urlState.end_time), - }), + start_time: dateUtils.toISOString(urlState.start_time), + end_time: dateUtils.toISOString(urlState.end_time), + }), }), [ urlState.tool_names, @@ -396,17 +396,6 @@ export default function MCPLogsPage() { [columns], ); - const MCP_COLUMN_LABELS: Record = useMemo( - () => ({ - timestamp: "Time", - tool_name: "Tool Name", - server_label: "Server", - latency: "Latency", - cost: "Cost", - }), - [], - ); - const { entries: columnEntries, columnOrder, @@ -419,9 +408,20 @@ export default function MCPLogsPage() { } = useColumnConfig({ columnIds, paramName: "mcp_cols", - fixedColumns: { left: [], right: [] }, + fixedColumns: hasDeleteAccess ? { right: ["actions"] } : undefined, }); + const MCP_COLUMN_LABELS: Record = useMemo( + () => ({ + timestamp: "Time", + tool_name: "Tool Name", + server_label: "Server", + latency: "Latency", + cost: "Cost", + }), + [], + ); + const selectedLogIndex = useMemo( () => (selectedLogId ? 
logs.findIndex((l) => l.id === selectedLogId) : -1), [selectedLogId, logs], diff --git a/ui/app/workspace/mcp-logs/views/columns.tsx b/ui/app/workspace/mcp-logs/views/columns.tsx index c1f1a63146..4308e9d9bc 100644 --- a/ui/app/workspace/mcp-logs/views/columns.tsx +++ b/ui/app/workspace/mcp-logs/views/columns.tsx @@ -1,10 +1,11 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdownMenu"; import { Status, StatusBarColors, Statuses } from "@/lib/constants/logs"; import type { MCPToolLogEntry } from "@/lib/types/logs"; import { ColumnDef, Row } from "@tanstack/react-table"; import { format, isValid } from "date-fns"; -import { ArrowUpDown, Trash2 } from "lucide-react"; +import { ArrowUpDown, MoreHorizontal, Trash2 } from "lucide-react"; // Helper function to validate status and return a safe Status value const getValidatedStatus = (status: string): Status => { @@ -99,20 +100,34 @@ export const createMCPColumns = ( ? [ { id: "actions", - size: 72, + header: "", + size: 56, cell: ({ row }: { row: Row }) => { const log = row.original; return ( - +
+ + event.stopPropagation()}> + + + + { + event.stopPropagation(); + void handleDelete(log); + }} + > + + Delete + + + +
); }, }, diff --git a/ui/app/workspace/mcp-logs/views/mcpLogsTable.tsx b/ui/app/workspace/mcp-logs/views/mcpLogsTable.tsx index d72c62f362..0c9d2e1cd0 100644 --- a/ui/app/workspace/mcp-logs/views/mcpLogsTable.tsx +++ b/ui/app/workspace/mcp-logs/views/mcpLogsTable.tsx @@ -56,7 +56,7 @@ export function MCPLogsDataTable({ }: DataTableProps) { const [sorting, setSorting] = useState([{ id: pagination.sort_by, desc: pagination.order === "desc" }]); - const fixedColumnIds = useMemo(() => new Set([]), []); + const fixedColumnIds = useMemo(() => new Set(["actions"]), []); // Measure actual header cell widths for pixel-perfect pin offsets const { headerCellRefs, setHeaderCellRef } = useHeaderCellRefs(); @@ -188,7 +188,7 @@ export function MCPLogsDataTable({ key={cell.id} style={{ width: size, minWidth: size, maxWidth: size, ...buildPinStyle(cell.column, pinOffsets) }} className={cn( - "overflow-hidden", + !pinned && "overflow-hidden", pinned && "bg-card", cell.column.id === lastLeftPinId && PIN_SHADOW_LEFT, cell.column.id === firstRightPinId && PIN_SHADOW_RIGHT, @@ -250,4 +250,4 @@ export function MCPLogsDataTable({ ); -} \ No newline at end of file +} From fd1f6a5161efc361f16c94b2d155a954c0237a1d Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 15:50:38 +0530 Subject: [PATCH 24/81] fix: constrain model catalog table column widths and truncate overflowing text (#3481) ## Summary Fixes layout overflow issues in the Model Catalog table where long provider names and model badge text would break out of their columns or cause uneven column sizing. 
## Changes - Added `table-fixed` layout with explicit `<colgroup>` column widths (26% / 44% / 16% / 14%) to enforce stable column proportions - Added `overflow-hidden` and `truncate` to the Provider name cell so long names are clipped cleanly instead of overflowing - Added `shrink-0` to the "CUSTOM" badge so it doesn't compress when the provider name is long - Added `max-w-[220px] truncate` to model name badges in `ModelsUsedCell` to prevent individual badges from stretching too wide ## Type of change - [x] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test Navigate to the Model Catalog page and verify: 1. Provider names that are long truncate cleanly within their column 2. The "CUSTOM" badge remains visible and does not shrink when next to a long provider name 3. Model name badges in the models column truncate at a reasonable width 4. Column widths remain stable regardless of content length ```sh cd ui pnpm i || npm i pnpm build || npm run build ``` ## Screenshots/Recordings If UI changes, add before/after screenshots or short clips. ## Breaking changes - [ ] Yes - [x] No ## Related issues ## Security considerations None. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../model-catalog/views/modelCatalogTable.tsx | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/ui/app/workspace/model-catalog/views/modelCatalogTable.tsx b/ui/app/workspace/model-catalog/views/modelCatalogTable.tsx index 5341932c92..a326995bf1 100644 --- a/ui/app/workspace/model-catalog/views/modelCatalogTable.tsx +++ b/ui/app/workspace/model-catalog/views/modelCatalogTable.tsx @@ -92,7 +92,13 @@ export default function ModelCatalogTable({ {/* Table */}
- +
+ + + + + + Provider @@ -123,26 +129,26 @@ export default function ModelCatalogTable({ ) : ( rows.map((row) => ( - +
- + {row.isCustom ? row.providerName : ProviderLabels[row.providerName as keyof typeof ProviderLabels] || row.providerName} {row.isCustom && ( - + CUSTOM )}
- + {isLoadingModels ? (
@@ -179,7 +185,7 @@ function ModelsUsedCell({ models: rawModels }: { models: string[] }) {
{visible.map((m) => ( - + {m} ))} @@ -198,4 +204,4 @@ function ModelsUsedCell({ models: rawModels }: { models: string[] }) {
); -} \ No newline at end of file +} From ca77d2a308d341e6252948168cb15fd74a0e7858 Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 15:51:37 +0530 Subject: [PATCH 25/81] fix: constrain provider keys table column widths and truncate long key names (#3482) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes layout issues in the model provider keys table where long key/model/server names would overflow their cells and cause the table to render incorrectly. ## Changes - Applied `table-fixed` layout to the keys table and defined explicit column widths via `<colgroup>`
(64% for the name column, 12% each for the remaining three columns) to enforce stable column sizing regardless of content length - Added `overflow-hidden` to the name cell and `min-w-0` + `truncate` to the name text span so long strings are clipped with an ellipsis instead of breaking the layout - Added a trailing newline at end of file ## Type of change - [x] Bug fix - [ ] Feature - [ ] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test 1. Navigate to the Workspace → Providers page and open a provider that has keys with long names (e.g. a vLLM model name or a long API key label). 2. Verify that the name column truncates with an ellipsis rather than overflowing into adjacent columns. 3. Verify that the three action columns (weight, status, actions) maintain consistent widths. ```sh cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` ## Screenshots/Recordings Add before/after screenshots showing the table with a long key name to confirm truncation behavior. ## Breaking changes - [ ] Yes - [x] No ## Related issues ## Security considerations None. 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../views/modelProviderKeysTableView.tsx | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ui/app/workspace/providers/views/modelProviderKeysTableView.tsx b/ui/app/workspace/providers/views/modelProviderKeysTableView.tsx index ff8588eb85..8454c89549 100644 --- a/ui/app/workspace/providers/views/modelProviderKeysTableView.tsx +++ b/ui/app/workspace/providers/views/modelProviderKeysTableView.tsx @@ -128,7 +128,13 @@ export default function ModelProviderKeysTableView({ provider, className, header ) : (
-
+
+ + + + + + {isVLLM ? "Model" : isOllamaOrSGL ? "Server" : "API Key"} @@ -154,8 +160,8 @@ export default function ModelProviderKeysTableView({ provider, className, header className="text-sm transition-colors hover:bg-white" onClick={() => { }} > - -
+ +
{key.status === "success" && ( @@ -218,7 +224,7 @@ export default function ModelProviderKeysTableView({ provider, className, header ); })()} - {key.name} + {key.name}
@@ -299,4 +305,4 @@ export default function ModelProviderKeysTableView({ provider, className, header )}
); -} \ No newline at end of file +} From be8c682caae24beb57e2b007289f56eae585d918 Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 15:52:39 +0530 Subject: [PATCH 26/81] feat: replace inline edit/delete buttons with dropdown menu in model limits table (#3483) ## Summary Replaces the individual inline Edit and Delete action buttons in the model limits table with a consolidated `DropdownMenu` (three-dot menu). The delete confirmation dialog is also lifted out of the per-row render loop and rendered once at the table level, driven by a `deleteModelConfigId` state value. ## Changes - Replaced per-row Edit and Delete buttons with a single `MoreHorizontal` icon button that opens a `DropdownMenu` containing Edit and Delete items. - Moved the `AlertDialog` for delete confirmation out of the table row loop into a single top-level instance, controlled by `deleteModelConfigId` state. This prevents multiple dialog instances from being mounted simultaneously. - Added `deletingModelConfig` derived from `deleteModelConfigId` via `useMemo`, keeping it in sync with the RTK cache similarly to `editingModelConfig`. - Cleared `deleteModelConfigId` on successful deletion to close the dialog automatically. - Removed the hover/focus-dependent opacity animation on the action cell since the dropdown replaces that pattern. ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test 1. Navigate to the Model Limits table in the workspace. 2. Hover over any model limit row and click the `...` (MoreHorizontal) button. 3. Verify the dropdown shows **Edit** and **Delete** options. 4. Click **Edit** and confirm the model limit sheet opens with the correct config pre-populated. 5. 
Click **Delete** and confirm the confirmation dialog appears with the correct model name (truncated if over 30 characters). 6. Confirm deletion succeeds, the dialog closes, and a success toast is shown. 7. Confirm that users without update/delete RBAC access see the respective menu items disabled. ```sh cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` ## Screenshots/Recordings _Add before/after screenshots showing the old separate Edit/Delete buttons vs. the new dropdown menu._ ## Breaking changes - [x] No ## Related issues ## Security considerations RBAC checks for `Governance` update and delete operations are preserved on the new dropdown menu items. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../model-limits/views/modelLimitsTable.tsx | 113 +++++++++++------- 1 file changed, 68 insertions(+), 45 deletions(-) diff --git a/ui/app/workspace/model-limits/views/modelLimitsTable.tsx b/ui/app/workspace/model-limits/views/modelLimitsTable.tsx index b8aff71d51..0af00a4a6d 100644 --- a/ui/app/workspace/model-limits/views/modelLimitsTable.tsx +++ b/ui/app/workspace/model-limits/views/modelLimitsTable.tsx @@ -7,10 +7,10 @@ import { AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, - AlertDialogTrigger, } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdownMenu"; import { Input } from "@/components/ui/input"; import { Progress } from "@/components/ui/progress"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; @@ -23,7 +23,7 @@ import { ModelConfig } from 
"@/lib/types/governance"; import { cn } from "@/lib/utils"; import { formatCurrency } from "@/lib/utils/governance"; import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; -import { ChevronLeft, ChevronRight, Edit, Plus, Search, Trash2 } from "lucide-react"; +import { ChevronLeft, ChevronRight, Edit, MoreHorizontal, Plus, Search, Trash2 } from "lucide-react"; import { useMemo, useState } from "react"; import { toast } from "sonner"; import ModelLimitSheet from "./modelLimitSheet"; @@ -63,12 +63,17 @@ export default function ModelLimitsTable({ }: ModelLimitsTableProps) { const [showModelLimitSheet, setShowModelLimitSheet] = useState(false); const [editingModelConfigId, setEditingModelConfigId] = useState(null); + const [deleteModelConfigId, setDeleteModelConfigId] = useState(null); // Derive editingModelConfig from props so it stays in sync with RTK cache updates const editingModelConfig = useMemo( () => (editingModelConfigId ? (modelConfigs.find((mc) => mc.id === editingModelConfigId) ?? null) : null), [editingModelConfigId, modelConfigs], ); + const deletingModelConfig = useMemo( + () => (deleteModelConfigId ? (modelConfigs.find((mc) => mc.id === deleteModelConfigId) ?? 
null) : null), + [deleteModelConfigId, modelConfigs], + ); const hasCreateAccess = useRbac(RbacResource.Governance, RbacOperation.Create); const hasUpdateAccess = useRbac(RbacResource.Governance, RbacOperation.Update); @@ -80,6 +85,7 @@ export default function ModelLimitsTable({ try { await deleteModelConfig(id).unwrap(); toast.success("Model limit deleted successfully"); + setDeleteModelConfigId(null); } catch (error) { toast.error(getErrorMessage(error)); } @@ -90,8 +96,7 @@ export default function ModelLimitsTable({ setShowModelLimitSheet(true); }; - const handleEditModelLimit = (config: ModelConfig, e: React.MouseEvent) => { - e.stopPropagation(); + const handleEditModelLimit = (config: ModelConfig) => { setEditingModelConfigId(config.id); setShowModelLimitSheet(true); }; @@ -120,6 +125,30 @@ export default function ModelLimitsTable({ {showModelLimitSheet && ( setShowModelLimitSheet(false)} /> )} + !open && setDeleteModelConfigId(null)}> + + + Delete Model Limit + + Are you sure you want to delete the limit for " + {deletingModelConfig?.model_name && deletingModelConfig.model_name.length > 30 + ? `${deletingModelConfig.model_name.slice(0, 30)}...` + : deletingModelConfig?.model_name} + "? This action cannot be undone. + + + + Cancel + deletingModelConfig && handleDelete(deletingModelConfig.id)} + disabled={isDeleting} + className="bg-red-600 hover:bg-red-700" + > + {isDeleting ? "Deleting..." : "Delete"} + + + +
@@ -341,53 +370,47 @@ export default function ModelLimitsTable({ )} e.stopPropagation()}> -
- - - +
+ + e.stopPropagation()}> + + + { + e.stopPropagation(); + handleEditModelLimit(config); + }} + data-testid={`model-limit-button-edit-${toTestIdPart(config.model_name)}-${toTestIdPart(config.provider || "all")}`} + > + + Edit + + { + e.stopPropagation(); + setDeleteModelConfigId(config.id); + }} data-testid={`model-limit-button-delete-${toTestIdPart(config.model_name)}-${toTestIdPart(config.provider || "all")}`} > - - - - - Delete Model Limit - - Are you sure you want to delete the limit for " - {config.model_name.length > 30 ? `${config.model_name.slice(0, 30)}...` : config.model_name} - "? This action cannot be undone. - - - - Cancel - handleDelete(config.id)} - disabled={isDeleting} - className="bg-red-600 hover:bg-red-700" - > - {isDeleting ? "Deleting..." : "Delete"} - - - - + Delete + + +
@@ -431,4 +454,4 @@ export default function ModelLimitsTable({
); -} \ No newline at end of file +} From a393ff9908679f997898960cd718e5f44f4f3659 Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 15:53:39 +0530 Subject: [PATCH 27/81] feat: replace routing rule action buttons with dropdown menu (#3484) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replaces the individual Edit and Delete action buttons in the routing rules table with a consolidated `DropdownMenu` triggered by a `MoreHorizontal` icon. This reduces visual clutter in the actions column and provides a more consistent UX pattern for row-level actions. ## Changes - Replaced separate Edit and Delete ghost buttons with a single `MoreHorizontal` icon button that opens a dropdown menu containing both actions - Edit and Delete items within the dropdown remain gated by `canUpdate` and `canDelete` permissions respectively, now using the `disabled` prop instead of conditional rendering - The Delete item uses the `destructive` variant to visually distinguish it from the Edit action - Changed `catch (error: any)` to `catch (error: unknown)` for improved type safety - Added a trailing newline to the end of the file ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test Navigate to the routing rules table and verify: 1. Each row displays a `MoreHorizontal` icon button in the actions column 2. Clicking the icon opens a dropdown with Edit and Delete options 3. Edit and Delete options are disabled when the user lacks the respective permissions 4. Selecting Edit opens the edit flow for the correct rule 5. Selecting Delete triggers the delete confirmation dialog for the correct rule 6. 
Row click propagation is not triggered when interacting with the dropdown ```sh cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` ## Screenshots/Recordings Before: Two separate icon buttons (pencil and trash) visible inline on each row. After: A single `⋯` icon button per row that reveals Edit and Delete options in a dropdown, with Delete styled in red. ## Breaking changes - [ ] Yes - [x] No ## Related issues ## Security considerations No security implications. Permission checks (`canUpdate`, `canDelete`) are preserved. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../routing-rules/views/routingRulesTable.tsx | 71 ++++++++++++------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/ui/app/workspace/routing-rules/views/routingRulesTable.tsx b/ui/app/workspace/routing-rules/views/routingRulesTable.tsx index 9e2f7df8b0..b61ac1f892 100644 --- a/ui/app/workspace/routing-rules/views/routingRulesTable.tsx +++ b/ui/app/workspace/routing-rules/views/routingRulesTable.tsx @@ -15,6 +15,7 @@ import { } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdownMenu"; import { Input } from "@/components/ui/input"; import { Switch } from "@/components/ui/switch"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; @@ -24,7 +25,7 @@ import { getErrorMessage } from "@/lib/store"; import { useDeleteRoutingRuleMutation, useUpdateRoutingRuleMutation } from "@/lib/store/apis/routingRulesApi"; import { RoutingRule, RoutingTarget } from "@/lib/types/routingRules"; import { 
getPriorityBadgeClass, getScopeLabel, truncateCELExpression } from "@/lib/utils/routingRules"; -import { ChevronLeft, ChevronRight, Edit, Search, Trash2 } from "lucide-react"; +import { ChevronLeft, ChevronRight, Edit, MoreHorizontal, Search, Trash2 } from "lucide-react"; import { useState } from "react"; import { toast } from "sonner"; @@ -70,7 +71,7 @@ export function RoutingRulesTable({ await deleteRoutingRule(deleteRuleId).unwrap(); toast.success("Routing rule deleted successfully"); setDeleteRuleId(null); - } catch (error: any) { + } catch (error: unknown) { toast.error(getErrorMessage(error)); } }; @@ -190,29 +191,47 @@ export function RoutingRulesTable({ />
e.stopPropagation()}> -
- {canUpdate && ( - - )} - {canDelete && ( - - )} +
+ + e.stopPropagation()}> + + + + { + e.stopPropagation(); + onEdit(rule); + }} + data-testid={`routing-rule-edit-${rule.id}-btn`} + > + + Edit + + { + e.stopPropagation(); + setDeleteRuleId(rule.id); + }} + data-testid={`routing-rule-delete-${rule.id}-btn`} + > + + Delete + + +
@@ -294,4 +313,4 @@ function TargetsSummary({ targets }: { targets: RoutingTarget[] }) { )}
); -} \ No newline at end of file +} From d1f342a13f63040a41e60b9274d081f371e83b4f Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 15:54:38 +0530 Subject: [PATCH 28/81] feat: replace inline edit/delete buttons with dropdown menu in pricing overrides table (#3485) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replaces the individual Edit and Delete action buttons in the pricing overrides table with a consolidated `DropdownMenu` triggered by a `MoreHorizontal` icon. Also fixes a sidebar active state bug where sub-items were incorrectly matching routes, and adds `hasAPIKeyAccess` to the sidebar's memoization dependencies. ## Changes - Replaced separate Edit and Delete icon buttons in the pricing overrides table rows with a single `MoreHorizontal` actions dropdown containing labeled Edit and Delete menu items. The Delete item uses the destructive variant for visual clarity. - Fixed sidebar sub-item active state detection to use `isRouteMatch` instead of `pathname.startsWith`, preventing incorrect active highlighting on partial path matches. - Added `hasAPIKeyAccess` to the sidebar's `useMemo` dependency array, which was previously missing and could cause stale renders. ## Type of change - [ ] Bug fix - [x] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test 1. Navigate to the custom pricing overrides table. 2. Hover over a row and click the `⋯` (MoreHorizontal) button — a dropdown should appear with **Edit** and **Delete** options. 3. Clicking **Edit** should open the edit drawer without triggering row selection. 4. Clicking **Delete** should open the delete confirmation dialog without triggering row selection. 5. 
Verify sidebar sub-item active states are correct when navigating between nested routes — only the exact matching route should appear active. ```sh cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` ## Screenshots/Recordings Before: Two separate ghost icon buttons (pencil and trash) visible inline on each row. After: A single `⋯` button per row that reveals a dropdown with labeled **Edit** and **Delete** actions. ## Breaking changes - [ ] Yes - [x] No ## Related issues ## Security considerations No security implications. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../overrides/scopedPricingOverridesView.tsx | 63 ++++++++++------ ui/components/sidebar.tsx | 73 +++++++++---------- 2 files changed, 78 insertions(+), 58 deletions(-) diff --git a/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx b/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx index 4aa25c7fa1..baf006da28 100644 --- a/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx +++ b/ui/app/workspace/custom-pricing/overrides/scopedPricingOverridesView.tsx @@ -10,6 +10,7 @@ import { } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdownMenu"; import { Input } from "@/components/ui/input"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; import { useDebouncedValue } from "@/hooks/useDebounce"; @@ -25,7 +26,7 @@ import { import { useGetAllKeysQuery } from "@/lib/store/apis/providersApi"; import { PricingOverride, PricingOverrideScopeKind } 
from "@/lib/types/governance"; import { useLocation } from "@tanstack/react-router"; -import { ChevronLeft, ChevronRight, Edit, Plus, Search, Trash2 } from "lucide-react"; +import { ChevronLeft, ChevronRight, Edit, MoreHorizontal, Plus, Search, Trash2 } from "lucide-react"; import { useEffect, useMemo, useState } from "react"; import { toast } from "sonner"; import PricingOverrideSheet from "./pricingOverrideSheet"; @@ -312,25 +313,45 @@ export default function ScopedPricingOverridesView() { {keyLabel(row, providerKeyLabelMap)} {row.pattern} e.stopPropagation()}> -
- - +
+ + event.stopPropagation()}> + + + + { + event.stopPropagation(); + openEditDrawer(row); + }} + > + + Edit + + { + event.stopPropagation(); + setDeleteTarget(row); + }} + > + + Delete + + +
@@ -407,4 +428,4 @@ export default function ScopedPricingOverridesView() {
); -} \ No newline at end of file +} diff --git a/ui/components/sidebar.tsx b/ui/components/sidebar.tsx index ba2132a2a7..67a8598e49 100644 --- a/ui/components/sidebar.tsx +++ b/ui/components/sidebar.tsx @@ -260,15 +260,14 @@ const SidebarItemView = ({ const isHighlighted = !hasSubItems && highlightedUrl === item.url; - const buttonClassName = `relative h-7.5 cursor-pointer rounded-sm border px-3 transition-all duration-200 ${ - isHighlighted - ? "bg-sidebar-accent text-accent-foreground border-primary/20" - : isActive || isAnySubItemActive - ? "bg-sidebar-accent text-primary border-primary/20" - : item.hasAccess - ? "hover:bg-sidebar-accent hover:text-accent-foreground border-transparent text-slate-500 dark:text-zinc-400" - : "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" - } `; + const buttonClassName = `relative h-7.5 cursor-pointer rounded-sm border px-3 transition-all duration-200 ${isHighlighted + ? "bg-sidebar-accent text-accent-foreground border-primary/20" + : isActive || isAnySubItemActive + ? "bg-sidebar-accent text-primary border-primary/20" + : item.hasAccess + ? "hover:bg-sidebar-accent hover:text-accent-foreground border-transparent text-slate-500 dark:text-zinc-400" + : "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" + } `; const innerContent = (
@@ -407,7 +406,7 @@ const SidebarItemView = ({ const href = getSidebarItemHref(subItem); const isSubItemActive = subItem.queryParam ? pathname === subItem.url - : pathname.startsWith(subItem.url); + : isRouteMatch(subItem.url); const SubItemIcon = subItem.icon; const subSlug = slug(subItem.title); const inner = ( @@ -495,15 +494,14 @@ const SidebarItemView = ({ ? subItemHref.startsWith(highlightedUrl) : false; const SubItemIcon = subItem.icon; - const subItemClassName = `h-7 cursor-pointer rounded-sm px-2 transition-all duration-200 ${ - isSubItemHighlighted - ? "bg-sidebar-accent text-accent-foreground" - : isSubItemActive - ? "bg-sidebar-accent text-primary font-medium" - : subItem.hasAccess === false - ? "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" - : "hover:bg-sidebar-accent hover:text-accent-foreground text-slate-500 dark:text-zinc-400" - }`; + const subItemClassName = `h-7 cursor-pointer rounded-sm px-2 transition-all duration-200 ${isSubItemHighlighted + ? "bg-sidebar-accent text-accent-foreground" + : isSubItemActive + ? "bg-sidebar-accent text-primary font-medium" + : subItem.hasAccess === false + ? "hover:bg-destructive/5 hover:text-muted-foreground text-muted-foreground cursor-not-allowed border-transparent" + : "hover:bg-sidebar-accent hover:text-accent-foreground text-slate-500 dark:text-zinc-400" + }`; const subInner = (
{SubItemIcon && ( @@ -950,14 +948,14 @@ export default function AppSidebar() { }, ...(isDbConnected ? [ - { - title: "Prompt Repository", - url: "/workspace/prompt-repo", - icon: FolderGit, - description: "Prompt repository", - hasAccess: hasPromptRepositoryAccess, - }, - ] + { + title: "Prompt Repository", + url: "/workspace/prompt-repo", + icon: FolderGit, + description: "Prompt repository", + hasAccess: hasPromptRepositoryAccess, + }, + ] : []), { title: "Evals", @@ -1005,14 +1003,14 @@ export default function AppSidebar() { }, ...(IS_ENTERPRISE ? [ - { - title: "Proxy", - url: "/workspace/config/proxy", - icon: Globe, - description: "Proxy configuration", - hasAccess: hasSettingsAccess, - }, - ] + { + title: "Proxy", + url: "/workspace/config/proxy", + icon: Globe, + description: "Proxy configuration", + hasAccess: hasSettingsAccess, + }, + ] : []), { title: "API Keys", @@ -1033,6 +1031,7 @@ export default function AppSidebar() { ], [ hasLogsAccess, + hasAPIKeyAccess, hasObservabilityAccess, hasModelProvidersAccess, hasMCPGatewayAccess, @@ -1544,8 +1543,8 @@ export default function AppSidebar() { ))} {IS_ENTERPRISE && - userInfo && - (userInfo.name || userInfo.email) ? ( + userInfo && + (userInfo.name || userInfo.email) ? ( Date: Thu, 14 May 2026 15:55:42 +0530 Subject: [PATCH 29/81] feat: replace inline action buttons with dropdown menu and pin actions column in MCP clients table (#3486) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replaces the per-row inline action buttons (reconnect + delete) in the MCP clients table with a consolidated `MoreHorizontal` dropdown menu, and moves the actions column to a sticky right-pinned position so it remains visible when the table scrolls horizontally. ## Changes - Replaced the individual `Reconnect` (with tooltip) and `Delete` (with inline `AlertDialog`) buttons with a single `DropdownMenu` triggered by a `MoreHorizontal` icon button. 
- The delete confirmation `AlertDialog` is now lifted out of the table row and controlled via a `clientToDelete` state variable, preventing multiple dialog instances from being mounted inside the DOM simultaneously. - The actions column header and cell are now `sticky right-0` with `PIN_SHADOW_RIGHT` applied, keeping the actions visible during horizontal scroll. - The table container changed from `overflow-hidden` to `overflow-auto` to enable horizontal scrolling. - Reconnect and delete menu items are conditionally rendered based on RBAC access, rather than being rendered-but-disabled. - The `MoreHorizontal` button shows a `Loader2` spinner while a reconnect is in progress for that row. - Added `group` class to table rows to allow the sticky actions cell to mirror the row hover background. ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test 1. Navigate to the MCP Registry page in the workspace UI. 2. Verify each MCP client row shows a `⋯` (MoreHorizontal) button in the rightmost column. 3. Click the button and confirm the dropdown shows **Reconnect** and **Delete** options (subject to RBAC permissions). 4. Select **Reconnect** and confirm the spinner appears on the button while reconnecting. 5. Select **Delete** and confirm the confirmation dialog appears with the correct server name, and that confirming removes the client. 6. Resize the browser window to trigger horizontal scrolling and confirm the actions column remains pinned to the right. ```sh cd ui pnpm i || npm i pnpm build || npm run build ``` ## Screenshots/Recordings Before/after screenshots recommended showing the old inline icon buttons vs. the new dropdown menu. 
## Breaking changes - [x] No ## Related issues ## Security considerations RBAC checks are preserved — reconnect and delete menu items are only rendered when the user has the corresponding `Update` or `Delete` permission on `MCPGateway`. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../mcp-registry/views/mcpClientsTable.tsx | 148 +++++++++--------- 1 file changed, 77 insertions(+), 71 deletions(-) diff --git a/ui/app/workspace/mcp-registry/views/mcpClientsTable.tsx b/ui/app/workspace/mcp-registry/views/mcpClientsTable.tsx index 572e034ecc..889c62788c 100644 --- a/ui/app/workspace/mcp-registry/views/mcpClientsTable.tsx +++ b/ui/app/workspace/mcp-registry/views/mcpClientsTable.tsx @@ -8,19 +8,24 @@ import { AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, - AlertDialogTrigger, } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdownMenu"; +import { PIN_SHADOW_RIGHT } from "@/components/table/columnPinning"; import { Input } from "@/components/ui/input"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; -import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; import { useToast } from "@/hooks/use-toast"; import { MCP_STATUS_COLORS } from "@/lib/constants/config"; import { getErrorMessage, useDeleteMCPClientMutation, useReconnectMCPClientMutation } from "@/lib/store"; import { MCPClient } from "@/lib/types/mcp"; import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; -import { ChevronLeft, ChevronRight, Loader2, Plus, 
RefreshCcw, Search, Trash2 } from "lucide-react"; +import { ChevronLeft, ChevronRight, Loader2, MoreHorizontal, Plus, RefreshCcw, Search, Trash2 } from "lucide-react"; import { useState } from "react"; import { MCPServersEmptyState } from "./mcpServersEmptyState"; import MCPClientSheet from "./mcpClientSheet"; @@ -53,6 +58,7 @@ export default function MCPClientsTable({ const hasUpdateMCPClientAccess = useRbac(RbacResource.MCPGateway, RbacOperation.Update); const hasDeleteMCPClientAccess = useRbac(RbacResource.MCPGateway, RbacOperation.Delete); const [selectedMCPClient, setSelectedMCPClient] = useState(null); + const [clientToDelete, setClientToDelete] = useState(null); const [showDetailSheet, setShowDetailSheet] = useState(false); const { toast } = useToast(); @@ -177,6 +183,27 @@ export default function MCPClientsTable({ {showDetailSheet && selectedMCPClient && ( )} + !open && setClientToDelete(null)}> + + + Remove MCP Server + + Are you sure you want to remove MCP server {clientToDelete?.config.name}? You will need to reconnect the server to continue using it. + + + + Cancel + { + if (clientToDelete) void handleDelete(clientToDelete); + }} + className="bg-destructive hover:bg-destructive/90" + > + Delete + + + +
@@ -204,7 +231,7 @@ export default function MCPClientsTable({
-
+
@@ -217,7 +244,7 @@ export default function MCPClientsTable({ Auto-execute Tools State Enabled - + @@ -245,7 +272,7 @@ export default function MCPClientsTable({ return ( handleRowClick(c)} > {c.config.name} @@ -287,76 +314,55 @@ export default function MCPClientsTable({ {c.config.disabled ? "Disabled" : "Enabled"} - e.stopPropagation()}> - - - {/* The wrapping is required: Radix Tooltip (and native title) don't fire on disabled buttons because the browser swallows pointer events. The span receives them and forwards to the tooltip. */} - - - - - - - {isPerUserOAuth - ? "Reconnect is not applicable for per-user OAuth, each user manages their own auth." - : c.config.disabled - ? "Enable the client before reconnecting." - : "Reconnect"} - - - - - - + e.stopPropagation()} + > + + - - - - Remove MCP Server - - Are you sure you want to remove MCP server {c.config.name}? You will need to reconnect the server to continue - using it. - - - - Cancel - handleDelete(c)} className="bg-destructive hover:bg-destructive/90"> + + + {hasUpdateMCPClientAccess && ( + { + e.preventDefault(); + void handleReconnect(c); + }} + > + + Reconnect + + )} + {hasDeleteMCPClientAccess && ( + { + e.preventDefault(); + setClientToDelete(c); + }} + > + Delete - - - - + + )} + + ); @@ -400,4 +406,4 @@ export default function MCPClientsTable({ {formOpen && setFormOpen(false)} onSaved={handleSaved} />} ); -} \ No newline at end of file +} From f2a270b6ed6231be66ca34244a713cb5bfbfd55c Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 16:00:33 +0530 Subject: [PATCH 30/81] feat: replace inline action buttons with pinned dropdown menus and add active toggle switch in teams and virtual keys tables (#3487) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Replaces the inline edit/delete action buttons in the Teams and Virtual Keys tables with a consolidated `MoreHorizontal` dropdown menu per 
row. The actions column is now sticky-pinned to the right edge of the table so it remains visible when the table scrolls horizontally. The Virtual Keys table also replaces the status badge with an inline active/inactive toggle switch. ## Changes - Extracted `TeamActionsMenu` and `VKActionsMenu` components that render a `DropdownMenu` containing Edit and Delete items, with the delete confirmation `AlertDialog` controlled via local state rather than being triggered directly from an `AlertDialogTrigger`. - Applied the same dropdown pattern to the Customers table via a new `CustomerActionsMenu`; there the delete confirmation `AlertDialog` is hoisted into `CustomersTable` and driven by a `confirmDeleteCustomer` state value. - Removed the hover-only opacity animation on action buttons in favor of always-visible dropdown triggers. - The actions `TableHead` and `TableCell` are now sticky (`sticky right-0 z-10`) with `PIN_SHADOW_RIGHT` applied and background colors that match the row hover state, keeping the pinned column visually consistent. - Tables are given a `min-w` value and their container uses `overflow-auto` to support horizontal scrolling without breaking the sticky column. - `VKStatusBadge` is replaced by `VKActiveSwitch`, which renders a `Switch` component and reports changes through an `onToggle` prop; `VirtualKeysTable` handles it in `handleToggleActive`, which calls `useUpdateVirtualKeyMutation` to toggle `is_active` inline. Managed-by-profile keys disable the switch and show a tooltip title. - The managed-by-profile delete tooltip/disabled-button pattern is replaced by a disabled destructive `DropdownMenuItem` with a `title` attribute. - `handleEditVirtualKey` no longer requires a `MouseEvent` argument since click propagation is handled at the cell level. ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [ ] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [x] UI (React) - [ ] Docs ## How to test 1. Navigate to the **Governance → Teams** page. - Verify the actions column stays pinned to the right when scrolling horizontally. - Click the `MoreHorizontal` button on a row and confirm Edit and Delete items appear. 
- Confirm the delete confirmation dialog opens from the dropdown and completes successfully. 2. Navigate to the **Virtual Keys** page. - Verify the active toggle switch reflects the current `is_active` state and toggling it updates the key immediately with a success toast. - Confirm keys managed by an access profile show a disabled switch and a disabled Delete item in the dropdown. - Verify the sticky actions column behaves correctly on horizontal scroll. ```sh cd ui pnpm i pnpm build ``` ## Screenshots/Recordings _Add before/after screenshots showing the dropdown menu and sticky column behavior._ ## Breaking changes - [x] No ## Related issues ## Security considerations No new auth surfaces introduced. RBAC checks (`hasUpdateAccess`, `hasDeleteAccess`) are preserved on all action items. ## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../governance/views/customerTable.tsx | 156 +++++++----- .../workspace/governance/views/teamsTable.tsx | 159 ++++++++----- .../virtual-keys/views/virtualKeysTable.tsx | 222 ++++++++++-------- 3 files changed, 333 insertions(+), 204 deletions(-) diff --git a/ui/app/workspace/governance/views/customerTable.tsx b/ui/app/workspace/governance/views/customerTable.tsx index 50f4b9b127..c3982f2455 100644 --- a/ui/app/workspace/governance/views/customerTable.tsx +++ b/ui/app/workspace/governance/views/customerTable.tsx @@ -1,3 +1,4 @@ +import { PIN_SHADOW_RIGHT } from "@/components/table/columnPinning"; import { AlertDialog, AlertDialogAction, @@ -7,10 +8,10 @@ import { AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, - AlertDialogTrigger, } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { DropdownMenu, 
DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdownMenu"; import { Progress } from "@/components/ui/progress"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; @@ -21,7 +22,7 @@ import { cn } from "@/lib/utils"; import { formatCurrency } from "@/lib/utils/governance"; import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; import { Input } from "@/components/ui/input"; -import { ChevronLeft, ChevronRight, Edit, Plus, Search, Trash2 } from "lucide-react"; +import { ChevronLeft, ChevronRight, Edit, MoreHorizontal, Plus, Search, Trash2 } from "lucide-react"; import { useState } from "react"; import { toast } from "sonner"; import CustomerDialog from "./customerDialog"; @@ -32,6 +33,61 @@ const formatResetDuration = (duration: string) => { return resetDurationLabels[duration] || duration; }; +const ACTIONS_COLUMN_CLASS = `sticky right-0 z-10 w-[56px] min-w-[56px] text-right ${PIN_SHADOW_RIGHT}`; + +interface CustomerActionsMenuProps { + customer: Customer; + canUpdate: boolean; + canDelete: boolean; + onEdit: (customer: Customer) => void; + onDelete: (customer: Customer) => void; +} + +function CustomerActionsMenu({ customer, canUpdate, canDelete, onEdit, onDelete }: CustomerActionsMenuProps) { + return ( + + + + + + { + e.stopPropagation(); + onEdit(customer); + }} + > + + Edit + + { + e.stopPropagation(); + onDelete(customer); + }} + > + + Delete + + + + ); +} + interface CustomersTableProps { customers: Customer[]; totalCount: number; @@ -59,6 +115,7 @@ export default function CustomersTable({ }: CustomersTableProps) { const [showCustomerDialog, setShowCustomerDialog] = useState(false); const [editingCustomer, setEditingCustomer] = useState(null); + const [confirmDeleteCustomer, setConfirmDeleteCustomer] = useState(null); const hasCreateAccess = 
useRbac(RbacResource.Customers, RbacOperation.Create); const hasUpdateAccess = useRbac(RbacResource.Customers, RbacOperation.Update); @@ -72,6 +129,8 @@ export default function CustomersTable({ toast.success("Customer deleted successfully"); } catch (error) { toast.error(getErrorMessage(error)); + } finally { + setConfirmDeleteCustomer(null); } }; @@ -147,8 +206,8 @@ export default function CustomersTable({ -
-
+
+
Name @@ -156,7 +215,7 @@ export default function CustomersTable({ Budget Rate Limit Virtual Keys - + @@ -363,54 +422,20 @@ export default function CustomersTable({ - )} - -
- - - - - - - - Delete Customer - - Are you sure you want to delete "{customer.name}"? This will also delete all associated teams - and unassign any virtual keys. This action cannot be undone. - - - - Cancel - handleDelete(customer.id)} - disabled={isDeleting} - className="bg-red-600 hover:bg-red-700" - > - {isDeleting ? "Deleting..." : "Delete"} - - - - -
+ + ); @@ -449,7 +474,30 @@ export default function CustomersTable({ )} + + !open && setConfirmDeleteCustomer(null)}> + + + Delete Customer + + Are you sure you want to delete "{confirmDeleteCustomer?.name}"? This will also delete all associated teams and + unassign any virtual keys. This action cannot be undone. + + + + Cancel + confirmDeleteCustomer && handleDelete(confirmDeleteCustomer.id)} + disabled={isDeleting} + className="bg-red-600 hover:bg-red-700" + > + {isDeleting ? "Deleting..." : "Delete"} + + + + ); -} \ No newline at end of file +} diff --git a/ui/app/workspace/governance/views/teamsTable.tsx b/ui/app/workspace/governance/views/teamsTable.tsx index 4445f057b0..98c3eae6b9 100644 --- a/ui/app/workspace/governance/views/teamsTable.tsx +++ b/ui/app/workspace/governance/views/teamsTable.tsx @@ -7,10 +7,16 @@ import { AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, - AlertDialogTrigger, } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { PIN_SHADOW_RIGHT } from "@/components/table/columnPinning"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdownMenu"; import { Progress } from "@/components/ui/progress"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; @@ -21,8 +27,8 @@ import { cn } from "@/lib/utils"; import { formatCurrency } from "@/lib/utils/governance"; import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; import { Input } from "@/components/ui/input"; -import { ChevronLeft, ChevronRight, Edit, Plus, Search, Trash2 } from "lucide-react"; -import { useEffect } from "react"; +import { ChevronLeft, ChevronRight, Edit, MoreHorizontal, Plus, Search, Trash2 } from "lucide-react"; +import { useEffect, useState } 
from "react"; import { toast } from "sonner"; import TeamDialog from "./teamDialog"; import { TeamsEmptyState } from "./teamsEmptyState"; @@ -32,6 +38,89 @@ const formatResetDuration = (duration: string) => { return resetDurationLabels[duration] || duration; }; +function TeamActionsMenu({ + team, + hasUpdateAccess, + hasDeleteAccess, + isDeleting, + onEdit, + onDelete, +}: { + team: Team; + hasUpdateAccess: boolean; + hasDeleteAccess: boolean; + isDeleting: boolean; + onEdit: (team: Team) => void; + onDelete: (teamId: string) => void; +}) { + const [deleteOpen, setDeleteOpen] = useState(false); + + return ( + <> + + + + + + { + e.preventDefault(); + onEdit(team); + }} + > + + Edit + + { + e.preventDefault(); + setDeleteOpen(true); + }} + > + + Delete + + + + + + + Delete Team + + Are you sure you want to delete "{team.name}"? This will also unassign any virtual keys from this team. This action cannot be undone. + + + + Cancel + onDelete(team.id)} + disabled={isDeleting} + className="bg-red-600 hover:bg-red-700" + > + {isDeleting ? "Deleting..." : "Delete"} + + + + + + ); +} + interface TeamsTableProps { teams: Team[]; totalCount: number; @@ -164,8 +253,8 @@ export default function TeamsTable({ -
-
+
+
Name @@ -173,7 +262,7 @@ export default function TeamsTable({ Budget Rate Limit Virtual Keys - + @@ -374,53 +463,15 @@ export default function TeamsTable({ - )} - -
- - - - - - - - Delete Team - - Are you sure you want to delete "{team.name}"? This will also unassign any virtual keys from - this team. This action cannot be undone. - - - - Cancel - handleDelete(team.id)} - disabled={isDeleting} - className="bg-red-600 hover:bg-red-700" - > - {isDeleting ? "Deleting..." : "Delete"} - - - - -
+ + ); @@ -462,4 +513,4 @@ export default function TeamsTable({ ); -} \ No newline at end of file +} diff --git a/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx b/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx index 5bd9a3ee6b..88b3d873e8 100644 --- a/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx +++ b/ui/app/workspace/virtual-keys/views/virtualKeysTable.tsx @@ -1,4 +1,5 @@ import { RateLimitDisplay } from "@/components/rateLimitDisplay"; +import { PIN_SHADOW_RIGHT } from "@/components/table/columnPinning"; import { AlertDialog, AlertDialogAction, @@ -8,19 +9,24 @@ import { AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, - AlertDialogTrigger, } from "@/components/ui/alertDialog"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { ComboboxSelect } from "@/components/ui/combobox"; import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from "@/components/ui/dropdownMenu"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; +import { Switch } from "@/components/ui/switch"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"; -import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; import { useCopyToClipboard } from "@/hooks/useCopyToClipboard"; import { resetDurationLabels } from "@/lib/constants/governance"; -import { getErrorMessage, useDeleteVirtualKeyMutation, useLazyGetVirtualKeysQuery } from "@/lib/store"; +import { getErrorMessage, useDeleteVirtualKeyMutation, useLazyGetVirtualKeysQuery, useUpdateVirtualKeyMutation } from "@/lib/store"; import { Customer, Team, VirtualKey } from "@/lib/types/governance"; import { cn } from "@/lib/utils"; import { formatCurrency } from 
"@/lib/utils/governance"; @@ -37,6 +43,7 @@ import { Eye, EyeOff, Loader2, + MoreHorizontal, Plus, Search, ShieldCheck, @@ -113,96 +120,110 @@ function VKRateLimitCell({ vk }: { vk: VirtualKey }) { return ; } -// Status badge derives exhaustion from the same AP-backed source as the budget/rate-limit cells -// so managed keys don't show "Active" next to an exhausted-looking bar. -function VKStatusBadge({ vk }: { vk: VirtualKey }) { - const { isExhausted } = useVirtualKeyUsage(vk); +function VKActiveSwitch({ + vk, + hasUpdateAccess, + onToggle, +}: { + vk: VirtualKey; + hasUpdateAccess: boolean; + onToggle: (vk: VirtualKey, checked: boolean) => Promise; +}) { + const { isManagedByProfile } = useVirtualKeyUsage(vk); + return ( - - {vk.is_active ? (isExhausted ? "Exhausted" : "Active") : "Inactive"} - + onToggle(vk, checked)} + /> ); } -// Per-row delete button. Calls useVirtualKeyUsage (same cached query as the budget/ -// rate-limit cells — RTK dedupes) to detect managed-by-AP VKs and swap the normal -// delete AlertDialog for a disabled button + tooltip so users aren't lured into a -// confirm-then-403 loop. -function VKDeleteButton({ +function VKActionsMenu({ vk, + hasUpdateAccess, hasDeleteAccess, isDeleting, + onEdit, onDelete, }: { vk: VirtualKey; + hasUpdateAccess: boolean; hasDeleteAccess: boolean; isDeleting: boolean; + onEdit: (vk: VirtualKey) => void; onDelete: (vkId: string) => void; }) { const { isManagedByProfile } = useVirtualKeyUsage(vk); - - if (isManagedByProfile) { - return ( - - - - - - - - -

- This virtual key is managed by an access profile and can't be deleted here. Detach the profile from the user or delete it - from the access profile settings. -

-
-
-
- ); - } + const [deleteOpen, setDeleteOpen] = useState(false); return ( - - - - - - - Delete Virtual Key - - Are you sure you want to delete "{vk.name.length > 20 ? `${vk.name.slice(0, 20)}...` : vk.name}"? This action cannot be undone. - - - - Cancel - onDelete(vk.id)} - disabled={isDeleting} - className="bg-destructive hover:bg-destructive/90" - data-testid={`vk-delete-confirm-${vk.name}`} + <> + + + + + + { + e.preventDefault(); + onEdit(vk); + }} > - {isDeleting ? "Deleting..." : "Delete"} - - - - + + Edit + + { + e.preventDefault(); + setDeleteOpen(true); + }} + > + + Delete + + + + + + + Delete Virtual Key + + Are you sure you want to delete "{vk.name.length > 20 ? `${vk.name.slice(0, 20)}...` : vk.name}"? This action cannot be undone. + + + + Cancel + onDelete(vk.id)} + disabled={isDeleting} + className="bg-destructive hover:bg-destructive/90" + data-testid={`vk-delete-confirm-${vk.name}`} + > + {isDeleting ? "Deleting..." : "Delete"} + + + + + ); } @@ -270,6 +291,7 @@ export default function VirtualKeysTable({ const hasDeleteAccess = useRbac(RbacResource.VirtualKeys, RbacOperation.Delete); const [deleteVirtualKey, { isLoading: isDeleting }] = useDeleteVirtualKeyMutation(); + const [updateVirtualKey] = useUpdateVirtualKeyMutation(); const handleDelete = async (vkId: string) => { try { @@ -280,13 +302,22 @@ export default function VirtualKeysTable({ } }; + const handleToggleActive = async (vk: VirtualKey, checked: boolean) => { + try { + await updateVirtualKey({ vkId: vk.id, data: { is_active: checked } }).unwrap(); + toast.success(`Virtual key ${checked ? 
"enabled" : "disabled"}`); + } catch (error) { + toast.error(getErrorMessage(error)); + throw error; + } + }; + const handleAddVirtualKey = () => { setEditingVirtualKeyId(null); setShowVirtualKeySheet(true); }; - const handleEditVirtualKey = (vk: VirtualKey, e: React.MouseEvent) => { - e.stopPropagation(); // Prevent row click + const handleEditVirtualKey = (vk: VirtualKey) => { setEditingVirtualKeyId(vk.id); setShowVirtualKeySheet(true); }; @@ -563,7 +594,7 @@ export default function VirtualKeysTable({
-
+
@@ -578,7 +609,7 @@ export default function VirtualKeysTable({ - + @@ -596,7 +627,7 @@ export default function VirtualKeysTable({ handleRowClick(vk)} > @@ -642,22 +673,21 @@ export default function VirtualKeysTable({ - - + e.stopPropagation()}> + - e.stopPropagation()}> -
- - -
+ e.stopPropagation()} + > +
); @@ -700,4 +730,4 @@ export default function VirtualKeysTable({ ); -} \ No newline at end of file +} From 9a83fde208cabe71dc1a1f9dc26502dbca301400 Mon Sep 17 00:00:00 2001 From: Suresh Chaudhary <83772622+impoiler@users.noreply.github.com> Date: Thu, 14 May 2026 16:03:37 +0530 Subject: [PATCH 31/81] chore: `/ui` code formatting (#3494) ## Summary Formatted the /ui codebase ## Changes - What was changed and why - Any notable design decisions or trade-offs ## Type of change - [ ] Bug fix - [ ] Feature - [x] Refactor - [ ] Documentation - [x] Chore/CI ## Affected areas - [ ] Core (Go) - [ ] Transports (HTTP) - [ ] Providers/Integrations - [ ] Plugins - [ ] UI (React) - [ ] Docs ## How to test Describe the steps to validate this change. Include commands and expected outcomes. ```sh # Core/Transports go version go test ./... # UI cd ui pnpm i || npm i pnpm test || npm test pnpm build || npm run build ``` If adding new configs or environment variables, document them here. ## Screenshots/Recordings If UI changes, add before/after screenshots or short clips. ## Breaking changes - [ ] Yes - [x] No If yes, describe impact and migration instructions. ## Related issues ## Security considerations Note any security implications (auth, secrets, PII, sandboxing, etc.). 
## Checklist - [ ] I read `docs/contributing/README.md` and followed the guidelines - [ ] I added/updated tests where appropriate - [ ] I updated documentation where needed - [ ] I verified builds succeed (Go and UI) - [ ] I verified the CI pipeline passes locally if applicable --- .../mcp-tool-groups/mcpToolGroups.tsx | 2 +- .../components/user-groups/teamsView.tsx | 4 +- .../enterprise/lib/contexts/rbacContext.tsx | 133 +- .../lib/store/apis/accessProfileApi.ts | 2 +- .../enterprise/lib/store/apis/scimApi.ts | 2 +- .../lib/store/apis/virtualKeyUsersApi.ts | 2 +- .../enterprise/lib/types/accessProfile.ts | 2 +- .../_fallbacks/enterprise/lib/types/user.ts | 2 +- ui/app/workspace/cluster/page.tsx | 2 +- ui/app/workspace/config/views/cachingView.tsx | 98 +- ui/app/workspace/config/views/loggingView.tsx | 47 +- ui/app/workspace/config/views/mcpView.tsx | 703 ++- .../workspace/config/views/securityView.tsx | 777 ++- .../overrides/pricingOverrideSheet.tsx | 36 +- .../overrides/scopedPricingOverridesView.tsx | 2 +- .../dashboard/components/charts/chartCard.tsx | 98 +- .../dashboard/components/charts/costChart.tsx | 11 +- .../charts/externalCacheTokenMeterChart.tsx | 8 +- .../components/charts/latencyChart.tsx | 2 +- .../charts/localCacheTokenMeterChart.tsx | 2 +- .../components/charts/logVolumeChart.tsx | 343 +- .../components/charts/mcpCostChart.tsx | 2 +- .../components/charts/mcpTopToolsChart.tsx | 2 +- .../components/charts/mcpVolumeChart.tsx | 2 +- .../components/charts/modelUsageChart.tsx | 9 +- .../components/charts/providerCostChart.tsx | 14 +- .../charts/providerLatencyChart.tsx | 14 +- .../components/charts/providerTokenChart.tsx | 20 +- .../components/charts/tokenUsageChart.tsx | 2 +- .../workspace/dashboard/components/mcpTab.tsx | 2 +- .../dashboard/components/modelRankingsTab.tsx | 13 +- .../dashboard/components/overviewTab.tsx | 2 +- .../dashboard/components/providerUsageTab.tsx | 2 +- ui/app/workspace/governance/layout.tsx | 2 +- 
ui/app/workspace/governance/teams/page.tsx | 6 +- ui/app/workspace/governance/users/page.tsx | 2 +- .../governance/views/customerTable.tsx | 2 +- .../workspace/governance/views/teamDialog.tsx | 1282 ++--- .../workspace/governance/views/teamsTable.tsx | 49 +- .../governance/virtual-keys/page.tsx | 270 +- ui/app/workspace/logs/page.tsx | 32 +- .../workspace/logs/sheets/logDetailView.tsx | 4675 +++++++---------- .../logs/sheets/sessionDetailsSheet.tsx | 1 - ui/app/workspace/logs/views/columns.tsx | 852 ++- .../logs/views/logChatMessageView.tsx | 7 +- .../logs/views/logResponsesMessageView.tsx | 37 +- ui/app/workspace/logs/views/logsTable.tsx | 12 +- .../workspace/logs/views/logsVolumeChart.tsx | 940 ++-- ui/app/workspace/logs/views/ocrView.tsx | 24 +- ui/app/workspace/mcp-logs/page.tsx | 1101 ++-- ui/app/workspace/mcp-logs/views/columns.tsx | 146 +- .../workspace/mcp-logs/views/mcpLogsTable.tsx | 2 +- .../mcp-registry/views/mcpClientForm.tsx | 30 +- .../mcp-registry/views/mcpClientSheet.tsx | 20 +- .../mcp-registry/views/mcpClientsTable.tsx | 24 +- .../mcp-registry/views/oauth2Authorizer.tsx | 34 +- ui/app/workspace/mcp-tool-groups/layout.tsx | 2 +- .../model-catalog/views/modelCatalogTable.tsx | 2 +- .../model-catalog/views/modelCatalogView.tsx | 16 +- .../model-limits/views/modelLimitSheet.tsx | 10 +- .../model-limits/views/modelLimitsTable.tsx | 2 +- .../fragments/maximFormFragment.tsx | 8 +- .../fragments/otelFormFragment.tsx | 8 +- .../fragments/prometheusFormFragment.tsx | 1086 ++-- .../plugins/sheets/addNewPluginSheet.tsx | 2 +- .../dialogs/addNewCustomProviderSheet.tsx | 9 +- .../fragments/apiStructureFormFragment.tsx | 6 +- .../fragments/governanceFormFragment.tsx | 18 +- .../fragments/networkFormFragment.tsx | 6 +- .../fragments/performanceFormFragment.tsx | 2 +- .../providers/fragments/proxyFormFragment.tsx | 2 +- ui/app/workspace/providers/page.tsx | 20 +- .../views/modelProviderKeysTableView.tsx | 10 +- .../providers/views/providerKeyForm.tsx | 18 +- 
ui/app/workspace/routing-rules/layout.tsx | 2 +- .../routing-rules/views/routingRuleSheet.tsx | 18 +- .../routing-rules/views/routingRulesTable.tsx | 2 +- ui/app/workspace/scim/page.tsx | 2 +- .../virtual-keys/hooks/useVirtualKeyUsage.ts | 2 +- .../views/virtualKeyDetailsSheet.tsx | 30 +- .../virtual-keys/views/virtualKeySheet.tsx | 47 +- .../virtual-keys/views/virtualKeysTable.tsx | 45 +- ui/components/filters/logsFilterSidebar.tsx | 76 +- ui/components/filters/mcpFilterSidebar.tsx | 27 +- .../prompts/fragments/settingsPanel.tsx | 6 +- ui/components/prompts/sheets/promptSheet.tsx | 8 +- ui/components/provider.tsx | 2 +- ui/components/rateLimitDisplay.tsx | 20 +- ui/components/sidebar.tsx | 2799 +++++----- ui/components/trialExpiryBanner.tsx | 10 +- ui/components/ui/asyncMultiselect.tsx | 4 +- ui/components/ui/badge.tsx | 60 +- ui/components/ui/combobox.tsx | 7 +- .../ui/custom/celBuilder/valueEditor.tsx | 2 +- ui/components/ui/dialog.tsx | 4 +- ui/components/ui/envVarInput.tsx | 10 +- ui/components/ui/multibudgets.tsx | 238 +- ui/components/ui/popover.tsx | 11 +- ui/components/ui/select.tsx | 4 +- ui/components/ui/sheet.tsx | 8 +- ui/components/ui/tooltip.tsx | 62 +- ui/lib/constants/icons.tsx | 1301 +++-- ui/lib/store/apis/devApi.ts | 134 +- ui/lib/types/logs.ts | 2 +- ui/lib/types/mcp.ts | 2 +- ui/lib/types/schemas.ts | 1956 ++++--- ui/lib/utils.ts | 4 +- ui/lib/utils/browser-download.ts | 56 +- ui/lib/utils/envVarForm.ts | 2 +- ui/lib/utils/governance.ts | 17 +- ui/lib/utils/routingRuleGroupQuery.ts | 7 +- 111 files changed, 9036 insertions(+), 11090 deletions(-) diff --git a/ui/app/_fallbacks/enterprise/components/mcp-tool-groups/mcpToolGroups.tsx b/ui/app/_fallbacks/enterprise/components/mcp-tool-groups/mcpToolGroups.tsx index 8e6dfc089d..a301c4302a 100644 --- a/ui/app/_fallbacks/enterprise/components/mcp-tool-groups/mcpToolGroups.tsx +++ b/ui/app/_fallbacks/enterprise/components/mcp-tool-groups/mcpToolGroups.tsx @@ -4,7 +4,7 @@ import ContactUsView from 
"../views/contactUsView"; export default function MCPToolGroups() { return ( <> -
+

MCP tool groups

Configure tool groups for MCP servers to organize and govern tools.

diff --git a/ui/app/_fallbacks/enterprise/components/user-groups/teamsView.tsx b/ui/app/_fallbacks/enterprise/components/user-groups/teamsView.tsx index 51f8547dd8..997da5fae2 100644 --- a/ui/app/_fallbacks/enterprise/components/user-groups/teamsView.tsx +++ b/ui/app/_fallbacks/enterprise/components/user-groups/teamsView.tsx @@ -105,7 +105,9 @@ export function TeamsView() { onOffsetChange={(newOffset) => setUrlState({ offset: newOffset })} selectedTeamId={urlState.selected_team || null} onTeamAdd={() => setUrlState({ selected_team: "new" })} - onTeamSelect={(team) => { setUrlState({ selected_team: team?.id ?? null }) }} + onTeamSelect={(team) => { + setUrlState({ selected_team: team?.id ?? null }); + }} onDialogClose={() => setUrlState({ selected_team: null })} />
diff --git a/ui/app/_fallbacks/enterprise/lib/contexts/rbacContext.tsx b/ui/app/_fallbacks/enterprise/lib/contexts/rbacContext.tsx index 4a4e777b1f..92f75fac06 100644 --- a/ui/app/_fallbacks/enterprise/lib/contexts/rbacContext.tsx +++ b/ui/app/_fallbacks/enterprise/lib/contexts/rbacContext.tsx @@ -2,91 +2,88 @@ import { createContext, useContext } from "react"; // RBAC Resource Names (must match backend definitions) export enum RbacResource { - GuardrailsConfig = "GuardrailsConfig", - GuardrailsProviders = "GuardrailsProviders", - GuardrailRules = "GuardrailRules", - UserProvisioning = "UserProvisioning", - Cluster = "Cluster", - Settings = "Settings", - Users = "Users", - Logs = "Logs", - Observability = "Observability", - VirtualKeys = "VirtualKeys", - ModelProvider = "ModelProvider", - Plugins = "Plugins", - MCPGateway = "MCPGateway", - MCPToolGroups = "MCPToolGroups", - MCPLogs = "MCPLogs", - AdaptiveRouter = "AdaptiveRouter", - AuditLogs = "AuditLogs", - Customers = "Customers", - Teams = "Teams", - RBAC = "RBAC", - Governance = "Governance", - RoutingRules = "RoutingRules", - PIIRedactor = "PIIRedactor", - PromptRepository = "PromptRepository", - PromptDeploymentStrategy = "PromptDeploymentStrategy", - AccessProfiles = "AccessProfiles", - APIKeys = "APIKeys", - Inference = "Inference", - Metrics = "Metrics", + GuardrailsConfig = "GuardrailsConfig", + GuardrailsProviders = "GuardrailsProviders", + GuardrailRules = "GuardrailRules", + UserProvisioning = "UserProvisioning", + Cluster = "Cluster", + Settings = "Settings", + Users = "Users", + Logs = "Logs", + Observability = "Observability", + VirtualKeys = "VirtualKeys", + ModelProvider = "ModelProvider", + Plugins = "Plugins", + MCPGateway = "MCPGateway", + MCPToolGroups = "MCPToolGroups", + MCPLogs = "MCPLogs", + AdaptiveRouter = "AdaptiveRouter", + AuditLogs = "AuditLogs", + Customers = "Customers", + Teams = "Teams", + RBAC = "RBAC", + Governance = "Governance", + RoutingRules = "RoutingRules", + PIIRedactor 
= "PIIRedactor", + PromptRepository = "PromptRepository", + PromptDeploymentStrategy = "PromptDeploymentStrategy", + AccessProfiles = "AccessProfiles", + APIKeys = "APIKeys", + Inference = "Inference", + Metrics = "Metrics", } // RBAC Operation Names (must match backend definitions) export enum RbacOperation { - Read = "Read", - View = "View", - Create = "Create", - Update = "Update", - Delete = "Delete", - Download = "Download", + Read = "Read", + View = "View", + Create = "Create", + Update = "Update", + Delete = "Delete", + Download = "Download", } interface RbacContextType { - isAllowed: (resource: RbacResource, operation: RbacOperation) => boolean; - permissions: Record>; - isLoading: boolean; - refetch: () => void; + isAllowed: (resource: RbacResource, operation: RbacOperation) => boolean; + permissions: Record>; + isLoading: boolean; + refetch: () => void; } const RbacContext = createContext(null); // Dummy provider that allows all permissions export function RbacProvider({ children }: { children: React.ReactNode }) { - return ( - true, // Always allow in OSS - permissions: {}, - isLoading: false, - refetch: () => {}, - }} - > - {children} - - ); + return ( + true, // Always allow in OSS + permissions: {}, + isLoading: false, + refetch: () => {}, + }} + > + {children} + + ); } // Hook that always returns true (no restrictions in OSS) -export function useRbac( - _resource: RbacResource, - _operation: RbacOperation, -): boolean { - return true; +export function useRbac(_resource: RbacResource, _operation: RbacOperation): boolean { + return true; } // Hook to access full RBAC context export function useRbacContext() { - const context = useContext(RbacContext); - if (!context) { - // Return dummy values if used outside provider - return { - isAllowed: () => true, - permissions: {}, - isLoading: false, - refetch: () => {}, - }; - } - return context; -} + const context = useContext(RbacContext); + if (!context) { + // Return dummy values if used outside provider + 
return { + isAllowed: () => true, + permissions: {}, + isLoading: false, + refetch: () => {}, + }; + } + return context; +} \ No newline at end of file diff --git a/ui/app/_fallbacks/enterprise/lib/store/apis/accessProfileApi.ts b/ui/app/_fallbacks/enterprise/lib/store/apis/accessProfileApi.ts index 5830eed3bb..c5038baeee 100644 --- a/ui/app/_fallbacks/enterprise/lib/store/apis/accessProfileApi.ts +++ b/ui/app/_fallbacks/enterprise/lib/store/apis/accessProfileApi.ts @@ -15,4 +15,4 @@ export const useGetUserAccessProfilesQuery = ( isLoading: false, isError: false, error: null, -}); +}); \ No newline at end of file diff --git a/ui/app/_fallbacks/enterprise/lib/store/apis/scimApi.ts b/ui/app/_fallbacks/enterprise/lib/store/apis/scimApi.ts index 9b2031999d..5f31f3c0c1 100644 --- a/ui/app/_fallbacks/enterprise/lib/store/apis/scimApi.ts +++ b/ui/app/_fallbacks/enterprise/lib/store/apis/scimApi.ts @@ -13,4 +13,4 @@ export const useGetAuthTypeQuery = ( isLoading: false, isError: false, error: null, -}); +}); \ No newline at end of file diff --git a/ui/app/_fallbacks/enterprise/lib/store/apis/virtualKeyUsersApi.ts b/ui/app/_fallbacks/enterprise/lib/store/apis/virtualKeyUsersApi.ts index fc33a9ff78..e45cc092a7 100644 --- a/ui/app/_fallbacks/enterprise/lib/store/apis/virtualKeyUsersApi.ts +++ b/ui/app/_fallbacks/enterprise/lib/store/apis/virtualKeyUsersApi.ts @@ -19,4 +19,4 @@ export const useGetVirtualKeyUsersQuery = ( isLoading: false, isError: false, error: null, -}); +}); \ No newline at end of file diff --git a/ui/app/_fallbacks/enterprise/lib/types/accessProfile.ts b/ui/app/_fallbacks/enterprise/lib/types/accessProfile.ts index 66dfb23d98..414743dafb 100644 --- a/ui/app/_fallbacks/enterprise/lib/types/accessProfile.ts +++ b/ui/app/_fallbacks/enterprise/lib/types/accessProfile.ts @@ -38,4 +38,4 @@ export interface UserAccessProfile { export interface GetUserAccessProfilesResponse { access_profiles: UserAccessProfile[]; -} +} \ No newline at end of file diff --git 
a/ui/app/_fallbacks/enterprise/lib/types/user.ts b/ui/app/_fallbacks/enterprise/lib/types/user.ts index b5b6727fe9..8aea128c02 100644 --- a/ui/app/_fallbacks/enterprise/lib/types/user.ts +++ b/ui/app/_fallbacks/enterprise/lib/types/user.ts @@ -27,4 +27,4 @@ export interface GetUsersResponse { limit: number; total_pages: number; has_more: boolean; -} +} \ No newline at end of file diff --git a/ui/app/workspace/cluster/page.tsx b/ui/app/workspace/cluster/page.tsx index 8cb1b4be26..155d593efb 100644 --- a/ui/app/workspace/cluster/page.tsx +++ b/ui/app/workspace/cluster/page.tsx @@ -2,7 +2,7 @@ import ClusterView from "@enterprise/components/cluster/clusterView"; export default function ClusterPage() { return ( -
+
); diff --git a/ui/app/workspace/config/views/cachingView.tsx b/ui/app/workspace/config/views/cachingView.tsx index 1118d0f6b0..c1279ce1a8 100644 --- a/ui/app/workspace/config/views/cachingView.tsx +++ b/ui/app/workspace/config/views/cachingView.tsx @@ -240,15 +240,15 @@ export default function CachingView() { try { const updated = semanticCachePlugin ? await updatePlugin({ - name: SEMANTIC_CACHE_PLUGIN, - data: { enabled: semanticCachePlugin.enabled, config: payload }, - }).unwrap() + name: SEMANTIC_CACHE_PLUGIN, + data: { enabled: semanticCachePlugin.enabled, config: payload }, + }).unwrap() : await createPlugin({ - name: SEMANTIC_CACHE_PLUGIN, - enabled: false, - config: payload, - path: "", - }).unwrap(); + name: SEMANTIC_CACHE_PLUGIN, + enabled: false, + config: payload, + path: "", + }).unwrap(); const editor = toEditorCacheConfig(updated.config as Partial); setCacheConfig(editor); setServerCacheConfig(editor); @@ -267,9 +267,9 @@ export default function CachingView() {

Local Cache

- Cache responses locally with two complementary lookup paths: direct hash matching for exact replays, and{" "} - semantic similarity search for related content. Send the x-bf-cache-key header to scope cached - responses to a tenant or feature. {!isVectorStoreEnabled && ( + Cache responses locally with two complementary lookup paths: direct hash matching for exact replays, and semantic{" "} + similarity search for related content. Send the x-bf-cache-key header to scope cached responses to a tenant or feature.{" "} + {!isVectorStoreEnabled && ( Requires a vector store to be configured and enabled in config.json. @@ -303,9 +303,8 @@ export default function CachingView() { Enable Caching

- Loads (or unloads) the plugin without a server restart. Configuration changes you make below mutate the live - plugin in place, no redeploy needed.{" "} - + Loads (or unloads) the plugin without a server restart. Configuration changes you make below mutate the live plugin in + place, no redeploy needed.{" "}

) : ( <> -
+
{/* Mode picker. Direct-only is first-class. */}
@@ -341,9 +337,7 @@ export default function CachingView() { data-testid="caching-mode-semantic-tab" disabled={embeddingProviders.length === 0} title={ - embeddingProviders.length === 0 - ? "Configure an embedding-capable provider to enable semantic mode." - : undefined + embeddingProviders.length === 0 ? "Configure an embedding-capable provider to enable semantic mode." : undefined } > Direct + Semantic @@ -353,14 +347,14 @@ export default function CachingView() {

{mode === "direct" ? ( <> - Direct-only mode hashes each request and replays an exact match. No embeddings, no provider needed. - Cheapest path, perfect for stable prompts. + Direct-only mode hashes each request and replays an exact match. No embeddings, no provider needed. Cheapest path, + perfect for stable prompts. ) : ( <> - Direct + semantic mode adds vector similarity search on top of direct hash matching. Requires an - embedding-capable provider and the model's real dimension. Direct hits are still served first; - semantic search runs only when the direct lookup misses. + Direct + semantic mode adds vector similarity search on top of direct hash matching. Requires an embedding-capable + provider and the model's real dimension. Direct hits are still served first; semantic search runs only when the + direct lookup misses. )}

@@ -377,11 +371,11 @@ export default function CachingView() { <> {hasStructuralChange && (
- Heads up: a vector store namespace can only hold vectors of one dimension. Whenever you - change the embedding provider, model, or dimension, make sure the dimension{" "} - still matches what the model produces, otherwise writes to the existing namespace will fail and reads - will silently miss. The namespace is not recreated automatically; either use a fresh namespace - or drop the existing class/index in your vector store before saving. + Heads up: a vector store namespace can only hold vectors of one dimension. Whenever you change the + embedding provider, model, or dimension, make sure the dimension still matches what the + model produces, otherwise writes to the existing namespace will fail and reads will silently miss. The namespace is{" "} + not recreated automatically; either use a fresh namespace or drop the existing class/index in your vector + store before saving.
)} @@ -408,11 +402,7 @@ export default function CachingView() { .map((provider) => (
- + {getProviderLabel(provider.name)}
@@ -435,8 +425,8 @@ export default function CachingView() {

- API keys are inherited from the embedding provider's main configuration, you don't need to - add them again here. + API keys are inherited from the embedding provider's main configuration, you don't need to add them again + here.

@@ -445,9 +435,7 @@ export default function CachingView() { data-testid="caching-dimension-input" type="number" min="2" - value={ - cacheConfig.dimension === undefined || Number.isNaN(cacheConfig.dimension) ? "" : cacheConfig.dimension - } + value={cacheConfig.dimension === undefined || Number.isNaN(cacheConfig.dimension) ? "" : cacheConfig.dimension} onChange={(e) => { const value = e.target.value; if (value === "") { @@ -461,9 +449,9 @@ export default function CachingView() { }} />

- Vector size produced by the embedding model. Must match the model exactly (e.g. 1536{" "} - for OpenAI text-embedding-3-small, 3072 for{" "} - text-embedding-3-large, 768 for many Cohere/Voyage models). + Vector size produced by the embedding model. Must match the model exactly (e.g. 1536 for OpenAI{" "} + text-embedding-3-small, 3072 for text-embedding-3-large, 768{" "} + for many Cohere/Voyage models).

@@ -508,9 +496,7 @@ export default function CachingView() { min="0" max="1" step="0.01" - value={ - cacheConfig.threshold === undefined || Number.isNaN(cacheConfig.threshold) ? "" : cacheConfig.threshold - } + value={cacheConfig.threshold === undefined || Number.isNaN(cacheConfig.threshold) ? "" : cacheConfig.threshold} onChange={(e) => { const value = e.target.value; if (value === "") { @@ -524,8 +510,7 @@ export default function CachingView() { }} />

- Minimum cosine similarity for a semantic hit. Override per-request via{" "} - x-bf-cache-threshold. + Minimum cosine similarity for a semantic hit. Override per-request via x-bf-cache-threshold.

)} @@ -547,9 +532,9 @@ export default function CachingView() { onChange={(e) => updateLocal({ vector_store_namespace: e.target.value })} />

- Bucket/index name where cache entries live. Leave blank to use the default ( - BifrostLocalCachePlugin). Changing this points the plugin at a different (possibly empty) - bucket. Old entries are not deleted, they just stop being queried. + Bucket/index name where cache entries live. Leave blank to use the default (BifrostLocalCachePlugin). + Changing this points the plugin at a different (possibly empty) bucket. Old entries are not deleted, they just stop + being queried.

@@ -563,9 +548,8 @@ export default function CachingView() { onChange={(e) => updateLocal({ default_cache_key: e.target.value })} />

- Fallback partition key used when a request doesn't set x-bf-cache-key. Cache keys isolate - entries: same key ↔ shared cache pool. Leave blank to disable caching for any request that - doesn't send the header. + Fallback partition key used when a request doesn't set x-bf-cache-key. Cache keys isolate entries: same + key ↔ shared cache pool. Leave blank to disable caching for any request that doesn't send the header.

@@ -602,8 +586,8 @@ export default function CachingView() { }} />

- Skip caching for conversations with more than this many messages. Long histories rarely match exactly - and inflate the cache without paying off. + Skip caching for conversations with more than this many messages. Long histories rarely match exactly and inflate + the cache without paying off.

diff --git a/ui/app/workspace/config/views/loggingView.tsx b/ui/app/workspace/config/views/loggingView.tsx index 55eaf6c083..e9a9af94b8 100644 --- a/ui/app/workspace/config/views/loggingView.tsx +++ b/ui/app/workspace/config/views/loggingView.tsx @@ -144,12 +144,12 @@ export default function LoggingView() {

When enabled, individual requests can override the global content logging setting using the{" "} - x-bf-disable-content-logging header or context key, and can opt-in to persisting raw provider - bytes in logs using the x-bf-store-raw-request-response header. Raw-byte storage requires - content logging to be on — either globally, or via{" "} - x-bf-disable-content-logging: false on the same request. If content logging is off, raw bytes - are dropped from the log record even when x-bf-store-raw-request-response: true. Does not - control sending raw bytes back to callers — see Allow Per-Request Raw Override. + x-bf-disable-content-logging header or context key, and can opt-in to persisting raw + provider bytes in logs using the x-bf-store-raw-request-response header. Raw-byte storage + requires content logging to be on — either globally, or via{" "} + x-bf-disable-content-logging: false on the same request. If content logging is off, raw + bytes are dropped from the log record even when x-bf-store-raw-request-response: true. Does + not control sending raw bytes back to callers — see Allow Per-Request Raw Override.

-
- -

- When enabled, individual requests can send raw provider request/response bytes back to the caller using the{" "} - x-bf-send-back-raw-request and{" "} - x-bf-send-back-raw-response headers. Does not affect log storage — raw-byte persistence in logs is controlled by Allow Per-Request Content Storage Override. -

-
- handleConfigChange("allow_per_request_raw_override", checked)} - /> +
+ +

+ When enabled, individual requests can send raw provider request/response bytes back to the caller using the{" "} + x-bf-send-back-raw-request and x-bf-send-back-raw-response{" "} + headers. Does not affect log storage — raw-byte persistence in logs is controlled by Allow Per-Request Content Storage + Override. +

+ handleConfigChange("allow_per_request_raw_override", checked)} + /> + {/* Log Retention Days */} {localConfig.enable_logging && bifrostConfig?.is_logs_connected && ( diff --git a/ui/app/workspace/config/views/mcpView.tsx b/ui/app/workspace/config/views/mcpView.tsx index 3bed1d4f1c..59baf54c9a 100644 --- a/ui/app/workspace/config/views/mcpView.tsx +++ b/ui/app/workspace/config/views/mcpView.tsx @@ -1,19 +1,9 @@ import { Button } from "@/components/ui/button"; import { EnvVarInput } from "@/components/ui/envVarInput"; import { Input } from "@/components/ui/input"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, -} from "@/components/ui/select"; +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; import { Switch } from "@/components/ui/switch"; -import { - getErrorMessage, - useGetCoreConfigQuery, - useUpdateCoreConfigMutation, -} from "@/lib/store"; +import { getErrorMessage, useGetCoreConfigQuery, useUpdateCoreConfigMutation } from "@/lib/store"; import { CoreConfig, DefaultCoreConfig } from "@/lib/types/config"; import { EnvVar } from "@/lib/types/schemas"; import { RbacOperation, RbacResource, useRbac } from "@enterprise/lib"; @@ -21,406 +11,339 @@ import { useCallback, useEffect, useMemo, useState } from "react"; import { toast } from "sonner"; const envVarEquals = (a?: EnvVar, b?: EnvVar) => - (a?.value ?? "") === (b?.value ?? "") && - (a?.env_var ?? "") === (b?.env_var ?? "") && - (a?.from_env ?? false) === (b?.from_env ?? false); + (a?.value ?? "") === (b?.value ?? "") && (a?.env_var ?? "") === (b?.env_var ?? "") && (a?.from_env ?? false) === (b?.from_env ?? 
false); export default function MCPView() { - const hasSettingsUpdateAccess = useRbac( - RbacResource.Settings, - RbacOperation.Update, - ); - const { data: bifrostConfig } = useGetCoreConfigQuery({ fromDB: true }); - const config = bifrostConfig?.client_config; - const [updateCoreConfig, { isLoading }] = useUpdateCoreConfigMutation(); - const [localConfig, setLocalConfig] = useState(DefaultCoreConfig); + const hasSettingsUpdateAccess = useRbac(RbacResource.Settings, RbacOperation.Update); + const { data: bifrostConfig } = useGetCoreConfigQuery({ fromDB: true }); + const config = bifrostConfig?.client_config; + const [updateCoreConfig, { isLoading }] = useUpdateCoreConfigMutation(); + const [localConfig, setLocalConfig] = useState(DefaultCoreConfig); - const [localValues, setLocalValues] = useState<{ - mcp_agent_depth: string; - mcp_tool_execution_timeout: string; - mcp_code_mode_binding_level: string; - mcp_tool_sync_interval: string; - }>({ - mcp_agent_depth: "10", - mcp_tool_execution_timeout: "30", - mcp_code_mode_binding_level: "server", - mcp_tool_sync_interval: "10", - }); + const [localValues, setLocalValues] = useState<{ + mcp_agent_depth: string; + mcp_tool_execution_timeout: string; + mcp_code_mode_binding_level: string; + mcp_tool_sync_interval: string; + }>({ + mcp_agent_depth: "10", + mcp_tool_execution_timeout: "30", + mcp_code_mode_binding_level: "server", + mcp_tool_sync_interval: "10", + }); - useEffect(() => { - if (bifrostConfig && config) { - setLocalConfig(config); - setLocalValues({ - mcp_agent_depth: config?.mcp_agent_depth?.toString() || "10", - mcp_tool_execution_timeout: - config?.mcp_tool_execution_timeout?.toString() || "30", - mcp_code_mode_binding_level: - config?.mcp_code_mode_binding_level || "server", - mcp_tool_sync_interval: - config?.mcp_tool_sync_interval?.toString() || "10", - }); - } - }, [config, bifrostConfig]); + useEffect(() => { + if (bifrostConfig && config) { + setLocalConfig(config); + setLocalValues({ + 
mcp_agent_depth: config?.mcp_agent_depth?.toString() || "10", + mcp_tool_execution_timeout: config?.mcp_tool_execution_timeout?.toString() || "30", + mcp_code_mode_binding_level: config?.mcp_code_mode_binding_level || "server", + mcp_tool_sync_interval: config?.mcp_tool_sync_interval?.toString() || "10", + }); + } + }, [config, bifrostConfig]); - const hasChanges = useMemo(() => { - if (!config) return false; - const serverURLChanged = !envVarEquals( - localConfig.mcp_external_server_url, - config.mcp_external_server_url, - ); - const clientURLChanged = !envVarEquals( - localConfig.mcp_external_client_url, - config.mcp_external_client_url, - ); - return ( - localConfig.mcp_agent_depth !== config.mcp_agent_depth || - localConfig.mcp_tool_execution_timeout !== - config.mcp_tool_execution_timeout || - localConfig.mcp_code_mode_binding_level !== - (config.mcp_code_mode_binding_level || "server") || - localConfig.mcp_tool_sync_interval !== - (config.mcp_tool_sync_interval ?? 10) || - localConfig.mcp_disable_auto_tool_inject !== - (config.mcp_disable_auto_tool_inject ?? false) || - serverURLChanged || - clientURLChanged - ); - }, [config, localConfig]); + const hasChanges = useMemo(() => { + if (!config) return false; + const serverURLChanged = !envVarEquals(localConfig.mcp_external_server_url, config.mcp_external_server_url); + const clientURLChanged = !envVarEquals(localConfig.mcp_external_client_url, config.mcp_external_client_url); + return ( + localConfig.mcp_agent_depth !== config.mcp_agent_depth || + localConfig.mcp_tool_execution_timeout !== config.mcp_tool_execution_timeout || + localConfig.mcp_code_mode_binding_level !== (config.mcp_code_mode_binding_level || "server") || + localConfig.mcp_tool_sync_interval !== (config.mcp_tool_sync_interval ?? 10) || + localConfig.mcp_disable_auto_tool_inject !== (config.mcp_disable_auto_tool_inject ?? 
false) || + serverURLChanged || + clientURLChanged + ); + }, [config, localConfig]); - const handleAgentDepthChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, mcp_agent_depth: value })); - const numValue = Number.parseInt(value); - if (!isNaN(numValue) && numValue > 0) { - setLocalConfig((prev) => ({ ...prev, mcp_agent_depth: numValue })); - } - }, []); + const handleAgentDepthChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, mcp_agent_depth: value })); + const numValue = Number.parseInt(value); + if (!isNaN(numValue) && numValue > 0) { + setLocalConfig((prev) => ({ ...prev, mcp_agent_depth: numValue })); + } + }, []); - const handleToolExecutionTimeoutChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, mcp_tool_execution_timeout: value })); - const numValue = Number.parseInt(value); - if (!isNaN(numValue) && numValue > 0) { - setLocalConfig((prev) => ({ - ...prev, - mcp_tool_execution_timeout: numValue, - })); - } - }, []); + const handleToolExecutionTimeoutChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, mcp_tool_execution_timeout: value })); + const numValue = Number.parseInt(value); + if (!isNaN(numValue) && numValue > 0) { + setLocalConfig((prev) => ({ + ...prev, + mcp_tool_execution_timeout: numValue, + })); + } + }, []); - const handleCodeModeBindingLevelChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, mcp_code_mode_binding_level: value })); - if (value === "server" || value === "tool") { - setLocalConfig((prev) => ({ - ...prev, - mcp_code_mode_binding_level: value, - })); - } - }, []); + const handleCodeModeBindingLevelChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, mcp_code_mode_binding_level: value })); + if (value === "server" || value === "tool") { + setLocalConfig((prev) => ({ + ...prev, + mcp_code_mode_binding_level: value, + })); + } + }, []); - const 
handleToolSyncIntervalChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, mcp_tool_sync_interval: value })); - const numValue = Number.parseInt(value); - if (!isNaN(numValue) && numValue >= 0) { - setLocalConfig((prev) => ({ ...prev, mcp_tool_sync_interval: numValue })); - } - }, []); + const handleToolSyncIntervalChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, mcp_tool_sync_interval: value })); + const numValue = Number.parseInt(value); + if (!isNaN(numValue) && numValue >= 0) { + setLocalConfig((prev) => ({ ...prev, mcp_tool_sync_interval: numValue })); + } + }, []); - const handleDisableAutoToolInjectChange = useCallback((checked: boolean) => { - setLocalConfig((prev) => ({ - ...prev, - mcp_disable_auto_tool_inject: checked, - })); - }, []); + const handleDisableAutoToolInjectChange = useCallback((checked: boolean) => { + setLocalConfig((prev) => ({ + ...prev, + mcp_disable_auto_tool_inject: checked, + })); + }, []); - const handleServerURLChange = useCallback((value: EnvVar) => { - setLocalConfig((prev) => ({ ...prev, mcp_external_server_url: value })); - }, []); + const handleServerURLChange = useCallback((value: EnvVar) => { + setLocalConfig((prev) => ({ ...prev, mcp_external_server_url: value })); + }, []); - const handleClientURLChange = useCallback((value: EnvVar) => { - setLocalConfig((prev) => ({ ...prev, mcp_external_client_url: value })); - }, []); + const handleClientURLChange = useCallback((value: EnvVar) => { + setLocalConfig((prev) => ({ ...prev, mcp_external_client_url: value })); + }, []); - const handleSave = useCallback(async () => { - try { - const agentDepth = Number.parseInt(localValues.mcp_agent_depth); - const toolTimeout = Number.parseInt( - localValues.mcp_tool_execution_timeout, - ); + const handleSave = useCallback(async () => { + try { + const agentDepth = Number.parseInt(localValues.mcp_agent_depth); + const toolTimeout = 
Number.parseInt(localValues.mcp_tool_execution_timeout); - if (isNaN(agentDepth) || agentDepth <= 0) { - toast.error("Max agent depth must be a positive number."); - return; - } + if (isNaN(agentDepth) || agentDepth <= 0) { + toast.error("Max agent depth must be a positive number."); + return; + } - if (isNaN(toolTimeout) || toolTimeout <= 0) { - toast.error("Tool execution timeout must be a positive number."); - return; - } + if (isNaN(toolTimeout) || toolTimeout <= 0) { + toast.error("Tool execution timeout must be a positive number."); + return; + } - if (!bifrostConfig) { - toast.error("Configuration not loaded. Please refresh and try again."); - return; - } - await updateCoreConfig({ - ...bifrostConfig, - client_config: localConfig, - }).unwrap(); - toast.success("MCP settings updated successfully."); - } catch (error) { - toast.error(getErrorMessage(error)); - } - }, [bifrostConfig, localConfig, localValues, updateCoreConfig]); + if (!bifrostConfig) { + toast.error("Configuration not loaded. Please refresh and try again."); + return; + } + await updateCoreConfig({ + ...bifrostConfig, + client_config: localConfig, + }).unwrap(); + toast.success("MCP settings updated successfully."); + } catch (error) { + toast.error(getErrorMessage(error)); + } + }, [bifrostConfig, localConfig, localValues, updateCoreConfig]); - return ( -
-
-

MCP Settings

-

- Configure MCP (Model Context Protocol) agent and tool settings. -

-
-
- {/* Max Agent Depth */} -
-
- -

- Maximum depth for MCP agent execution. -

-
- handleAgentDepthChange(e.target.value)} - min="1" - /> -
+ return ( +
+
+

MCP Settings

+

Configure MCP (Model Context Protocol) agent and tool settings.

+
+
+ {/* Max Agent Depth */} +
+
+ +

Maximum depth for MCP agent execution.

+
+ handleAgentDepthChange(e.target.value)} + min="1" + /> +
- {/* Tool Execution Timeout */} -
-
- -

- Maximum time in seconds for tool execution. -

-
- handleToolExecutionTimeoutChange(e.target.value)} - min="1" - /> -
+ {/* Tool Execution Timeout */} +
+
+ +

Maximum time in seconds for tool execution.

+
+ handleToolExecutionTimeoutChange(e.target.value)} + min="1" + /> +
- {/* Tool Sync Interval */} -
-
- -

- How often to refresh tool lists from MCP servers. Set to 0 to - disable. -

-
- handleToolSyncIntervalChange(e.target.value)} - min="0" - /> -
+ {/* Tool Sync Interval */} +
+
+ +

How often to refresh tool lists from MCP servers. Set to 0 to disable.

+
+ handleToolSyncIntervalChange(e.target.value)} + min="0" + /> +
- {/* Disable Auto Tool Injection */} -
-
- -

- When enabled, MCP tools are not automatically included in every - request. Tools are only injected when explicitly specified via - request headers ( - x-bf-mcp-include-tools) and still - must be allowed by the virtual key MCP configuration. -

-
- -
+ {/* Disable Auto Tool Injection */} +
+
+ +

+ When enabled, MCP tools are not automatically included in every request. Tools are only injected when explicitly specified via + request headers (x-bf-mcp-include-tools) and still must be allowed by the virtual key MCP + configuration. +

+
+ +
- {/* Code Mode Binding Level */} -
-
- -

- How tools are exposed in the VFS: server-level (all tools per - server) or tool-level (individual tools). -

-
- + {/* Code Mode Binding Level */} +
+
+ +

+ How tools are exposed in the VFS: server-level (all tools per server) or tool-level (individual tools). +

+
+ - {/* Visual Example */} -
-

- VFS Structure: -

+ {/* Visual Example */} +
+

VFS Structure:

- {localValues.mcp_code_mode_binding_level === "server" ? ( -
-
-
servers/
-
├─ calculator.py
-
├─ youtube.py
-
└─ weather.py
-
-

- All tools per server in a single .py file -

-
- ) : ( -
-
-
servers/
-
├─ calculator/
-
├─ add.py
-
└─ subtract.py
-
├─ youtube/
-
├─ GET_CHANNELS.py
-
└─ SEARCH_VIDEOS.py
-
└─ weather/
-
└─ get_forecast.py
-
-

- Individual .py file for each tool -

-
- )} -
-
- {/* External Base URLs */} -
-
-

External Base URLs

-

- Override Bifrost's public base URL when it runs behind a reverse proxy. In most setups - both URLs are the same — leave them blank to derive the URL from the incoming{" "} - Host header. Both fields support env var syntax (e.g.{" "} - env.BIFROST_EXTERNAL_URL). -

-
+ {localValues.mcp_code_mode_binding_level === "server" ? ( +
+
+
servers/
+
├─ calculator.py
+
├─ youtube.py
+
└─ weather.py
+
+

All tools per server in a single .py file

+
+ ) : ( +
+
+
servers/
+
├─ calculator/
+
├─ add.py
+
└─ subtract.py
+
├─ youtube/
+
├─ GET_CHANNELS.py
+
└─ SEARCH_VIDEOS.py
+
└─ weather/
+
└─ get_forecast.py
+
+

Individual .py file for each tool

+
+ )} +
+
+ {/* External Base URLs */} +
+
+

External Base URLs

+

+ Override Bifrost's public base URL when it runs behind a reverse proxy. In most setups both URLs are the same —{" "} + leave them blank to derive the URL from the incoming Host header. Both fields support + env var syntax (e.g. env.BIFROST_EXTERNAL_URL). +

+
-
-
- -

- Advertised in OAuth server metadata that downstream clients read about - Bifrost — e.g. /.well-known/oauth-authorization-server{" "} - and the WWW-Authenticate header on{" "} - /mcp. Example: Claude Code connects to{" "} - https://bifrost.example.com/mcp and discovers the - authorize/token endpoints from this URL. -

-
- -
+
+
+ +

+ Advertised in OAuth server metadata that downstream clients read about Bifrost — e.g.{" "} + /.well-known/oauth-authorization-server and the{" "} + WWW-Authenticate header on /mcp. Example: Claude Code + connects to https://bifrost.example.com/mcp and discovers the authorize/token endpoints + from this URL. +

+
+ +
-
-
- -

- Used as the redirect_uri Bifrost registers with{" "} - upstream OAuth providers when it acts as a client to an MCP server. - Example: when a user connects an MCP server like Notion or Jira, this is the URL - Notion/Jira will redirect the browser to after login ( - {"/api/oauth/callback"}). -

-

- Heads up: changing this after MCP clients have already completed OAuth - will break them. The upstream provider locks the redirect_uri{" "} - to whatever was registered initially, so existing clients will fail with{" "} - "Invalid redirect URI". Clear the stored OAuth client credentials - for affected MCP servers and re-authorize so Bifrost re-runs Dynamic Client Registration - with the new URL. -

-
- -
-
-
-
- -
-
- ); -} +
+
+ +

+ Used as the redirect_uri Bifrost registers with upstream OAuth providers{" "} + when it acts as a client to an MCP server. Example: when a user connects an MCP server like Notion or Jira, this is the URL + Notion/Jira will redirect the browser to after login ({"/api/oauth/callback"}). +

+

+ Heads up: changing this after MCP clients have already completed OAuth will break them. The upstream + provider locks the redirect_uri to whatever was registered initially, so existing clients + will fail with "Invalid redirect URI". Clear the stored OAuth client credentials for affected MCP servers + and re-authorize so Bifrost re-runs Dynamic Client Registration with the new URL. +

+
+ +
+
+
+
+ +
+
+ ); +} \ No newline at end of file diff --git a/ui/app/workspace/config/views/securityView.tsx b/ui/app/workspace/config/views/securityView.tsx index 7deab3011e..eb2b66ce1c 100644 --- a/ui/app/workspace/config/views/securityView.tsx +++ b/ui/app/workspace/config/views/securityView.tsx @@ -19,444 +19,401 @@ import { useCallback, useEffect, useMemo, useState } from "react"; import { toast } from "sonner"; export default function SecurityView() { - const hasSettingsUpdateAccess = useRbac(RbacResource.Settings, RbacOperation.Update); - const { data: bifrostConfig } = useGetCoreConfigQuery({ fromDB: true }); - const { - data: authType, - isLoading: authTypeLoading, - error: authTypeError, - } = useGetAuthTypeQuery(undefined, { skip: !IS_ENTERPRISE }); - const config = bifrostConfig?.client_config; - const [updateCoreConfig, { isLoading }] = useUpdateCoreConfigMutation(); - const [localConfig, setLocalConfig] = useState(DefaultCoreConfig); - const showPasswordSection = - !IS_ENTERPRISE || (!authTypeLoading && !authTypeError && authType?.type !== "sso"); + const hasSettingsUpdateAccess = useRbac(RbacResource.Settings, RbacOperation.Update); + const { data: bifrostConfig } = useGetCoreConfigQuery({ fromDB: true }); + const { data: authType, isLoading: authTypeLoading, error: authTypeError } = useGetAuthTypeQuery(undefined, { skip: !IS_ENTERPRISE }); + const config = bifrostConfig?.client_config; + const [updateCoreConfig, { isLoading }] = useUpdateCoreConfigMutation(); + const [localConfig, setLocalConfig] = useState(DefaultCoreConfig); + const showPasswordSection = !IS_ENTERPRISE || (!authTypeLoading && !authTypeError && authType?.type !== "sso"); - const [localValues, setLocalValues] = useState<{ - allowed_origins: string; - allowed_headers: string; - required_headers: string; - whitelisted_routes: string; - }>({ - allowed_origins: "", - allowed_headers: "", - required_headers: "", - whitelisted_routes: "", - }); + const [localValues, setLocalValues] = useState<{ + 
allowed_origins: string; + allowed_headers: string; + required_headers: string; + whitelisted_routes: string; + }>({ + allowed_origins: "", + allowed_headers: "", + required_headers: "", + whitelisted_routes: "", + }); - const [authConfig, setAuthConfig] = useState({ - admin_username: { value: "", env_var: "", from_env: false }, - admin_password: { value: "", env_var: "", from_env: false }, - is_enabled: false, - disable_auth_on_inference: true, - }); + const [authConfig, setAuthConfig] = useState({ + admin_username: { value: "", env_var: "", from_env: false }, + admin_password: { value: "", env_var: "", from_env: false }, + is_enabled: false, + disable_auth_on_inference: true, + }); - useEffect(() => { - if (bifrostConfig && config) { - setLocalConfig(config); - setLocalValues({ - allowed_origins: config?.allowed_origins?.join(", ") || "", - allowed_headers: config?.allowed_headers?.join(", ") || "", - required_headers: config?.required_headers?.join(", ") || "", - whitelisted_routes: config?.whitelisted_routes?.join(", ") || "", - }); - } - if (bifrostConfig?.auth_config) { - setAuthConfig(bifrostConfig.auth_config); - } - }, [config, bifrostConfig]); + useEffect(() => { + if (bifrostConfig && config) { + setLocalConfig(config); + setLocalValues({ + allowed_origins: config?.allowed_origins?.join(", ") || "", + allowed_headers: config?.allowed_headers?.join(", ") || "", + required_headers: config?.required_headers?.join(", ") || "", + whitelisted_routes: config?.whitelisted_routes?.join(", ") || "", + }); + } + if (bifrostConfig?.auth_config) { + setAuthConfig(bifrostConfig.auth_config); + } + }, [config, bifrostConfig]); - const hasChanges = useMemo(() => { - if (!config) return false; - const localOrigins = localConfig.allowed_origins?.slice().sort().join(","); - const serverOrigins = config.allowed_origins?.slice().sort().join(","); - const originsChanged = localOrigins !== serverOrigins; + const hasChanges = useMemo(() => { + if (!config) return false; + const 
localOrigins = localConfig.allowed_origins?.slice().sort().join(","); + const serverOrigins = config.allowed_origins?.slice().sort().join(","); + const originsChanged = localOrigins !== serverOrigins; - const localHeaders = localConfig.allowed_headers?.slice().sort().join(","); - const serverHeaders = config.allowed_headers?.slice().sort().join(","); - const headersChanged = localHeaders !== serverHeaders; + const localHeaders = localConfig.allowed_headers?.slice().sort().join(","); + const serverHeaders = config.allowed_headers?.slice().sort().join(","); + const headersChanged = localHeaders !== serverHeaders; - const usernameChanged = - authConfig.admin_username?.value !== bifrostConfig?.auth_config?.admin_username?.value || - authConfig.admin_username?.env_var !== bifrostConfig?.auth_config?.admin_username?.env_var || - authConfig.admin_username?.from_env !== bifrostConfig?.auth_config?.admin_username?.from_env; - const passwordChanged = - authConfig.admin_password?.value !== bifrostConfig?.auth_config?.admin_password?.value || - authConfig.admin_password?.env_var !== bifrostConfig?.auth_config?.admin_password?.env_var || - authConfig.admin_password?.from_env !== bifrostConfig?.auth_config?.admin_password?.from_env; - const authChanged = showPasswordSection - ? 
authConfig.is_enabled !== bifrostConfig?.auth_config?.is_enabled || - usernameChanged || - passwordChanged || - authConfig.disable_auth_on_inference !== - bifrostConfig?.auth_config?.disable_auth_on_inference - : false; + const usernameChanged = + authConfig.admin_username?.value !== bifrostConfig?.auth_config?.admin_username?.value || + authConfig.admin_username?.env_var !== bifrostConfig?.auth_config?.admin_username?.env_var || + authConfig.admin_username?.from_env !== bifrostConfig?.auth_config?.admin_username?.from_env; + const passwordChanged = + authConfig.admin_password?.value !== bifrostConfig?.auth_config?.admin_password?.value || + authConfig.admin_password?.env_var !== bifrostConfig?.auth_config?.admin_password?.env_var || + authConfig.admin_password?.from_env !== bifrostConfig?.auth_config?.admin_password?.from_env; + const authChanged = showPasswordSection + ? authConfig.is_enabled !== bifrostConfig?.auth_config?.is_enabled || + usernameChanged || + passwordChanged || + authConfig.disable_auth_on_inference !== bifrostConfig?.auth_config?.disable_auth_on_inference + : false; - const localRequired = localConfig.required_headers?.slice().sort().join(","); - const serverRequired = config.required_headers?.slice().sort().join(","); - const requiredChanged = localRequired !== serverRequired; + const localRequired = localConfig.required_headers?.slice().sort().join(","); + const serverRequired = config.required_headers?.slice().sort().join(","); + const requiredChanged = localRequired !== serverRequired; - const localWhitelistedRoutes = localConfig.whitelisted_routes?.slice().sort().join(","); - const serverWhitelistedRoutes = config.whitelisted_routes?.slice().sort().join(","); - const whitelistedRoutesChanged = localWhitelistedRoutes !== serverWhitelistedRoutes; + const localWhitelistedRoutes = localConfig.whitelisted_routes?.slice().sort().join(","); + const serverWhitelistedRoutes = config.whitelisted_routes?.slice().sort().join(","); + const 
whitelistedRoutesChanged = localWhitelistedRoutes !== serverWhitelistedRoutes; - const enforceAuthOnInferenceChanged = - localConfig.enforce_auth_on_inference !== config.enforce_auth_on_inference; + const enforceAuthOnInferenceChanged = localConfig.enforce_auth_on_inference !== config.enforce_auth_on_inference; - return ( - originsChanged || - headersChanged || - requiredChanged || - whitelistedRoutesChanged || - authChanged || - enforceAuthOnInferenceChanged - ); - }, [config, localConfig, authConfig, bifrostConfig, showPasswordSection]); + return originsChanged || headersChanged || requiredChanged || whitelistedRoutesChanged || authChanged || enforceAuthOnInferenceChanged; + }, [config, localConfig, authConfig, bifrostConfig, showPasswordSection]); - const needsRestart = useMemo(() => { - if (!config) return false; + const needsRestart = useMemo(() => { + if (!config) return false; - const localOrigins = localConfig.allowed_origins?.slice().sort().join(","); - const serverOrigins = config.allowed_origins?.slice().sort().join(","); - const originsChanged = localOrigins !== serverOrigins; + const localOrigins = localConfig.allowed_origins?.slice().sort().join(","); + const serverOrigins = config.allowed_origins?.slice().sort().join(","); + const originsChanged = localOrigins !== serverOrigins; - const localHeaders = localConfig.allowed_headers?.slice().sort().join(","); - const serverHeaders = config.allowed_headers?.slice().sort().join(","); - const headersChanged = localHeaders !== serverHeaders; + const localHeaders = localConfig.allowed_headers?.slice().sort().join(","); + const serverHeaders = config.allowed_headers?.slice().sort().join(","); + const headersChanged = localHeaders !== serverHeaders; - const enforceAuthOnInferenceChanged = - localConfig.enforce_auth_on_inference !== config.enforce_auth_on_inference && IS_ENTERPRISE; + const enforceAuthOnInferenceChanged = localConfig.enforce_auth_on_inference !== config.enforce_auth_on_inference && 
IS_ENTERPRISE; - return originsChanged || headersChanged || enforceAuthOnInferenceChanged; - }, [config, localConfig]); + return originsChanged || headersChanged || enforceAuthOnInferenceChanged; + }, [config, localConfig]); - const handleAllowedOriginsChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, allowed_origins: value })); - setLocalConfig((prev) => ({ ...prev, allowed_origins: parseArrayFromText(value) })); - }, []); + const handleAllowedOriginsChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, allowed_origins: value })); + setLocalConfig((prev) => ({ ...prev, allowed_origins: parseArrayFromText(value) })); + }, []); - const handleAllowedHeadersChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, allowed_headers: value })); - setLocalConfig((prev) => ({ ...prev, allowed_headers: parseArrayFromText(value) })); - }, []); + const handleAllowedHeadersChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, allowed_headers: value })); + setLocalConfig((prev) => ({ ...prev, allowed_headers: parseArrayFromText(value) })); + }, []); - const handleRequiredHeadersChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, required_headers: value })); - setLocalConfig((prev) => ({ ...prev, required_headers: parseArrayFromText(value) })); - }, []); + const handleRequiredHeadersChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, required_headers: value })); + setLocalConfig((prev) => ({ ...prev, required_headers: parseArrayFromText(value) })); + }, []); - const handleWhitelistedRoutesChange = useCallback((value: string) => { - setLocalValues((prev) => ({ ...prev, whitelisted_routes: value })); - setLocalConfig((prev) => ({ ...prev, whitelisted_routes: parseArrayFromText(value) })); - }, []); + const handleWhitelistedRoutesChange = useCallback((value: string) => { + setLocalValues((prev) => ({ ...prev, 
whitelisted_routes: value })); + setLocalConfig((prev) => ({ ...prev, whitelisted_routes: parseArrayFromText(value) })); + }, []); - const handleConfigChange = useCallback((field: keyof CoreConfig, value: boolean) => { - setLocalConfig((prev) => ({ ...prev, [field]: value })); - }, []); + const handleConfigChange = useCallback((field: keyof CoreConfig, value: boolean) => { + setLocalConfig((prev) => ({ ...prev, [field]: value })); + }, []); - const handleAuthToggle = useCallback((checked: boolean) => { - setAuthConfig((prev) => ({ ...prev, is_enabled: checked })); - }, []); + const handleAuthToggle = useCallback((checked: boolean) => { + setAuthConfig((prev) => ({ ...prev, is_enabled: checked })); + }, []); - const handleDisableAuthOnInferenceToggle = useCallback((checked: boolean) => { - setAuthConfig((prev) => ({ ...prev, disable_auth_on_inference: checked })); - }, []); + const handleDisableAuthOnInferenceToggle = useCallback((checked: boolean) => { + setAuthConfig((prev) => ({ ...prev, disable_auth_on_inference: checked })); + }, []); - const handleAuthFieldChange = useCallback( - (field: "admin_username" | "admin_password", value: EnvVar) => { - setAuthConfig((prev) => ({ ...prev, [field]: value })); - }, - [], - ); + const handleAuthFieldChange = useCallback((field: "admin_username" | "admin_password", value: EnvVar) => { + setAuthConfig((prev) => ({ ...prev, [field]: value })); + }, []); - const handleSave = useCallback(async () => { - try { - const validation = validateOrigins(localConfig.allowed_origins); + const handleSave = useCallback(async () => { + try { + const validation = validateOrigins(localConfig.allowed_origins); - if (!validation.isValid && localConfig.allowed_origins.length > 0) { - toast.error( - `Invalid origins: ${validation.invalidOrigins.join(", ")}. 
Origins must be valid URLs like https://example.com, wildcard patterns like https://*.example.com, or "*" to allow all origins`, - ); - return; - } - const hasUsername = authConfig.admin_username?.value || authConfig.admin_username?.env_var; - const hasPassword = authConfig.admin_password?.value || authConfig.admin_password?.env_var; - await updateCoreConfig({ - ...bifrostConfig!, - client_config: localConfig, - ...(showPasswordSection - ? { - auth_config: - authConfig.is_enabled && hasUsername && hasPassword - ? authConfig - : { ...authConfig, is_enabled: false }, - } - : {}), - }).unwrap(); - toast.success("Security settings updated successfully."); - } catch (error) { - toast.error(getErrorMessage(error)); - } - }, [bifrostConfig, localConfig, authConfig, showPasswordSection, updateCoreConfig]); + if (!validation.isValid && localConfig.allowed_origins.length > 0) { + toast.error( + `Invalid origins: ${validation.invalidOrigins.join(", ")}. Origins must be valid URLs like https://example.com, wildcard patterns like https://*.example.com, or "*" to allow all origins`, + ); + return; + } + const hasUsername = authConfig.admin_username?.value || authConfig.admin_username?.env_var; + const hasPassword = authConfig.admin_password?.value || authConfig.admin_password?.env_var; + await updateCoreConfig({ + ...bifrostConfig!, + client_config: localConfig, + ...(showPasswordSection + ? { + auth_config: authConfig.is_enabled && hasUsername && hasPassword ? authConfig : { ...authConfig, is_enabled: false }, + } + : {}), + }).unwrap(); + toast.success("Security settings updated successfully."); + } catch (error) { + toast.error(getErrorMessage(error)); + } + }, [bifrostConfig, localConfig, authConfig, showPasswordSection, updateCoreConfig]); - return ( -
-
-

Security Settings

-

- Configure security and access control settings. -

-
+ return ( +
+
+

Security Settings

+

Configure security and access control settings.

+
-
- {authConfig.is_enabled && !authConfig.disable_auth_on_inference && ( - - - - You will need to use Basic Auth for all your inference calls (including MCP tool - execution). You can disable it below. Check{" "} - - API Keys - - - - )} - {authConfig.is_enabled && (authConfig.disable_auth_on_inference ?? true) && ( - - - - Authentication is disabled for inference calls. Only dashboard, admin API and MCP tool - execution calls require authentication. - - - )} - {/* Password Protect the Dashboard */} - {IS_ENTERPRISE && authTypeLoading ? ( -
- - Loading authentication settings -
- ) : null} - {IS_ENTERPRISE && !authTypeLoading && authTypeError ? ( - - - - Could not load authentication type. Dashboard password settings are hidden until this - request succeeds. {getErrorMessage(authTypeError)} - - - ) : null} - {showPasswordSection && ( -
-
-
-
- -

- Set up authentication credentials to protect your Bifrost dashboard. Once - configured, use the generated token for all admin API calls. -

-
- -
-
-
- - handleAuthFieldChange("admin_username", value)} - /> -
-
- - handleAuthFieldChange("admin_password", value)} - /> -
- {authConfig.is_enabled && ( -
-
- -

- When enabled, inference API calls (chat completions, embeddings, etc.) will - not require authentication. Dashboard and admin API calls will still require - authentication. -

-
- -
- )} -
-
-
- )} - {/* Enable Auth on Inference */} -
-
- -

- {IS_ENTERPRISE - ? "Require authentication (virtual key, API key, or user token) for all inference endpoints." - : "Require a virtual key for all inference requests."}{" "} - See{" "} - - documentation - {" "} - for details. -

-
- handleConfigChange("enforce_auth_on_inference", checked)} - /> -
- {/* Allowed Origins */} - {needsRestart && } -
-
-
- -

- Comma-separated list of allowed origins for CORS and WebSocket connections. - Localhost origins are always allowed. Each origin must be a complete URL with - protocol (e.g., https://app.example.com, http://10.0.0.100:3000). Wildcards are - supported for subdomains (e.g., https://*.example.com) or use "*" to allow all - origins. -

-
-