diff --git a/CHANGELOG.md b/CHANGELOG.md index 9563e61a..d9acf6e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable changes to [Claudish](https://github.com/MadAppGang/claudish). +## [Unreleased] + +### Bug Fixes + +- **vertex**: fix `v@`/`vertex@` models silently falling through to the OpenRouter default (HTTP 401). Vertex has a static `baseUrl: ""` (its endpoint is built per-region in the vertex transport), so the empty-baseUrl filter in `getRemoteProviders()` dropped it from the registry and `resolveRemoteProvider()` returned null. Keep Vertex regardless of its empty static baseUrl. + +### New Features + +- **vertex**: support EU/US data-residency multi-region (`aiplatform.{location}.rep.googleapis.com`) and `global` endpoints via `VERTEX_LOCATION=eu|us|global`. Enables Gemini models published only on the REP endpoint, such as `gemini-3.5-flash` with EU data residency. + ## [7.5.0] - 2026-06-10 ### Documentation diff --git a/docs/settings-reference.md b/docs/settings-reference.md index ee056596..6ad7007c 100644 --- a/docs/settings-reference.md +++ b/docs/settings-reference.md @@ -161,7 +161,7 @@ Claudish automatically loads `.env` from the current working directory at startu | `POE_API_KEY` | Poe (`poe@`) | | https://poe.com/ | | `VERTEX_API_KEY` | Vertex AI Express mode (`v@`, `vertex@`) | | https://console.cloud.google.com/vertex-ai | | `VERTEX_PROJECT` | Vertex AI OAuth mode — GCP project ID | `GOOGLE_CLOUD_PROJECT` | GCP Console | -| `VERTEX_LOCATION` | Vertex AI region | `us-central1` | | +| `VERTEX_LOCATION` | Vertex AI region. A normal region (e.g. `europe-west4`) hits `{location}-aiplatform.googleapis.com`. `eu`/`us` hit the data-residency multi-region endpoint `aiplatform.{location}.rep.googleapis.com` (required for models published only there, e.g. `gemini-3.5-flash`). `global` hits `aiplatform.googleapis.com` (max availability, no data-residency guarantee). 
| `us-central1` | | | `GOOGLE_APPLICATION_CREDENTIALS` | Path to GCP service account JSON file (Vertex OAuth) | | GCP Console | | `GOOGLE_CLOUD_PROJECT` | GCP project ID (also used by Gemini Code Assist OAuth) | `GOOGLE_CLOUD_PROJECT_ID` | | diff --git a/packages/cli/.gitignore b/packages/cli/.gitignore index 39bdf969..cef2982a 100644 --- a/packages/cli/.gitignore +++ b/packages/cli/.gitignore @@ -1 +1,5 @@ .claudish-team-* + +# Build artifacts +claudish +*.bun-build diff --git a/packages/cli/scripts/smoke/providers.ts b/packages/cli/scripts/smoke/providers.ts index 7234e6f5..72f1e66f 100644 --- a/packages/cli/scripts/smoke/providers.ts +++ b/packages/cli/scripts/smoke/providers.ts @@ -8,6 +8,7 @@ import type { RemoteProvider } from "../../src/handlers/shared/remote-provider-types.js"; import { getRegisteredRemoteProviders } from "../../src/providers/remote-provider-registry.js"; +import { vertexApiHost } from "../../src/auth/vertex-auth.js"; import type { SmokeProviderConfig, WireFormat } from "./types.js"; // Providers to skip in v1 smoke tests @@ -142,7 +143,7 @@ function getApiPath(provider: RemoteProvider): string { function getBaseUrl(provider: RemoteProvider): string { if (provider.name === "vertex") { const location = process.env.VERTEX_LOCATION || "us-central1"; - return `https://${location}-aiplatform.googleapis.com`; + return `https://${vertexApiHost(location)}`; } return provider.baseUrl; } diff --git a/packages/cli/src/auth/vertex-auth.test.ts b/packages/cli/src/auth/vertex-auth.test.ts new file mode 100644 index 00000000..e464c12e --- /dev/null +++ b/packages/cli/src/auth/vertex-auth.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from "bun:test"; +import { vertexApiHost, buildVertexOAuthEndpoint } from "./vertex-auth.js"; + +describe("vertexApiHost", () => { + it("uses the classic single-region host for normal regions", () => { + expect(vertexApiHost("europe-west4")).toBe( + "europe-west4-aiplatform.googleapis.com" + ); + 
expect(vertexApiHost("us-central1")).toBe( + "us-central1-aiplatform.googleapis.com" + ); + }); + + it("uses the bare global host for location=global", () => { + expect(vertexApiHost("global")).toBe("aiplatform.googleapis.com"); + }); + + it("uses the data-residency REP host for eu/us multi-region", () => { + expect(vertexApiHost("eu")).toBe("aiplatform.eu.rep.googleapis.com"); + expect(vertexApiHost("us")).toBe("aiplatform.us.rep.googleapis.com"); + }); +}); + +describe("buildVertexOAuthEndpoint (google)", () => { + const cfg = { projectId: "p", location: "eu" }; + + it("targets the EU REP host while keeping the locations/ path", () => { + expect( + buildVertexOAuthEndpoint(cfg, "google", "gemini-3.5-flash", false) + ).toBe( + "https://aiplatform.eu.rep.googleapis.com/v1/" + + "projects/p/locations/eu/publishers/google/models/" + + "gemini-3.5-flash:generateContent" + ); + }); + + it("appends ?alt=sse for streaming", () => { + expect( + buildVertexOAuthEndpoint(cfg, "google", "gemini-3.5-flash", true) + ).toBe( + "https://aiplatform.eu.rep.googleapis.com/v1/" + + "projects/p/locations/eu/publishers/google/models/" + + "gemini-3.5-flash:streamGenerateContent?alt=sse" + ); + }); + + it("still builds the classic regional host for a normal region", () => { + expect( + buildVertexOAuthEndpoint( + { projectId: "p", location: "europe-west4" }, + "google", + "gemini-2.5-pro", + false + ) + ).toBe( + "https://europe-west4-aiplatform.googleapis.com/v1/" + + "projects/p/locations/europe-west4/publishers/google/models/" + + "gemini-2.5-pro:generateContent" + ); + }); + + it("targets the bare global host for location=global", () => { + expect( + buildVertexOAuthEndpoint( + { projectId: "p", location: "global" }, + "google", + "gemini-2.5-pro", + false + ) + ).toBe( + "https://aiplatform.googleapis.com/v1/" + + "projects/p/locations/global/publishers/google/models/" + + "gemini-2.5-pro:generateContent" + ); + }); +}); diff --git a/packages/cli/src/auth/vertex-auth.ts 
b/packages/cli/src/auth/vertex-auth.ts index d95e59e4..04297f5a 100644 --- a/packages/cli/src/auth/vertex-auth.ts +++ b/packages/cli/src/auth/vertex-auth.ts @@ -231,6 +231,31 @@ export function validateVertexOAuthConfig(): string | null { return null; } +/** + * Resolve the Vertex AI API host for a given location. + * + * Vertex exposes Gemini through three host families, each with a different + * shape — picking the wrong one yields a 404, so we can't always prefix with + * `{location}-`: + * - `global` -> aiplatform.googleapis.com (highest + * availability, NO data-residency guarantee) + * - `eu` / `us` -> aiplatform.{location}.rep.googleapis.com (data-residency + * multi-region "REP" endpoints; some newer models + * such as gemini-3.5-flash are published ONLY here) + * - any other region -> {location}-aiplatform.googleapis.com (classic + * single-region endpoint, e.g. europe-west4) + * + * The `locations/{location}` path segment is unchanged in every case. + * See: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/learn/locations + */ +export function vertexApiHost(location: string): string { + if (location === "global") return "aiplatform.googleapis.com"; + if (location === "eu" || location === "us") { + return `aiplatform.${location}.rep.googleapis.com`; + } + return `${location}-aiplatform.googleapis.com`; +} + /** * Build Vertex AI endpoint URL for OAuth mode */ @@ -241,6 +266,7 @@ export function buildVertexOAuthEndpoint( streaming: boolean = true ): string { const method = streaming ? "streamGenerateContent" : "generateContent"; + const host = vertexApiHost(config.location); // For Gemini models (publisher: google), use generateContent // For partner models (publisher: anthropic, mistral), use rawPredict @@ -248,7 +274,7 @@ export function buildVertexOAuthEndpoint( // Add ?alt=sse for SSE streaming format const sseParam = streaming ? 
"?alt=sse" : ""; return ( - `https://${config.location}-aiplatform.googleapis.com/v1/` + + `https://${host}/v1/` + `projects/${config.projectId}/locations/${config.location}/` + `publishers/${publisher}/models/${model}:${method}${sseParam}` ); @@ -256,7 +282,7 @@ export function buildVertexOAuthEndpoint( // Mistral uses regional rawPredict/streamRawPredict endpoint const mistralMethod = streaming ? "streamRawPredict" : "rawPredict"; return ( - `https://${config.location}-aiplatform.googleapis.com/v1/` + + `https://${host}/v1/` + `projects/${config.projectId}/locations/${config.location}/` + `publishers/mistralai/models/${model}:${mistralMethod}` ); diff --git a/packages/cli/src/providers/remote-provider-registry.test.ts b/packages/cli/src/providers/remote-provider-registry.test.ts new file mode 100644 index 00000000..28973efe --- /dev/null +++ b/packages/cli/src/providers/remote-provider-registry.test.ts @@ -0,0 +1,29 @@ +import { describe, it, expect } from "bun:test"; +import { resolveRemoteProvider } from "./remote-provider-registry.js"; + +describe("resolveRemoteProvider — vertex", () => { + // Regression: Vertex has a static baseUrl of "" (its endpoint is built + // per-region in the vertex transport). A baseUrl-emptiness filter in + // getRemoteProviders() used to drop it, so `v@`/`vertex@` resolved to null + // and every Vertex request silently fell through to the OpenRouter default + // (HTTP 401). Vertex must resolve regardless of its empty static baseUrl. 
+ it("resolves v@ to the vertex provider", () => { + const r = resolveRemoteProvider("v@gemini-3.5-flash"); + expect(r).not.toBeNull(); + expect(r!.provider.name).toBe("vertex"); + expect(r!.modelName).toBe("gemini-3.5-flash"); + }); + + it("resolves vertex@ to the vertex provider", () => { + const r = resolveRemoteProvider("vertex@gemini-2.5-pro"); + expect(r).not.toBeNull(); + expect(r!.provider.name).toBe("vertex"); + expect(r!.modelName).toBe("gemini-2.5-pro"); + }); + + it("resolves the legacy v/ prefix to the vertex provider", () => { + const r = resolveRemoteProvider("v/gemini-3.5-flash"); + expect(r).not.toBeNull(); + expect(r!.provider.name).toBe("vertex"); + }); +}); diff --git a/packages/cli/src/providers/remote-provider-registry.ts b/packages/cli/src/providers/remote-provider-registry.ts index 3346b1d0..60ed1036 100644 --- a/packages/cli/src/providers/remote-provider-registry.ts +++ b/packages/cli/src/providers/remote-provider-registry.ts @@ -49,7 +49,13 @@ const getRemoteProviders = (): RemoteProvider[] => { // (static baseUrl: "", populated via LITELLM_BASE_URL) aren't filtered // out. Without this, resolveRemoteProvider("litellm@...") returns null // and probe-discovery / runtime routing both fail. - getEffectiveBaseUrl(def) !== "" && + // + // Vertex AI also has a static baseUrl of "" because its endpoint is + // constructed per-region/publisher in the vertex transport (see + // buildVertexOAuthEndpoint). Keep it regardless of baseUrl, otherwise + // resolveRemoteProvider("v@...") returns null and every Vertex request + // silently falls through to the OpenRouter default → HTTP 401. + (getEffectiveBaseUrl(def) !== "" || def.transport === "vertex") && def.name !== "qwen" && def.name !== "native-anthropic" )