diff --git a/integ-tests/baml_src/clients.baml b/integ-tests/baml_src/clients.baml
index 2931d80ef6..0236bc13ce 100644
--- a/integ-tests/baml_src/clients.baml
+++ b/integ-tests/baml_src/clients.baml
@@ -74,6 +74,23 @@ client GPT35LegacyProvider {
   }
 }
 
+client OpenAIConcurrencyTestClientEnvBaseUrl {
+  provider openai-generic
+  options {
+    base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL
+    model "concurrency-test"
+    api_key env.OPENAI_API_KEY
+  }
+}
+
+client OpenAIConcurrencyTestClientHardocodedBaseUrl {
+  provider openai-generic
+  options {
+    base_url "http://127.0.0.1:9876/v1/"
+    model "concurrency-test"
+    api_key env.OPENAI_API_KEY
+  }
+}
 
 client Ollama {
   provider ollama
diff --git a/integ-tests/baml_src/test-files/providers/openai.baml b/integ-tests/baml_src/test-files/providers/openai.baml
index 7784584c20..b8a772e8d3 100644
--- a/integ-tests/baml_src/test-files/providers/openai.baml
+++ b/integ-tests/baml_src/test-files/providers/openai.baml
@@ -42,6 +42,26 @@ function TestOpenAI(input: string) -> string {
   "#
 }
 
+function TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {
+  client OpenAIConcurrencyTestClientEnvBaseUrl
+  prompt #"
+    {{ _.role("user") }}
+    Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs
+
+    Input: {{ input }}
+  "#
+}
+
+function TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {
+  client OpenAIConcurrencyTestClientHardocodedBaseUrl
+  prompt #"
+    {{ _.role("user") }}
+    Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs
+
+    Input: {{ input }}
+  "#
+}
+
 // Test O1 model without max_tokens (should not add default)
 function TestOpenAIO1NoMaxTokens(input: string) -> string {
   client OpenAIO1
diff --git a/integ-tests/common/concurrent_server.js b/integ-tests/common/concurrent_server.js
new file mode 100644
index 0000000000..9c89643978
--- /dev/null
+++ b/integ-tests/common/concurrent_server.js
@@ -0,0 +1,166 @@
+// Used to test connection pool concurrency
+
+const http = require("http");
+const { URL } = require("url");
+
+// Get host and port.
+const HOST = getArg("--host") || process.env.HOST || "127.0.0.1";
+const PORT = Number(getArg("--port") || process.env.PORT || 8001);
+
+// Latency in milliseconds.
+const LATENCY = Number(getArg("--latency") || process.env.LATENCY || 50);
+
+// Get CLI args.
+function getArg(flag) {
+  const i = process.argv.indexOf(flag);
+  return i !== -1 ? process.argv[i + 1] : undefined;
+}
+
+// Sleep millis.
+function sleep(ms) {
+  return new Promise((res) => setTimeout(res, ms));
+}
+
+// Respond with JSON.
+function json(res, status, bodyObj) {
+  const body = JSON.stringify(bodyObj);
+  res.writeHead(status, {
+    "Content-Type": "application/json",
+    "Content-Length": Buffer.byteLength(body),
+    "Cache-Control": "no-store",
+    "Connection": "keep-alive",
+    // CORS (harmless if you curl)
+    "Access-Control-Allow-Origin": "*",
+    "Access-Control-Allow-Headers": "Content-Type, Authorization",
+  });
+  res.end(body);
+}
+
+async function handleRequest(req, res) {
+  const url = new URL(req.url, `http://${req.headers.host}`);
+
+  // Health
+  if (req.method === "GET" && url.pathname === "/health") {
+    return json(res, 200, { ok: true });
+  }
+
+  // OpenAI generic.
+  if (req.method === "POST" && url.pathname === "/v1/chat/completions") {
+    let body = "";
+
+    req.on("data", chunk => body += chunk);
+
+    req.on("end", async () => {
+      // We don't actually need the request payload for this test.
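+      // (For reference, a typical OpenAI-compatible payload here would be:
+      //   { "model": "concurrency-test", "messages": [{ "role": "user", "content": "..." }] }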
+      // But parse if present to avoid client errors.
+      try {
+        if (body && body.length) {
+          JSON.parse(body);
+        }
+      } catch {
+        return json(res, 400, { error: { message: "Invalid JSON" } });
+      }
+
+      // Simulate latency for concurrency testing
+      await sleep(LATENCY);
+
+      const now = Math.floor(Date.now() / 1000);
+
+      return json(res, 200, {
+        id: `cmpl-${now}-${Math.random().toString(36).slice(2, 8)}`,
+        object: "chat.completion",
+        created: now,
+        model: "concurrency-test",
+        choices: [
+          {
+            index: 0,
+            message: { role: "assistant", content: "OpenAI" },
+            finish_reason: "stop",
+          },
+        ],
+        usage: { prompt_tokens: 0, completion_tokens: 1, total_tokens: 1 },
+      });
+    });
+
+    return;
+  }
+
+  // Anthropic.
+  if (req.method === "POST" && url.pathname === "/v1/messages") {
+    let body = "";
+
+    req.on("data", chunk => body += chunk);
+
+    req.on("end", async () => {
+      // We don't actually need the request payload for this test.
+      // But parse if present to avoid client errors.
+      try {
+        if (body && body.length) {
+          JSON.parse(body);
+        }
+      } catch {
+        return json(res, 400, { error: { message: "Invalid JSON" } });
+      }
+
+      // Simulate latency for concurrency testing
+      await sleep(LATENCY);
+
+      const now = Math.floor(Date.now() / 1000);
+
+      return json(res, 200, {
+        id: `msg_${Math.random().toString(36).slice(2, 10)}`,
+        type: "message",
+        role: "assistant",
+        model: "concurrency-test",
+        content: [
+          { type: "text", text: "Anthropic" }
+        ],
+        stop_reason: "end_turn",
+        stop_sequence: null,
+        usage: { input_tokens: 0, output_tokens: 1 },
+        created_at: now,
+      });
+    });
+
+    return;
+  }
+
+  // Not found
+  json(res, 404, { error: { message: "Not found" } });
+}
+
+const server = http.createServer(async (req, res) => {
+  console.log(`${req.method} ${req.url}`);
+
+  try {
+    await handleRequest(req, res);
+  } catch (e) {
+    json(res, 500, { error: { message: e?.message || "Internal error" } });
+  }
+});
+
+server.listen({ host: HOST, port: PORT, reuseAddress: true }, () => {
+  process.stdout.write(`Concurrency test server listening on http://${HOST}:${PORT}\n`);
+});
+
+const sockets = new Set();
+
+server.on("connection", (socket) => {
+  sockets.add(socket);
+  socket.on("close", () => sockets.delete(socket));
+});
+
+
+function shutdown() {
+  server.close(() => process.exit(0));
+  for (const s of sockets) {
+    try {
+      s.destroy();
+    } catch {
+      // Ignore errors
+    }
+  }
+}
+
+process.on("SIGINT", shutdown);
+process.on("SIGTERM", shutdown);
\ No newline at end of file
diff --git a/integ-tests/go/baml_client/baml_source_map.go b/integ-tests/go/baml_client/baml_source_map.go
index 96bc6c5c8d..13b10192a1 100644
--- a/integ-tests/go/baml_client/baml_source_map.go
+++ b/integ-tests/go/baml_client/baml_source_map.go
@@ -15,7 +15,7 @@ package baml_client
 
 var file_map = map[string]string{
- "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 
2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model 
\"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 
{\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", + "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientEnvBaseUrl {\n provider openai-generic\n options {\n base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientHardocodedBaseUrl {\n provider openai-generic\n options {\n base_url \"http://127.0.0.1:9876/v1/\"\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n 
resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient 
AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n 
resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", "custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n \nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. 
Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}", "fiddle-examples/audio/audio.baml": "function DescribeAudio(audio: audio) -> string {\n client GPT4o\n prompt #\"\n Describe the audio below in 20 words:\n {{ _.role(\"user\") }}\n {{ audio }}\n \"#\n\n}\n\n\n\n\n// chat role user present\nfunction DescribeAudio2(audio: audio) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 1 answer that answer the following command.\n\n Describe this in 5 words:\n {{ audio }}\n \"#\n}\n\ntest TestAudio {\n functions [DescribeAudio]\n args {\n audio { url \"https://www.pacdv.com/sounds/voices/friday-rocks.wav\"}\n }\n}\n\ntest TestAudio2 {\n functions [DescribeAudio2]\n args {\n audio { file \"friday-rocks.wav\" }\n }\n}\n", "fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... 
the json schema ...\n }'\n \"#\n}", @@ -115,7 +115,7 @@ var file_map = map[string]string{ "test-files/providers/openai-responses-validation.baml": "// OpenAI Responses Provider Validation Tests\n// These tests validate that the openai-responses provider is properly configured\n\n// Test 1: Basic provider recognition\n// This should parse successfully once openai-responses is available\nclient ValidateOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n }\n}\n\n// Test 2: Valid client_response_type values for openai-responses\nclient ValidateResponseTypeOpenAI {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai\"\n }\n}\n\nclient ValidateResponseTypeOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai-responses\"\n }\n}\n\nclient ValidateResponseTypeAnthropic {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"anthropic\"\n }\n}\n\n// Test 3: Provider should be in allowed list\n// This will validate that \"openai-responses\" is included in ClientProvider::allowed_providers()\n\n// Test 4: Default base URL should be correct\nclient ValidateDefaultBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // Should default to https://api.openai.com/v1\n }\n}\n\n// Test 5: Custom base URL should work\nclient ValidateCustomBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n base_url \"https://custom.openai.com/v1\"\n }\n}\n\n// Simple test functions to validate the clients work\nfunction ValidateBasicResponses(input: string) -> string {\n client ValidateOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Say \"success\" if you can read this: {{ input }}\n \"#\n}\n\nfunction ValidateResponseTypes(input: string) -> string {\n client ValidateResponseTypeOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Respond with \"response-type-works\" for: {{ input }}\n \"#\n}\n\n// Validation test suite\ntest ValidateOpenAIResponsesProvider {\n functions [\n ValidateBasicResponses,\n ValidateResponseTypes\n ]\n args {\n input \"test\"\n }\n}", "test-files/providers/openai-responses.baml": "// OpenAI Responses API Provider Tests\n// Tests the new openai-responses provider that uses the OpenAI Responses API\n\n// Basic OpenAI Responses client\nclient OpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n }\n}\n\n// OpenAI Responses client with explicit response type\nclient OpenAIResponsesExplicit {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\n// OpenAI Responses client with custom base URL (for testing)\nclient OpenAIResponsesCustomURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n base_url \"https://api.openai.com/v1\"\n }\n}\n\n// Test basic functionality with responses API\nfunction TestOpenAIResponses(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Write a short haiku about {{ input }}. 
Make it simple and beautiful.\n \"#\n}\n\n// Test with explicit response type configuration\nfunction TestOpenAIResponsesExplicit(input: string) -> string {\n client OpenAIResponsesExplicit\n prompt #\"\n {{ _.role(\"user\") }}\n Create a brief poem about {{ input }}. Keep it under 50 words.\n \"#\n}\n\n// Test with custom base URL\nfunction TestOpenAIResponsesCustomURL(input: string) -> string {\n client OpenAIResponsesCustomURL\n prompt #\"\n {{ _.role(\"user\") }}\n Tell me an interesting fact about {{ input }}.\n \"#\n}\n\n// Test with multi-turn conversation\nfunction TestOpenAIResponsesConversation(topic: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"system\") }}\n You are a helpful assistant that provides concise answers.\n \n {{ _.role(\"user\") }}\n What is {{ topic }}?\n \n {{ _.role(\"assistant\") }}\n {{ topic }} is a fascinating subject. Let me explain briefly.\n \n {{ _.role(\"user\") }}\n Can you give me a simple example?\n \"#\n}\n\n// Test with different model parameter\nclient OpenAIResponsesGPT4 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4\"\n }\n}\n\nfunction TestOpenAIResponsesDifferentModel(input: string) -> string {\n client OpenAIResponsesGPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Explain {{ input }} in one sentence.\n \"#\n}\n\n// Test error handling with invalid configuration\nclient OpenAIResponsesInvalidResponseType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // This should work since openai response type is valid for responses provider\n client_response_type \"openai\"\n }\n}\n\nfunction TestOpenAIResponsesWithOpenAIResponseType(input: string) -> string {\n client OpenAIResponsesInvalidResponseType\n prompt #\"\n {{ _.role(\"user\") }}\n Write about {{ input }}.\n \"#\n}\n\n// Comprehensive test suite for OpenAI Responses\ntest TestOpenAIResponsesProviders {\n functions [\n TestOpenAIResponses,\n TestOpenAIResponsesExplicit,\n TestOpenAIResponsesCustomURL,\n TestOpenAIResponsesConversation,\n TestOpenAIResponsesDifferentModel,\n TestOpenAIResponsesWithOpenAIResponseType\n ]\n args {\n input \"mountains\"\n topic \"machine learning\"\n }\n}\n\n// Test shorthand syntax (this should work but use standard openai, not responses)\nfunction TestOpenAIResponsesShorthand(input: string) -> string {\n client \"openai/gpt-5-mini\"\n prompt #\"\n {{ _.role(\"user\") }}\n What do you think about {{ input }}?\n \"#\n}\n\n// Test to ensure the provider correctly routes to /v1/responses endpoint\n// This is validated by the implementation, not by the test execution\nfunction TestOpenAIResponsesEndpoint(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n This request should go to /v1/responses endpoint, not /v1/chat/completions.\n Respond with a short message about {{ input }}.\n \"#\n}\n\n// Test that demonstrates automatic response type selection\nclient OpenAIResponsesAutoType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n // No explicit client_response_type - should automatically use openai-responses\n }\n}\n\nfunction TestOpenAIResponsesAutoType(input: string) -> string {\n client OpenAIResponsesAutoType\n prompt #\"\n {{ _.role(\"user\") }}\n This client should automatically use openai-responses response type.\n Write a short description of {{ input }}.\n \"#\n}\n\n// Additional test for validation\ntest TestOpenAIResponsesValidation {\n functions [\n 
TestOpenAIResponsesShorthand,\n TestOpenAIResponsesEndpoint,\n TestOpenAIResponsesAutoType,\n TestOpenAIResponsesExplicit,\n TestOpenAIProviderWithResponsesType\n ]\n args {\n input \"artificial intelligence\"\n }\n}\n\n// Test image input/output with OpenAI Responses API\nclient OpenAIResponsesImage {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\nfunction TestOpenAIResponsesImageInput(image: image | string | pdf | audio) -> string {\n client OpenAIResponsesImage\n prompt #\"\n {{ _.role(\"user\") }}\n what is in this content?\n {{ image }}\n \"#\n}\n\n// Test for image analysis\ntest TestOpenAIResponsesImageAnalysis {\n functions [\n TestOpenAIResponsesImageInput\n ]\n args {\n image \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n }\n}\n\n// Test web search with OpenAI Responses API\nclient OpenAIResponsesWebSearch {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"web_search_preview\"\n }\n ]\n }\n}\n\nfunction TestOpenAIResponsesWebSearch(query: string) -> string {\n client OpenAIResponsesWebSearch\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for web search functionality\ntest TestOpenAIResponsesWebSearchTest {\n functions [\n TestOpenAIResponsesWebSearch\n ]\n args {\n query \"What was a positive news story from today?\"\n }\n}\n\n\n// Test function calling with OpenAI Responses API\nclient OpenAIResponsesFunctionCall {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"function\"\n name \"get_current_weather\"\n description \"Get the current weather in a given location\"\n parameters {\n type \"object\"\n properties {\n location {\n type \"string\"\n description \"The city and state, e.g. 
San Francisco, CA\"\n }\n unit {\n type \"string\"\n enum [\"celsius\", \"fahrenheit\"]\n }\n }\n required [\"location\", \"unit\"]\n }\n }\n ]\n tool_choice \"auto\"\n }\n}\n\nfunction TestOpenAIResponsesFunctionCall(query: string) -> string {\n client OpenAIResponsesFunctionCall\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for function calling\ntest TestOpenAIResponsesFunctionCallTest {\n functions [\n TestOpenAIResponsesFunctionCall\n ]\n args {\n query \"What is the weather like in Boston today?\"\n }\n}\n\n// Test using standard openai provider with openai-responses client_response_type\nclient OpenAIWithResponsesType {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\nfunction TestOpenAIProviderWithResponsesType(input: string) -> string {\n client OpenAIWithResponsesType\n prompt #\"\n {{ _.role(\"user\") }}\n This uses the openai provider but with openai-responses client_response_type.\n Write a short summary about {{ input }}.\n \"#\n}\n\n// Test reasoning with OpenAI Responses API\nclient OpenAIResponsesReasoning {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n reasoning{\n effort \"high\"\n }\n }\n}\n\nfunction TestOpenAIResponsesReasoning(problem: string) -> string {\n client OpenAIResponsesReasoning\n prompt #\"\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n// Test for reasoning capability\ntest TestOpenAIResponsesReasoningTest {\n functions [\n TestOpenAIResponsesReasoning\n ]\n args {\n problem \"Solve this step by step: If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled?\"\n }\n}\n\nclient Gpt5 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\n\nfunction TestOpenAIResponsesAllRoles(problem: string) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"system\") }}\n Hi\n {{ _.role(\"developer\") }}\n Hi\n {{ _.role(\"assistant\") }}\n Hi\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n\nfunction TestOpenaiResponsesPdfs(pdf: pdf) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"user\") }}\n Summarize in one sentence the contents of this:\n {{ pdf }}\n \"#\n} \n\ntest TestOpenaiResponsesPdfsTest {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { url \"https://www.berkshirehathaway.com/letters/2024ltr.pdf\" }\n }\n}\n\ntest TestOpenaiResponsesPdfsTestFile {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { file \"../../dummy.pdf\" }\n }\n}\n\n\ntest TestOpenAIResponsesAllRolesTest {\n functions [\n TestOpenAIResponsesAllRoles\n ]\n args {\n problem \"What is the weather like in Boston today?\"\n }\n}", "test-files/providers/openai-with-anthropic-response.baml": "client OpenAIWithAnthropicResponse {\n provider openai-responses\n options {\n model \"gpt-4o\"\n client_response_type \"openai-responses\"\n base_url \"http://localhost:8000\"\n }\n}\n\nfunction OpenAIWithAnthropicResponseHello(s: string) -> string {\n client OpenAIWithAnthropicResponse\n prompt #\"\n Return the string \"Hello, world!\" with {{ s }} included in the response.\n {{ _.role(\"user\") }}\n \"#\n}\n\ntest TestOpenAIWithAnthropicResponse {\n functions [\n OpenAIWithAnthropicResponseHello\n ]\n args {\n s \"Cherry blossoms\"\n }\n}", - "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user 
input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", + "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientEnvBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientHardocodedBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", "test-files/providers/openrouter.baml": "function TestOpenRouterMistralSmall3_1_24b(input: string) -> string {\n client OpenRouterMistralSmall3_1_24b\n prompt #\"\n Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n \"#\n}\n \n \ntest TestName {\n functions [TestOpenRouterMistralSmall3_1_24b]\n args {\n input #\"\n hello world\n \"#\n }\n}\n \n \n\nclient OpenRouterMistralSmall3_1_24b {\n provider \"openai-generic\"\n options {\n base_url \"https://openrouter.ai/api/v1\"\n api_key env.OPENROUTER_API_KEY\n model \"mistralai/mistral-small-3.1-24b-instruct\"\n temperature 0.1\n headers {\n \"HTTP-Referer\" \"https://me.com\" // Optional\n \"X-Title\" \"me\" // Optional\n }\n }\n}", "test-files/providers/strategy.baml": "function TestFallbackStrategy(input: string) -> string {\n client Resilient_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestRoundRobinStrategy(input: string) -> string {\n client Lottery_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n", "test-files/providers/tests.baml": "test TestOpenAIShorthand {\n functions [TestOpenAIShorthand]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestAWS {\n functions [\n TestAws\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestProvider {\n functions [\n TestAnthropic, TestVertex, PromptTestOpenAI, TestAzure, TestOllama, TestGemini, TestGeminiThinking, TestAws,\n TestAwsInvalidRegion,\n TestOpenAIShorthand,\n TestAnthropicShorthand,\n TestAwsInvalidAccessKey,\n TestAwsInvalidProfile,\n TestAwsInvalidSessionToken\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestName {\n functions [TestCaching]\n args {\n input #\"\nIn a near-future society where dreams have become a tradable commodity and shared experience, a lonely and socially awkward teenager named Alex discovers they possess a rare and powerful ability to not only view but also manipulate the dreams of others. Initially thrilled by this newfound power, Alex begins subtly altering the dreams of classmates and family members, helping them overcome fears, boost confidence, or experience fantastical adventures. As Alex's skills grow, so does their influence. They start selling premium dream experiences on the black market, crafting intricate and addictive dreamscapes for wealthy clients. However, the line between dream and reality begins to blur for those exposed to Alex's creations. Some clients struggle to differentiate between their true memories and the artificial ones implanted by Alex's dream manipulation.\n\nComplications arise when a mysterious government agency takes notice of Alex's unique abilities. They offer Alex a chance to use their gift for \"the greater good,\" hinting at applications in therapy, criminal rehabilitation, and even national security. Simultaneously, an underground resistance movement reaches out, warning Alex about the dangers of dream manipulation and the potential for mass control and exploitation. Caught between these opposing forces, Alex must navigate a complex web of ethical dilemmas. They grapple with questions of free will, the nature of consciousness, and the responsibility that comes with having power over people's minds. 
As the consequences of their actions spiral outward, affecting the lives of loved ones and strangers alike, Alex is forced to confront the true nature of their ability and decide how—or if—it should be used.\n\nThe story explores themes of identity, the subconscious mind, the ethics of technology, and the power of imagination. It delves into the potential consequences of a world where our most private thoughts and experiences are no longer truly our own, and examines the fine line between helping others and manipulating them for personal gain or a perceived greater good. The narrative further expands on the societal implications of such abilities, questioning the moral boundaries of altering consciousness and the potential for abuse in a world where dreams can be commodified. It challenges the reader to consider the impact of technology on personal autonomy and the ethical responsibilities of those who wield such power.\n\nAs Alex's journey unfolds, they encounter various individuals whose lives have been touched by their dream manipulations, each presenting a unique perspective on the ethical quandaries at hand. From a classmate who gains newfound confidence to a wealthy client who becomes addicted to the dreamscapes, the ripple effects of Alex's actions are profound and far-reaching. The government agency's interest in Alex's abilities raises questions about the potential for state control and surveillance, while the resistance movement highlights the dangers of unchecked power and the importance of safeguarding individual freedoms.\n\nUltimately, Alex's story is one of self-discovery and moral reckoning, as they must decide whether to embrace their abilities for personal gain, align with the government's vision of a controlled utopia, or join the resistance in their fight for freedom and autonomy. The narrative invites readers to reflect on the nature of reality, the boundaries of human experience, and the ethical implications of a world where dreams are no longer private sanctuaries but shared and manipulated commodities. It also explores the psychological impact on Alex, who must deal with the burden of knowing the intimate fears and desires of others, and the isolation that comes from being unable to share their own dreams without altering them.\n\nThe story further examines the technological advancements that have made dream manipulation possible, questioning the role of innovation in society and the potential for both progress and peril. It considers the societal divide between those who can afford to buy enhanced dream experiences and those who cannot, highlighting issues of inequality and access. As Alex becomes more entangled in the web of their own making, they must confront the possibility that their actions could lead to unintended consequences, not just for themselves but for the fabric of society as a whole.\n\nIn the end, Alex's journey is a cautionary tale about the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. 
And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n\nIn conclusion, this story is a reflection on the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n \"#\n not_cached #\"\n hello world\n \"#\n }\n}", diff --git a/integ-tests/go/baml_client/functions.go b/integ-tests/go/baml_client/functions.go index 2d6f3290aa..376a215d06 100644 --- a/integ-tests/go/baml_client/functions.go +++ b/integ-tests/go/baml_client/functions.go @@ -11909,6 +11909,138 @@ func TestOpenAI(ctx context.Context, input string, opts ...CallOptionFunc) (stri } } +func TestOpenAIConcurrencyClientEnvBaseUrl(ctx context.Context, input string, opts ...CallOptionFunc) (string, error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"input": input}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + panic(err) + } + + if callOpts.onTick == nil { + result, err := bamlRuntime.CallFunction(ctx, "TestOpenAIConcurrencyClientEnvBaseUrl", encoded, callOpts.onTick) + if err != nil { + return "", err + } + + if result.Error != nil { + return "", result.Error + } + + casted := (result.Data).(string) + + return casted, nil + } else { + channel, err := bamlRuntime.CallFunctionStream(ctx, "TestOpenAIConcurrencyClientEnvBaseUrl", encoded, callOpts.onTick) + if err != nil { + return "", err + } + + for result := range channel { + if result.Error != nil { + return "", result.Error + } + + if result.HasData { + return result.Data.(string), nil + } + } + + return "", fmt.Errorf("No data returned from stream") + } +} + +func TestOpenAIConcurrencyClientHardocodedBaseUrl(ctx context.Context, input string, opts ...CallOptionFunc) (string, error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"input": input}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + panic(err) + } + + if callOpts.onTick == nil { + result, err := bamlRuntime.CallFunction(ctx, "TestOpenAIConcurrencyClientHardocodedBaseUrl", encoded, callOpts.onTick) + if err != nil { + return "", err + } + + if result.Error != nil { + 
return "", result.Error + } + + casted := (result.Data).(string) + + return casted, nil + } else { + channel, err := bamlRuntime.CallFunctionStream(ctx, "TestOpenAIConcurrencyClientHardocodedBaseUrl", encoded, callOpts.onTick) + if err != nil { + return "", err + } + + for result := range channel { + if result.Error != nil { + return "", result.Error + } + + if result.HasData { + return result.Data.(string), nil + } + } + + return "", fmt.Errorf("No data returned from stream") + } +} + func TestOpenAIDummyClient(ctx context.Context, input string, opts ...CallOptionFunc) (string, error) { var callOpts callOption diff --git a/integ-tests/go/baml_client/functions_parse.go b/integ-tests/go/baml_client/functions_parse.go index 60d4d7b78a..f649b3a39d 100644 --- a/integ-tests/go/baml_client/functions_parse.go +++ b/integ-tests/go/baml_client/functions_parse.go @@ -8493,6 +8493,100 @@ func (*parse) TestOpenAI(text string, opts ...CallOptionFunc) (string, error) { return casted, nil } +// / Parse version of TestOpenAIConcurrencyClientEnvBaseUrl (Takes in string and returns string) +func (*parse) TestOpenAIConcurrencyClientEnvBaseUrl(text string, opts ...CallOptionFunc) (string, error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"text": text, "stream": false}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + // This should never happen. if it does, please file an issue at https://github.com/boundaryml/baml/issues + // and include the type of the args you're passing in. + wrapped_err := fmt.Errorf("BAML INTERNAL ERROR: TestOpenAIConcurrencyClientEnvBaseUrl: %w", err) + panic(wrapped_err) + } + + result, err := bamlRuntime.CallFunctionParse(context.Background(), "TestOpenAIConcurrencyClientEnvBaseUrl", encoded) + if err != nil { + return "", err + } + + casted := (result).(string) + + return casted, nil +} + +// / Parse version of TestOpenAIConcurrencyClientHardocodedBaseUrl (Takes in string and returns string) +func (*parse) TestOpenAIConcurrencyClientHardocodedBaseUrl(text string, opts ...CallOptionFunc) (string, error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"text": text, "stream": false}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + // This should never happen. if it does, please file an issue at https://github.com/boundaryml/baml/issues + // and include the type of the args you're passing in. 
+ wrapped_err := fmt.Errorf("BAML INTERNAL ERROR: TestOpenAIConcurrencyClientHardocodedBaseUrl: %w", err) + panic(wrapped_err) + } + + result, err := bamlRuntime.CallFunctionParse(context.Background(), "TestOpenAIConcurrencyClientHardocodedBaseUrl", encoded) + if err != nil { + return "", err + } + + casted := (result).(string) + + return casted, nil +} + // / Parse version of TestOpenAIDummyClient (Takes in string and returns string) func (*parse) TestOpenAIDummyClient(text string, opts ...CallOptionFunc) (string, error) { diff --git a/integ-tests/go/baml_client/functions_parse_stream.go b/integ-tests/go/baml_client/functions_parse_stream.go index 20fe30ea65..f2ca985b4f 100644 --- a/integ-tests/go/baml_client/functions_parse_stream.go +++ b/integ-tests/go/baml_client/functions_parse_stream.go @@ -8494,6 +8494,100 @@ func (*parse_stream) TestOpenAI(text string, opts ...CallOptionFunc) (string, er return casted, nil } +// / Parse version of TestOpenAIConcurrencyClientEnvBaseUrl (Takes in string and returns string) +func (*parse_stream) TestOpenAIConcurrencyClientEnvBaseUrl(text string, opts ...CallOptionFunc) (string, error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"text": text, "stream": true}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + // This should never happen. if it does, please file an issue at https://github.com/boundaryml/baml/issues + // and include the type of the args you're passing in. + wrapped_err := fmt.Errorf("BAML INTERNAL ERROR: TestOpenAIConcurrencyClientEnvBaseUrl: %w", err) + panic(wrapped_err) + } + + result, err := bamlRuntime.CallFunctionParse(context.Background(), "TestOpenAIConcurrencyClientEnvBaseUrl", encoded) + if err != nil { + return "", err + } + + casted := (result).(string) + + return casted, nil +} + +// / Parse version of TestOpenAIConcurrencyClientHardocodedBaseUrl (Takes in string and returns string) +func (*parse_stream) TestOpenAIConcurrencyClientHardocodedBaseUrl(text string, opts ...CallOptionFunc) (string, error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"text": text, "stream": true}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + // This should never happen. if it does, please file an issue at https://github.com/boundaryml/baml/issues + // and include the type of the args you're passing in. 
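The parse variants above appear not to start a model call at all; they run the response parser over text produced elsewhere (note CallFunctionParse and the fixed "stream": false kwarg). A sketch of invoking one, continuing the imports and alias from the previous sketch and assuming the unexported parse receiver is exposed through a package-level accessor; the name Parse below is an assumption, not something this diff shows:

	// Hypothetical: Parse as the exported handle for the (*parse) methods.
	raw := "river stones hum low" // text captured from an earlier completion
	haiku, err := bamlclient.Parse.TestOpenAIConcurrencyClientEnvBaseUrl(raw)
	if err != nil {
		// The text could not be coerced to the function's string return type.
		fmt.Println("parse failed:", err)
		return
	}
	fmt.Println(haiku)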
+ wrapped_err := fmt.Errorf("BAML INTERNAL ERROR: TestOpenAIConcurrencyClientHardocodedBaseUrl: %w", err) + panic(wrapped_err) + } + + result, err := bamlRuntime.CallFunctionParse(context.Background(), "TestOpenAIConcurrencyClientHardocodedBaseUrl", encoded) + if err != nil { + return "", err + } + + casted := (result).(string) + + return casted, nil +} + // / Parse version of TestOpenAIDummyClient (Takes in string and returns string) func (*parse_stream) TestOpenAIDummyClient(text string, opts ...CallOptionFunc) (string, error) { diff --git a/integ-tests/go/baml_client/functions_stream.go b/integ-tests/go/baml_client/functions_stream.go index 8e4b34c6d0..fa69902ed6 100644 --- a/integ-tests/go/baml_client/functions_stream.go +++ b/integ-tests/go/baml_client/functions_stream.go @@ -13378,6 +13378,154 @@ func (*stream) TestOpenAI(ctx context.Context, input string, opts ...CallOptionF return channel, nil } +// / Streaming version of TestOpenAIConcurrencyClientEnvBaseUrl +func (*stream) TestOpenAIConcurrencyClientEnvBaseUrl(ctx context.Context, input string, opts ...CallOptionFunc) (<-chan StreamValue[string, string], error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"input": input}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + // This should never happen. if it does, please file an issue at https://github.com/boundaryml/baml/issues + // and include the type of the args you're passing in. 
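// The parse_stream variants above differ from the parse variants only in
// the encoded kwarg ("stream": true rather than "stream": false), which
// appears to select the streaming (partial) shape of the return type
// during parsing; both still call CallFunctionParse.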
+ wrapped_err := fmt.Errorf("BAML INTERNAL ERROR: TestOpenAIConcurrencyClientEnvBaseUrl: %w", err) + panic(wrapped_err) + } + + internal_channel, err := bamlRuntime.CallFunctionStream(ctx, "TestOpenAIConcurrencyClientEnvBaseUrl", encoded, callOpts.onTick) + if err != nil { + return nil, err + } + + channel := make(chan StreamValue[string, string]) + go func() { + for result := range internal_channel { + if result.Error != nil { + channel <- StreamValue[string, string]{ + IsError: true, + Error: result.Error, + } + close(channel) + return + } + if result.HasData { + data := (result.Data).(string) + channel <- StreamValue[string, string]{ + IsFinal: true, + as_final: &data, + } + } else { + data := (result.StreamData).(string) + channel <- StreamValue[string, string]{ + IsFinal: false, + as_stream: &data, + } + } + } + + // when internal_channel is closed, close the output too + close(channel) + }() + return channel, nil +} + +// / Streaming version of TestOpenAIConcurrencyClientHardocodedBaseUrl +func (*stream) TestOpenAIConcurrencyClientHardocodedBaseUrl(ctx context.Context, input string, opts ...CallOptionFunc) (<-chan StreamValue[string, string], error) { + + var callOpts callOption + for _, opt := range opts { + opt(&callOpts) + } + + args := baml.BamlFunctionArguments{ + Kwargs: map[string]any{"input": input}, + Env: getEnvVars(callOpts.env), + } + + if callOpts.clientRegistry != nil { + args.ClientRegistry = callOpts.clientRegistry + } + + if callOpts.collectors != nil { + args.Collectors = callOpts.collectors + } + + if callOpts.typeBuilder != nil { + args.TypeBuilder = callOpts.typeBuilder + } + + if callOpts.tags != nil { + args.Tags = callOpts.tags + } + + encoded, err := args.Encode() + if err != nil { + // This should never happen. if it does, please file an issue at https://github.com/boundaryml/baml/issues + // and include the type of the args you're passing in. 
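The goroutine above adapts the runtime's internal channel into StreamValue items, forwarding each partial chunk and then the final value. Below is a consumer sketch that uses only the exported fields visible in this diff (IsError, Error, IsFinal); reading the payload itself depends on accessors not shown in this hunk, and, as with Parse, the package-level Stream accessor is an assumption:

	// Hypothetical consumer, continuing the sketch above.
	ch, err := bamlclient.Stream.TestOpenAIConcurrencyClientEnvBaseUrl(
		context.Background(), "cherry blossoms")
	if err != nil {
		fmt.Println("failed to open stream:", err)
		return
	}
	partials, finals := 0, 0
	for v := range ch {
		if v.IsError {
			fmt.Println("stream error:", v.Error)
			break
		}
		if v.IsFinal {
			finals++ // the completed result arrived
		} else {
			partials++ // an incremental chunk
		}
	}
	fmt.Printf("saw %d partial chunk(s) and %d final value(s)\n", partials, finals)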
+ wrapped_err := fmt.Errorf("BAML INTERNAL ERROR: TestOpenAIConcurrencyClientHardocodedBaseUrl: %w", err) + panic(wrapped_err) + } + + internal_channel, err := bamlRuntime.CallFunctionStream(ctx, "TestOpenAIConcurrencyClientHardocodedBaseUrl", encoded, callOpts.onTick) + if err != nil { + return nil, err + } + + channel := make(chan StreamValue[string, string]) + go func() { + for result := range internal_channel { + if result.Error != nil { + channel <- StreamValue[string, string]{ + IsError: true, + Error: result.Error, + } + close(channel) + return + } + if result.HasData { + data := (result.Data).(string) + channel <- StreamValue[string, string]{ + IsFinal: true, + as_final: &data, + } + } else { + data := (result.StreamData).(string) + channel <- StreamValue[string, string]{ + IsFinal: false, + as_stream: &data, + } + } + } + + // when internal_channel is closed, close the output too + close(channel) + }() + return channel, nil +} + // / Streaming version of TestOpenAIDummyClient func (*stream) TestOpenAIDummyClient(ctx context.Context, input string, opts ...CallOptionFunc) (<-chan StreamValue[string, string], error) { diff --git a/integ-tests/openapi/baml_client/openapi.yaml b/integ-tests/openapi/baml_client/openapi.yaml index 731dc8b703..45e47a30ac 100644 --- a/integ-tests/openapi/baml_client/openapi.yaml +++ b/integ-tests/openapi/baml_client/openapi.yaml @@ -2548,6 +2548,32 @@ paths: title: TestOpenAIResponse type: string operationId: TestOpenAI + /call/TestOpenAIConcurrencyClientEnvBaseUrl: + post: + requestBody: + $ref: '#/components/requestBodies/TestOpenAIConcurrencyClientEnvBaseUrl' + responses: + '200': + description: Successful operation + content: + application/json: + schema: + title: TestOpenAIConcurrencyClientEnvBaseUrlResponse + type: string + operationId: TestOpenAIConcurrencyClientEnvBaseUrl + /call/TestOpenAIConcurrencyClientHardocodedBaseUrl: + post: + requestBody: + $ref: '#/components/requestBodies/TestOpenAIConcurrencyClientHardocodedBaseUrl' + responses: + '200': + description: Successful operation + content: + application/json: + schema: + title: TestOpenAIConcurrencyClientHardocodedBaseUrlResponse + type: string + operationId: TestOpenAIConcurrencyClientHardocodedBaseUrl /call/TestOpenAIDummyClient: post: requestBody: @@ -6168,6 +6194,38 @@ components: required: - input additionalProperties: false + TestOpenAIConcurrencyClientEnvBaseUrl: + required: true + content: + application/json: + schema: + title: TestOpenAIConcurrencyClientEnvBaseUrlRequest + type: object + properties: + input: + type: string + __baml_options__: + nullable: true + $ref: '#/components/schemas/BamlOptions' + required: + - input + additionalProperties: false + TestOpenAIConcurrencyClientHardocodedBaseUrl: + required: true + content: + application/json: + schema: + title: TestOpenAIConcurrencyClientHardocodedBaseUrlRequest + type: object + properties: + input: + type: string + __baml_options__: + nullable: true + $ref: '#/components/schemas/BamlOptions' + required: + - input + additionalProperties: false TestOpenAIDummyClient: required: true content: diff --git a/integ-tests/python-v1/baml_client/async_client.py b/integ-tests/python-v1/baml_client/async_client.py index 7a191bb8c8..6743f49ab8 100644 --- a/integ-tests/python-v1/baml_client/async_client.py +++ b/integ-tests/python-v1/baml_client/async_client.py @@ -2779,6 +2779,36 @@ async def TestOpenAI(self, input: str, "input": input, }) return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + async def 
TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + stream = self.stream.TestOpenAIConcurrencyClientEnvBaseUrl(input=input, + baml_options=baml_options) + return await stream.get_final_response() + else: + # Original non-streaming code + result = await self.__options.merge_options(baml_options).call_function_async(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + async def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + stream = self.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl(input=input, + baml_options=baml_options) + return await stream.get_final_response() + else: + # Original non-streaming code + result = await self.__options.merge_options(baml_options).call_function_async(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) async def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> str: @@ -6088,6 +6118,30 @@ def TestOpenAI(self, input: str, lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), ctx, ) + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return baml_py.BamlStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return baml_py.BamlStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlStream[str, str]: @@ -8268,6 +8322,20 @@ async def TestOpenAI(self, input: str, "input": input, }, mode="request") return result + async def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="request") + return result + async def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await 
self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="request") + return result async def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: @@ -10068,6 +10136,20 @@ async def TestOpenAI(self, input: str, "input": input, }, mode="stream") return result + async def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="stream") + return result + async def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="stream") + return result async def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: diff --git a/integ-tests/python-v1/baml_client/inlinedbaml.py b/integ-tests/python-v1/baml_client/inlinedbaml.py index a5b492ab9f..3bc1f344be 100644 --- a/integ-tests/python-v1/baml_client/inlinedbaml.py +++ b/integ-tests/python-v1/baml_client/inlinedbaml.py @@ -12,7 +12,7 @@ _file_map = { - "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 
1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n 
secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n 
max_completion_tokens 1000\n }\n}\n", + "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientEnvBaseUrl {\n provider openai-generic\n options {\n base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientHardocodedBaseUrl {\n provider openai-generic\n options {\n base_url \"http://127.0.0.1:9876/v1/\"\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider 
google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key 
env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", "custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. 
Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n \nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}", "fiddle-examples/audio/audio.baml": "function DescribeAudio(audio: audio) -> string {\n client GPT4o\n prompt #\"\n Describe the audio below in 20 words:\n {{ _.role(\"user\") }}\n {{ audio }}\n \"#\n\n}\n\n\n\n\n// chat role user present\nfunction DescribeAudio2(audio: audio) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 1 answer that answer the following command.\n\n Describe this in 5 words:\n {{ audio }}\n \"#\n}\n\ntest TestAudio {\n functions [DescribeAudio]\n args {\n audio { url \"https://www.pacdv.com/sounds/voices/friday-rocks.wav\"}\n }\n}\n\ntest TestAudio2 {\n functions [DescribeAudio2]\n args {\n audio { file \"friday-rocks.wav\" }\n }\n}\n", "fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... 
the json schema ...\n }'\n \"#\n}", @@ -112,7 +112,7 @@ "test-files/providers/openai-responses-validation.baml": "// OpenAI Responses Provider Validation Tests\n// These tests validate that the openai-responses provider is properly configured\n\n// Test 1: Basic provider recognition\n// This should parse successfully once openai-responses is available\nclient ValidateOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n }\n}\n\n// Test 2: Valid client_response_type values for openai-responses\nclient ValidateResponseTypeOpenAI {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai\"\n }\n}\n\nclient ValidateResponseTypeOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai-responses\"\n }\n}\n\nclient ValidateResponseTypeAnthropic {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"anthropic\"\n }\n}\n\n// Test 3: Provider should be in allowed list\n// This will validate that \"openai-responses\" is included in ClientProvider::allowed_providers()\n\n// Test 4: Default base URL should be correct\nclient ValidateDefaultBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // Should default to https://api.openai.com/v1\n }\n}\n\n// Test 5: Custom base URL should work\nclient ValidateCustomBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n base_url \"https://custom.openai.com/v1\"\n }\n}\n\n// Simple test functions to validate the clients work\nfunction ValidateBasicResponses(input: string) -> string {\n client ValidateOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Say \"success\" if you can read this: {{ input }}\n \"#\n}\n\nfunction ValidateResponseTypes(input: string) -> string {\n client ValidateResponseTypeOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Respond with \"response-type-works\" for: {{ input }}\n \"#\n}\n\n// Validation test suite\ntest ValidateOpenAIResponsesProvider {\n functions [\n ValidateBasicResponses,\n ValidateResponseTypes\n ]\n args {\n input \"test\"\n }\n}", "test-files/providers/openai-responses.baml": "// OpenAI Responses API Provider Tests\n// Tests the new openai-responses provider that uses the OpenAI Responses API\n\n// Basic OpenAI Responses client\nclient OpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n }\n}\n\n// OpenAI Responses client with explicit response type\nclient OpenAIResponsesExplicit {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\n// OpenAI Responses client with custom base URL (for testing)\nclient OpenAIResponsesCustomURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n base_url \"https://api.openai.com/v1\"\n }\n}\n\n// Test basic functionality with responses API\nfunction TestOpenAIResponses(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Write a short haiku about {{ input }}. 
Make it simple and beautiful.\n \"#\n}\n\n// Test with explicit response type configuration\nfunction TestOpenAIResponsesExplicit(input: string) -> string {\n client OpenAIResponsesExplicit\n prompt #\"\n {{ _.role(\"user\") }}\n Create a brief poem about {{ input }}. Keep it under 50 words.\n \"#\n}\n\n// Test with custom base URL\nfunction TestOpenAIResponsesCustomURL(input: string) -> string {\n client OpenAIResponsesCustomURL\n prompt #\"\n {{ _.role(\"user\") }}\n Tell me an interesting fact about {{ input }}.\n \"#\n}\n\n// Test with multi-turn conversation\nfunction TestOpenAIResponsesConversation(topic: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"system\") }}\n You are a helpful assistant that provides concise answers.\n \n {{ _.role(\"user\") }}\n What is {{ topic }}?\n \n {{ _.role(\"assistant\") }}\n {{ topic }} is a fascinating subject. Let me explain briefly.\n \n {{ _.role(\"user\") }}\n Can you give me a simple example?\n \"#\n}\n\n// Test with different model parameter\nclient OpenAIResponsesGPT4 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4\"\n }\n}\n\nfunction TestOpenAIResponsesDifferentModel(input: string) -> string {\n client OpenAIResponsesGPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Explain {{ input }} in one sentence.\n \"#\n}\n\n// Test error handling with invalid configuration\nclient OpenAIResponsesInvalidResponseType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // This should work since openai response type is valid for responses provider\n client_response_type \"openai\"\n }\n}\n\nfunction TestOpenAIResponsesWithOpenAIResponseType(input: string) -> string {\n client OpenAIResponsesInvalidResponseType\n prompt #\"\n {{ _.role(\"user\") }}\n Write about {{ input }}.\n \"#\n}\n\n// Comprehensive test suite for OpenAI Responses\ntest TestOpenAIResponsesProviders {\n functions [\n TestOpenAIResponses,\n TestOpenAIResponsesExplicit,\n TestOpenAIResponsesCustomURL,\n TestOpenAIResponsesConversation,\n TestOpenAIResponsesDifferentModel,\n TestOpenAIResponsesWithOpenAIResponseType\n ]\n args {\n input \"mountains\"\n topic \"machine learning\"\n }\n}\n\n// Test shorthand syntax (this should work but use standard openai, not responses)\nfunction TestOpenAIResponsesShorthand(input: string) -> string {\n client \"openai/gpt-5-mini\"\n prompt #\"\n {{ _.role(\"user\") }}\n What do you think about {{ input }}?\n \"#\n}\n\n// Test to ensure the provider correctly routes to /v1/responses endpoint\n// This is validated by the implementation, not by the test execution\nfunction TestOpenAIResponsesEndpoint(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n This request should go to /v1/responses endpoint, not /v1/chat/completions.\n Respond with a short message about {{ input }}.\n \"#\n}\n\n// Test that demonstrates automatic response type selection\nclient OpenAIResponsesAutoType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n // No explicit client_response_type - should automatically use openai-responses\n }\n}\n\nfunction TestOpenAIResponsesAutoType(input: string) -> string {\n client OpenAIResponsesAutoType\n prompt #\"\n {{ _.role(\"user\") }}\n This client should automatically use openai-responses response type.\n Write a short description of {{ input }}.\n \"#\n}\n\n// Additional test for validation\ntest TestOpenAIResponsesValidation {\n functions [\n 
TestOpenAIResponsesShorthand,\n TestOpenAIResponsesEndpoint,\n TestOpenAIResponsesAutoType,\n TestOpenAIResponsesExplicit,\n TestOpenAIProviderWithResponsesType\n ]\n args {\n input \"artificial intelligence\"\n }\n}\n\n// Test image input/output with OpenAI Responses API\nclient OpenAIResponsesImage {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\nfunction TestOpenAIResponsesImageInput(image: image | string | pdf | audio) -> string {\n client OpenAIResponsesImage\n prompt #\"\n {{ _.role(\"user\") }}\n what is in this content?\n {{ image }}\n \"#\n}\n\n// Test for image analysis\ntest TestOpenAIResponsesImageAnalysis {\n functions [\n TestOpenAIResponsesImageInput\n ]\n args {\n image \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n }\n}\n\n// Test web search with OpenAI Responses API\nclient OpenAIResponsesWebSearch {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"web_search_preview\"\n }\n ]\n }\n}\n\nfunction TestOpenAIResponsesWebSearch(query: string) -> string {\n client OpenAIResponsesWebSearch\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for web search functionality\ntest TestOpenAIResponsesWebSearchTest {\n functions [\n TestOpenAIResponsesWebSearch\n ]\n args {\n query \"What was a positive news story from today?\"\n }\n}\n\n\n// Test function calling with OpenAI Responses API\nclient OpenAIResponsesFunctionCall {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"function\"\n name \"get_current_weather\"\n description \"Get the current weather in a given location\"\n parameters {\n type \"object\"\n properties {\n location {\n type \"string\"\n description \"The city and state, e.g. 
San Francisco, CA\"\n }\n unit {\n type \"string\"\n enum [\"celsius\", \"fahrenheit\"]\n }\n }\n required [\"location\", \"unit\"]\n }\n }\n ]\n tool_choice \"auto\"\n }\n}\n\nfunction TestOpenAIResponsesFunctionCall(query: string) -> string {\n client OpenAIResponsesFunctionCall\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for function calling\ntest TestOpenAIResponsesFunctionCallTest {\n functions [\n TestOpenAIResponsesFunctionCall\n ]\n args {\n query \"What is the weather like in Boston today?\"\n }\n}\n\n// Test using standard openai provider with openai-responses client_response_type\nclient OpenAIWithResponsesType {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\nfunction TestOpenAIProviderWithResponsesType(input: string) -> string {\n client OpenAIWithResponsesType\n prompt #\"\n {{ _.role(\"user\") }}\n This uses the openai provider but with openai-responses client_response_type.\n Write a short summary about {{ input }}.\n \"#\n}\n\n// Test reasoning with OpenAI Responses API\nclient OpenAIResponsesReasoning {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n reasoning{\n effort \"high\"\n }\n }\n}\n\nfunction TestOpenAIResponsesReasoning(problem: string) -> string {\n client OpenAIResponsesReasoning\n prompt #\"\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n// Test for reasoning capability\ntest TestOpenAIResponsesReasoningTest {\n functions [\n TestOpenAIResponsesReasoning\n ]\n args {\n problem \"Solve this step by step: If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled?\"\n }\n}\n\nclient Gpt5 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\n\nfunction TestOpenAIResponsesAllRoles(problem: string) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"system\") }}\n Hi\n {{ _.role(\"developer\") }}\n Hi\n {{ _.role(\"assistant\") }}\n Hi\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n\nfunction TestOpenaiResponsesPdfs(pdf: pdf) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"user\") }}\n Summarize in one sentence the contents of this:\n {{ pdf }}\n \"#\n} \n\ntest TestOpenaiResponsesPdfsTest {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { url \"https://www.berkshirehathaway.com/letters/2024ltr.pdf\" }\n }\n}\n\ntest TestOpenaiResponsesPdfsTestFile {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { file \"../../dummy.pdf\" }\n }\n}\n\n\ntest TestOpenAIResponsesAllRolesTest {\n functions [\n TestOpenAIResponsesAllRoles\n ]\n args {\n problem \"What is the weather like in Boston today?\"\n }\n}", "test-files/providers/openai-with-anthropic-response.baml": "client OpenAIWithAnthropicResponse {\n provider openai-responses\n options {\n model \"gpt-4o\"\n client_response_type \"openai-responses\"\n base_url \"http://localhost:8000\"\n }\n}\n\nfunction OpenAIWithAnthropicResponseHello(s: string) -> string {\n client OpenAIWithAnthropicResponse\n prompt #\"\n Return the string \"Hello, world!\" with {{ s }} included in the response.\n {{ _.role(\"user\") }}\n \"#\n}\n\ntest TestOpenAIWithAnthropicResponse {\n functions [\n OpenAIWithAnthropicResponseHello\n ]\n args {\n s \"Cherry blossoms\"\n }\n}", - "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user 
input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", + "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientEnvBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientHardocodedBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", "test-files/providers/openrouter.baml": "function TestOpenRouterMistralSmall3_1_24b(input: string) -> string {\n client OpenRouterMistralSmall3_1_24b\n prompt #\"\n Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n \"#\n}\n \n \ntest TestName {\n functions [TestOpenRouterMistralSmall3_1_24b]\n args {\n input #\"\n hello world\n \"#\n }\n}\n \n \n\nclient OpenRouterMistralSmall3_1_24b {\n provider \"openai-generic\"\n options {\n base_url \"https://openrouter.ai/api/v1\"\n api_key env.OPENROUTER_API_KEY\n model \"mistralai/mistral-small-3.1-24b-instruct\"\n temperature 0.1\n headers {\n \"HTTP-Referer\" \"https://me.com\" // Optional\n \"X-Title\" \"me\" // Optional\n }\n }\n}", "test-files/providers/strategy.baml": "function TestFallbackStrategy(input: string) -> string {\n client Resilient_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestRoundRobinStrategy(input: string) -> string {\n client Lottery_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n", "test-files/providers/tests.baml": "test TestOpenAIShorthand {\n functions [TestOpenAIShorthand]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestAWS {\n functions [\n TestAws\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestProvider {\n functions [\n TestAnthropic, TestVertex, PromptTestOpenAI, TestAzure, TestOllama, TestGemini, TestGeminiThinking, TestAws,\n TestAwsInvalidRegion,\n TestOpenAIShorthand,\n TestAnthropicShorthand,\n TestAwsInvalidAccessKey,\n TestAwsInvalidProfile,\n TestAwsInvalidSessionToken\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestName {\n functions [TestCaching]\n args {\n input #\"\nIn a near-future society where dreams have become a tradable commodity and shared experience, a lonely and socially awkward teenager named Alex discovers they possess a rare and powerful ability to not only view but also manipulate the dreams of others. Initially thrilled by this newfound power, Alex begins subtly altering the dreams of classmates and family members, helping them overcome fears, boost confidence, or experience fantastical adventures. As Alex's skills grow, so does their influence. They start selling premium dream experiences on the black market, crafting intricate and addictive dreamscapes for wealthy clients. However, the line between dream and reality begins to blur for those exposed to Alex's creations. Some clients struggle to differentiate between their true memories and the artificial ones implanted by Alex's dream manipulation.\n\nComplications arise when a mysterious government agency takes notice of Alex's unique abilities. They offer Alex a chance to use their gift for \"the greater good,\" hinting at applications in therapy, criminal rehabilitation, and even national security. Simultaneously, an underground resistance movement reaches out, warning Alex about the dangers of dream manipulation and the potential for mass control and exploitation. Caught between these opposing forces, Alex must navigate a complex web of ethical dilemmas. They grapple with questions of free will, the nature of consciousness, and the responsibility that comes with having power over people's minds. 
As the consequences of their actions spiral outward, affecting the lives of loved ones and strangers alike, Alex is forced to confront the true nature of their ability and decide how—or if—it should be used.\n\nThe story explores themes of identity, the subconscious mind, the ethics of technology, and the power of imagination. It delves into the potential consequences of a world where our most private thoughts and experiences are no longer truly our own, and examines the fine line between helping others and manipulating them for personal gain or a perceived greater good. The narrative further expands on the societal implications of such abilities, questioning the moral boundaries of altering consciousness and the potential for abuse in a world where dreams can be commodified. It challenges the reader to consider the impact of technology on personal autonomy and the ethical responsibilities of those who wield such power.\n\nAs Alex's journey unfolds, they encounter various individuals whose lives have been touched by their dream manipulations, each presenting a unique perspective on the ethical quandaries at hand. From a classmate who gains newfound confidence to a wealthy client who becomes addicted to the dreamscapes, the ripple effects of Alex's actions are profound and far-reaching. The government agency's interest in Alex's abilities raises questions about the potential for state control and surveillance, while the resistance movement highlights the dangers of unchecked power and the importance of safeguarding individual freedoms.\n\nUltimately, Alex's story is one of self-discovery and moral reckoning, as they must decide whether to embrace their abilities for personal gain, align with the government's vision of a controlled utopia, or join the resistance in their fight for freedom and autonomy. The narrative invites readers to reflect on the nature of reality, the boundaries of human experience, and the ethical implications of a world where dreams are no longer private sanctuaries but shared and manipulated commodities. It also explores the psychological impact on Alex, who must deal with the burden of knowing the intimate fears and desires of others, and the isolation that comes from being unable to share their own dreams without altering them.\n\nThe story further examines the technological advancements that have made dream manipulation possible, questioning the role of innovation in society and the potential for both progress and peril. It considers the societal divide between those who can afford to buy enhanced dream experiences and those who cannot, highlighting issues of inequality and access. As Alex becomes more entangled in the web of their own making, they must confront the possibility that their actions could lead to unintended consequences, not just for themselves but for the fabric of society as a whole.\n\nIn the end, Alex's journey is a cautionary tale about the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. 
And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n\nIn conclusion, this story is a reflection on the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n \"#\n not_cached #\"\n hello world\n \"#\n }\n}", diff --git a/integ-tests/python-v1/baml_client/parser.py b/integ-tests/python-v1/baml_client/parser.py index 1648a18ae4..b009c62af4 100644 --- a/integ-tests/python-v1/baml_client/parser.py +++ b/integ-tests/python-v1/baml_client/parser.py @@ -1104,6 +1104,18 @@ def TestOpenAI( result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAI", llm_response=llm_response, mode="request") return typing.cast(str, result) + def TestOpenAIConcurrencyClientEnvBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", llm_response=llm_response, mode="request") + return typing.cast(str, result) + + def TestOpenAIConcurrencyClientHardocodedBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", llm_response=llm_response, mode="request") + return typing.cast(str, result) + def TestOpenAIDummyClient( self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> str: @@ -2648,6 +2660,18 @@ def TestOpenAI( result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAI", llm_response=llm_response, mode="stream") return typing.cast(str, result) + def TestOpenAIConcurrencyClientEnvBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", llm_response=llm_response, mode="stream") + return typing.cast(str, result) + + def TestOpenAIConcurrencyClientHardocodedBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", llm_response=llm_response, mode="stream") + return typing.cast(str, result) + def TestOpenAIDummyClient( self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> str: diff --git a/integ-tests/python-v1/baml_client/sync_client.py b/integ-tests/python-v1/baml_client/sync_client.py index 3b50d6c408..bd9a417e09 100644 --- a/integ-tests/python-v1/baml_client/sync_client.py +++ b/integ-tests/python-v1/baml_client/sync_client.py @@ -2611,6 +2611,34 @@ def TestOpenAI(self, input: str, "input": input, }) return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: 
BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + stream = self.stream.TestOpenAIConcurrencyClientEnvBaseUrl(input=input, + baml_options=baml_options) + return stream.get_final_response() + else: + # Original non-streaming code + result = self.__options.merge_options(baml_options).call_function_sync(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + stream = self.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl(input=input, + baml_options=baml_options) + return stream.get_final_response() + else: + # Original non-streaming code + result = self.__options.merge_options(baml_options).call_function_sync(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> str: @@ -5844,6 +5872,30 @@ def TestOpenAI(self, input: str, lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), ctx, ) + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return baml_py.BamlSyncStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return baml_py.BamlSyncStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlSyncStream[str, str]: @@ -8024,6 +8076,20 @@ def TestOpenAI(self, input: str, "input": input, }, mode="request") return result + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="request") + return result + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="request") + return result def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: @@ -9824,6 +9890,20 @@ def 
TestOpenAI(self, input: str, "input": input, }, mode="stream") return result + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="stream") + return result + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="stream") + return result def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: diff --git a/integ-tests/python/baml_client/async_client.py b/integ-tests/python/baml_client/async_client.py index 7a191bb8c8..6743f49ab8 100644 --- a/integ-tests/python/baml_client/async_client.py +++ b/integ-tests/python/baml_client/async_client.py @@ -2779,6 +2779,36 @@ async def TestOpenAI(self, input: str, "input": input, }) return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + async def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + stream = self.stream.TestOpenAIConcurrencyClientEnvBaseUrl(input=input, + baml_options=baml_options) + return await stream.get_final_response() + else: + # Original non-streaming code + result = await self.__options.merge_options(baml_options).call_function_async(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + async def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + # Use streaming internally when on_tick is provided + stream = self.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl(input=input, + baml_options=baml_options) + return await stream.get_final_response() + else: + # Original non-streaming code + result = await self.__options.merge_options(baml_options).call_function_async(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) async def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> str: @@ -6088,6 +6118,30 @@ def TestOpenAI(self, input: str, lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), ctx, ) + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return baml_py.BamlStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, 
+ ) -> baml_py.BamlStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return baml_py.BamlStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlStream[str, str]: @@ -8268,6 +8322,20 @@ async def TestOpenAI(self, input: str, "input": input, }, mode="request") return result + async def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="request") + return result + async def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="request") + return result async def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: @@ -10068,6 +10136,20 @@ async def TestOpenAI(self, input: str, "input": input, }, mode="stream") return result + async def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="stream") + return result + async def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="stream") + return result async def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py index a5b492ab9f..3bc1f344be 100644 --- a/integ-tests/python/baml_client/inlinedbaml.py +++ b/integ-tests/python/baml_client/inlinedbaml.py @@ -12,7 +12,7 @@ _file_map = { - "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy 
GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id 
\"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n 
api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", + "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientEnvBaseUrl {\n provider openai-generic\n options {\n base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientHardocodedBaseUrl {\n provider openai-generic\n options {\n base_url \"http://127.0.0.1:9876/v1/\"\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id 
\"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n 
model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n 
api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", "custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n \nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. 
Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}", "fiddle-examples/audio/audio.baml": "function DescribeAudio(audio: audio) -> string {\n client GPT4o\n prompt #\"\n Describe the audio below in 20 words:\n {{ _.role(\"user\") }}\n {{ audio }}\n \"#\n\n}\n\n\n\n\n// chat role user present\nfunction DescribeAudio2(audio: audio) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 1 answer that answer the following command.\n\n Describe this in 5 words:\n {{ audio }}\n \"#\n}\n\ntest TestAudio {\n functions [DescribeAudio]\n args {\n audio { url \"https://www.pacdv.com/sounds/voices/friday-rocks.wav\"}\n }\n}\n\ntest TestAudio2 {\n functions [DescribeAudio2]\n args {\n audio { file \"friday-rocks.wav\" }\n }\n}\n", "fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... 
the json schema ...\n }'\n \"#\n}", @@ -112,7 +112,7 @@ "test-files/providers/openai-responses-validation.baml": "// OpenAI Responses Provider Validation Tests\n// These tests validate that the openai-responses provider is properly configured\n\n// Test 1: Basic provider recognition\n// This should parse successfully once openai-responses is available\nclient ValidateOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n }\n}\n\n// Test 2: Valid client_response_type values for openai-responses\nclient ValidateResponseTypeOpenAI {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai\"\n }\n}\n\nclient ValidateResponseTypeOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai-responses\"\n }\n}\n\nclient ValidateResponseTypeAnthropic {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"anthropic\"\n }\n}\n\n// Test 3: Provider should be in allowed list\n// This will validate that \"openai-responses\" is included in ClientProvider::allowed_providers()\n\n// Test 4: Default base URL should be correct\nclient ValidateDefaultBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // Should default to https://api.openai.com/v1\n }\n}\n\n// Test 5: Custom base URL should work\nclient ValidateCustomBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n base_url \"https://custom.openai.com/v1\"\n }\n}\n\n// Simple test functions to validate the clients work\nfunction ValidateBasicResponses(input: string) -> string {\n client ValidateOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Say \"success\" if you can read this: {{ input }}\n \"#\n}\n\nfunction ValidateResponseTypes(input: string) -> string {\n client ValidateResponseTypeOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Respond with \"response-type-works\" for: {{ input }}\n \"#\n}\n\n// Validation test suite\ntest ValidateOpenAIResponsesProvider {\n functions [\n ValidateBasicResponses,\n ValidateResponseTypes\n ]\n args {\n input \"test\"\n }\n}", "test-files/providers/openai-responses.baml": "// OpenAI Responses API Provider Tests\n// Tests the new openai-responses provider that uses the OpenAI Responses API\n\n// Basic OpenAI Responses client\nclient OpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n }\n}\n\n// OpenAI Responses client with explicit response type\nclient OpenAIResponsesExplicit {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\n// OpenAI Responses client with custom base URL (for testing)\nclient OpenAIResponsesCustomURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n base_url \"https://api.openai.com/v1\"\n }\n}\n\n// Test basic functionality with responses API\nfunction TestOpenAIResponses(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Write a short haiku about {{ input }}. 
Make it simple and beautiful.\n \"#\n}\n\n// Test with explicit response type configuration\nfunction TestOpenAIResponsesExplicit(input: string) -> string {\n client OpenAIResponsesExplicit\n prompt #\"\n {{ _.role(\"user\") }}\n Create a brief poem about {{ input }}. Keep it under 50 words.\n \"#\n}\n\n// Test with custom base URL\nfunction TestOpenAIResponsesCustomURL(input: string) -> string {\n client OpenAIResponsesCustomURL\n prompt #\"\n {{ _.role(\"user\") }}\n Tell me an interesting fact about {{ input }}.\n \"#\n}\n\n// Test with multi-turn conversation\nfunction TestOpenAIResponsesConversation(topic: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"system\") }}\n You are a helpful assistant that provides concise answers.\n \n {{ _.role(\"user\") }}\n What is {{ topic }}?\n \n {{ _.role(\"assistant\") }}\n {{ topic }} is a fascinating subject. Let me explain briefly.\n \n {{ _.role(\"user\") }}\n Can you give me a simple example?\n \"#\n}\n\n// Test with different model parameter\nclient OpenAIResponsesGPT4 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4\"\n }\n}\n\nfunction TestOpenAIResponsesDifferentModel(input: string) -> string {\n client OpenAIResponsesGPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Explain {{ input }} in one sentence.\n \"#\n}\n\n// Test error handling with invalid configuration\nclient OpenAIResponsesInvalidResponseType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // This should work since openai response type is valid for responses provider\n client_response_type \"openai\"\n }\n}\n\nfunction TestOpenAIResponsesWithOpenAIResponseType(input: string) -> string {\n client OpenAIResponsesInvalidResponseType\n prompt #\"\n {{ _.role(\"user\") }}\n Write about {{ input }}.\n \"#\n}\n\n// Comprehensive test suite for OpenAI Responses\ntest TestOpenAIResponsesProviders {\n functions [\n TestOpenAIResponses,\n TestOpenAIResponsesExplicit,\n TestOpenAIResponsesCustomURL,\n TestOpenAIResponsesConversation,\n TestOpenAIResponsesDifferentModel,\n TestOpenAIResponsesWithOpenAIResponseType\n ]\n args {\n input \"mountains\"\n topic \"machine learning\"\n }\n}\n\n// Test shorthand syntax (this should work but use standard openai, not responses)\nfunction TestOpenAIResponsesShorthand(input: string) -> string {\n client \"openai/gpt-5-mini\"\n prompt #\"\n {{ _.role(\"user\") }}\n What do you think about {{ input }}?\n \"#\n}\n\n// Test to ensure the provider correctly routes to /v1/responses endpoint\n// This is validated by the implementation, not by the test execution\nfunction TestOpenAIResponsesEndpoint(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n This request should go to /v1/responses endpoint, not /v1/chat/completions.\n Respond with a short message about {{ input }}.\n \"#\n}\n\n// Test that demonstrates automatic response type selection\nclient OpenAIResponsesAutoType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n // No explicit client_response_type - should automatically use openai-responses\n }\n}\n\nfunction TestOpenAIResponsesAutoType(input: string) -> string {\n client OpenAIResponsesAutoType\n prompt #\"\n {{ _.role(\"user\") }}\n This client should automatically use openai-responses response type.\n Write a short description of {{ input }}.\n \"#\n}\n\n// Additional test for validation\ntest TestOpenAIResponsesValidation {\n functions [\n 
TestOpenAIResponsesShorthand,\n TestOpenAIResponsesEndpoint,\n TestOpenAIResponsesAutoType,\n TestOpenAIResponsesExplicit,\n TestOpenAIProviderWithResponsesType\n ]\n args {\n input \"artificial intelligence\"\n }\n}\n\n// Test image input/output with OpenAI Responses API\nclient OpenAIResponsesImage {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\nfunction TestOpenAIResponsesImageInput(image: image | string | pdf | audio) -> string {\n client OpenAIResponsesImage\n prompt #\"\n {{ _.role(\"user\") }}\n what is in this content?\n {{ image }}\n \"#\n}\n\n// Test for image analysis\ntest TestOpenAIResponsesImageAnalysis {\n functions [\n TestOpenAIResponsesImageInput\n ]\n args {\n image \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n }\n}\n\n// Test web search with OpenAI Responses API\nclient OpenAIResponsesWebSearch {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"web_search_preview\"\n }\n ]\n }\n}\n\nfunction TestOpenAIResponsesWebSearch(query: string) -> string {\n client OpenAIResponsesWebSearch\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for web search functionality\ntest TestOpenAIResponsesWebSearchTest {\n functions [\n TestOpenAIResponsesWebSearch\n ]\n args {\n query \"What was a positive news story from today?\"\n }\n}\n\n\n// Test function calling with OpenAI Responses API\nclient OpenAIResponsesFunctionCall {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"function\"\n name \"get_current_weather\"\n description \"Get the current weather in a given location\"\n parameters {\n type \"object\"\n properties {\n location {\n type \"string\"\n description \"The city and state, e.g. 
San Francisco, CA\"\n }\n unit {\n type \"string\"\n enum [\"celsius\", \"fahrenheit\"]\n }\n }\n required [\"location\", \"unit\"]\n }\n }\n ]\n tool_choice \"auto\"\n }\n}\n\nfunction TestOpenAIResponsesFunctionCall(query: string) -> string {\n client OpenAIResponsesFunctionCall\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for function calling\ntest TestOpenAIResponsesFunctionCallTest {\n functions [\n TestOpenAIResponsesFunctionCall\n ]\n args {\n query \"What is the weather like in Boston today?\"\n }\n}\n\n// Test using standard openai provider with openai-responses client_response_type\nclient OpenAIWithResponsesType {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\nfunction TestOpenAIProviderWithResponsesType(input: string) -> string {\n client OpenAIWithResponsesType\n prompt #\"\n {{ _.role(\"user\") }}\n This uses the openai provider but with openai-responses client_response_type.\n Write a short summary about {{ input }}.\n \"#\n}\n\n// Test reasoning with OpenAI Responses API\nclient OpenAIResponsesReasoning {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n reasoning{\n effort \"high\"\n }\n }\n}\n\nfunction TestOpenAIResponsesReasoning(problem: string) -> string {\n client OpenAIResponsesReasoning\n prompt #\"\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n// Test for reasoning capability\ntest TestOpenAIResponsesReasoningTest {\n functions [\n TestOpenAIResponsesReasoning\n ]\n args {\n problem \"Solve this step by step: If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled?\"\n }\n}\n\nclient Gpt5 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\n\nfunction TestOpenAIResponsesAllRoles(problem: string) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"system\") }}\n Hi\n {{ _.role(\"developer\") }}\n Hi\n {{ _.role(\"assistant\") }}\n Hi\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n\nfunction TestOpenaiResponsesPdfs(pdf: pdf) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"user\") }}\n Summarize in one sentence the contents of this:\n {{ pdf }}\n \"#\n} \n\ntest TestOpenaiResponsesPdfsTest {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { url \"https://www.berkshirehathaway.com/letters/2024ltr.pdf\" }\n }\n}\n\ntest TestOpenaiResponsesPdfsTestFile {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { file \"../../dummy.pdf\" }\n }\n}\n\n\ntest TestOpenAIResponsesAllRolesTest {\n functions [\n TestOpenAIResponsesAllRoles\n ]\n args {\n problem \"What is the weather like in Boston today?\"\n }\n}", "test-files/providers/openai-with-anthropic-response.baml": "client OpenAIWithAnthropicResponse {\n provider openai-responses\n options {\n model \"gpt-4o\"\n client_response_type \"openai-responses\"\n base_url \"http://localhost:8000\"\n }\n}\n\nfunction OpenAIWithAnthropicResponseHello(s: string) -> string {\n client OpenAIWithAnthropicResponse\n prompt #\"\n Return the string \"Hello, world!\" with {{ s }} included in the response.\n {{ _.role(\"user\") }}\n \"#\n}\n\ntest TestOpenAIWithAnthropicResponse {\n functions [\n OpenAIWithAnthropicResponseHello\n ]\n args {\n s \"Cherry blossoms\"\n }\n}", - "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user 
input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", + "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientEnvBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientHardocodedBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", "test-files/providers/openrouter.baml": "function TestOpenRouterMistralSmall3_1_24b(input: string) -> string {\n client OpenRouterMistralSmall3_1_24b\n prompt #\"\n Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n \"#\n}\n \n \ntest TestName {\n functions [TestOpenRouterMistralSmall3_1_24b]\n args {\n input #\"\n hello world\n \"#\n }\n}\n \n \n\nclient OpenRouterMistralSmall3_1_24b {\n provider \"openai-generic\"\n options {\n base_url \"https://openrouter.ai/api/v1\"\n api_key env.OPENROUTER_API_KEY\n model \"mistralai/mistral-small-3.1-24b-instruct\"\n temperature 0.1\n headers {\n \"HTTP-Referer\" \"https://me.com\" // Optional\n \"X-Title\" \"me\" // Optional\n }\n }\n}", "test-files/providers/strategy.baml": "function TestFallbackStrategy(input: string) -> string {\n client Resilient_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestRoundRobinStrategy(input: string) -> string {\n client Lottery_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n", "test-files/providers/tests.baml": "test TestOpenAIShorthand {\n functions [TestOpenAIShorthand]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestAWS {\n functions [\n TestAws\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestProvider {\n functions [\n TestAnthropic, TestVertex, PromptTestOpenAI, TestAzure, TestOllama, TestGemini, TestGeminiThinking, TestAws,\n TestAwsInvalidRegion,\n TestOpenAIShorthand,\n TestAnthropicShorthand,\n TestAwsInvalidAccessKey,\n TestAwsInvalidProfile,\n TestAwsInvalidSessionToken\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestName {\n functions [TestCaching]\n args {\n input #\"\nIn a near-future society where dreams have become a tradable commodity and shared experience, a lonely and socially awkward teenager named Alex discovers they possess a rare and powerful ability to not only view but also manipulate the dreams of others. Initially thrilled by this newfound power, Alex begins subtly altering the dreams of classmates and family members, helping them overcome fears, boost confidence, or experience fantastical adventures. As Alex's skills grow, so does their influence. They start selling premium dream experiences on the black market, crafting intricate and addictive dreamscapes for wealthy clients. However, the line between dream and reality begins to blur for those exposed to Alex's creations. Some clients struggle to differentiate between their true memories and the artificial ones implanted by Alex's dream manipulation.\n\nComplications arise when a mysterious government agency takes notice of Alex's unique abilities. They offer Alex a chance to use their gift for \"the greater good,\" hinting at applications in therapy, criminal rehabilitation, and even national security. Simultaneously, an underground resistance movement reaches out, warning Alex about the dangers of dream manipulation and the potential for mass control and exploitation. Caught between these opposing forces, Alex must navigate a complex web of ethical dilemmas. They grapple with questions of free will, the nature of consciousness, and the responsibility that comes with having power over people's minds. 
As the consequences of their actions spiral outward, affecting the lives of loved ones and strangers alike, Alex is forced to confront the true nature of their ability and decide how—or if—it should be used.\n\nThe story explores themes of identity, the subconscious mind, the ethics of technology, and the power of imagination. It delves into the potential consequences of a world where our most private thoughts and experiences are no longer truly our own, and examines the fine line between helping others and manipulating them for personal gain or a perceived greater good. The narrative further expands on the societal implications of such abilities, questioning the moral boundaries of altering consciousness and the potential for abuse in a world where dreams can be commodified. It challenges the reader to consider the impact of technology on personal autonomy and the ethical responsibilities of those who wield such power.\n\nAs Alex's journey unfolds, they encounter various individuals whose lives have been touched by their dream manipulations, each presenting a unique perspective on the ethical quandaries at hand. From a classmate who gains newfound confidence to a wealthy client who becomes addicted to the dreamscapes, the ripple effects of Alex's actions are profound and far-reaching. The government agency's interest in Alex's abilities raises questions about the potential for state control and surveillance, while the resistance movement highlights the dangers of unchecked power and the importance of safeguarding individual freedoms.\n\nUltimately, Alex's story is one of self-discovery and moral reckoning, as they must decide whether to embrace their abilities for personal gain, align with the government's vision of a controlled utopia, or join the resistance in their fight for freedom and autonomy. The narrative invites readers to reflect on the nature of reality, the boundaries of human experience, and the ethical implications of a world where dreams are no longer private sanctuaries but shared and manipulated commodities. It also explores the psychological impact on Alex, who must deal with the burden of knowing the intimate fears and desires of others, and the isolation that comes from being unable to share their own dreams without altering them.\n\nThe story further examines the technological advancements that have made dream manipulation possible, questioning the role of innovation in society and the potential for both progress and peril. It considers the societal divide between those who can afford to buy enhanced dream experiences and those who cannot, highlighting issues of inequality and access. As Alex becomes more entangled in the web of their own making, they must confront the possibility that their actions could lead to unintended consequences, not just for themselves but for the fabric of society as a whole.\n\nIn the end, Alex's journey is a cautionary tale about the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. 
And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n\nIn conclusion, this story is a reflection on the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n \"#\n not_cached #\"\n hello world\n \"#\n }\n}", diff --git a/integ-tests/python/baml_client/parser.py b/integ-tests/python/baml_client/parser.py index 1648a18ae4..b009c62af4 100644 --- a/integ-tests/python/baml_client/parser.py +++ b/integ-tests/python/baml_client/parser.py @@ -1104,6 +1104,18 @@ def TestOpenAI( result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAI", llm_response=llm_response, mode="request") return typing.cast(str, result) + def TestOpenAIConcurrencyClientEnvBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", llm_response=llm_response, mode="request") + return typing.cast(str, result) + + def TestOpenAIConcurrencyClientHardocodedBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", llm_response=llm_response, mode="request") + return typing.cast(str, result) + def TestOpenAIDummyClient( self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> str: @@ -2648,6 +2660,18 @@ def TestOpenAI( result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAI", llm_response=llm_response, mode="stream") return typing.cast(str, result) + def TestOpenAIConcurrencyClientEnvBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", llm_response=llm_response, mode="stream") + return typing.cast(str, result) + + def TestOpenAIConcurrencyClientHardocodedBaseUrl( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> str: + result = self.__options.merge_options(baml_options).parse_response(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", llm_response=llm_response, mode="stream") + return typing.cast(str, result) + def TestOpenAIDummyClient( self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> str: diff --git a/integ-tests/python/baml_client/sync_client.py b/integ-tests/python/baml_client/sync_client.py index 3b50d6c408..bd9a417e09 100644 --- a/integ-tests/python/baml_client/sync_client.py +++ b/integ-tests/python/baml_client/sync_client.py @@ -2611,6 +2611,34 @@ def TestOpenAI(self, input: str, "input": input, }) return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> 
str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + stream = self.stream.TestOpenAIConcurrencyClientEnvBaseUrl(input=input, + baml_options=baml_options) + return stream.get_final_response() + else: + # Original non-streaming code + result = self.__options.merge_options(baml_options).call_function_sync(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> str: + # Check if on_tick is provided + if 'on_tick' in baml_options: + stream = self.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl(input=input, + baml_options=baml_options) + return stream.get_final_response() + else: + # Original non-streaming code + result = self.__options.merge_options(baml_options).call_function_sync(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return typing.cast(str, result.cast_to(types, types, stream_types, False, __runtime__)) def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> str: @@ -5844,6 +5872,30 @@ def TestOpenAI(self, input: str, lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), ctx, ) + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }) + return baml_py.BamlSyncStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[str, str]: + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }) + return baml_py.BamlSyncStream[str, str]( + result, + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(str, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlSyncStream[str, str]: @@ -8024,6 +8076,20 @@ def TestOpenAI(self, input: str, "input": input, }, mode="request") return result + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="request") + return result + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="request") + return result def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: @@ -9824,6 +9890,20 @@ def TestOpenAI(self, input: str, "input": 
input, }, mode="stream") return result + def TestOpenAIConcurrencyClientEnvBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientEnvBaseUrl", args={ + "input": input, + }, mode="stream") + return result + def TestOpenAIConcurrencyClientHardocodedBaseUrl(self, input: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.baml_py.HTTPRequest: + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="TestOpenAIConcurrencyClientHardocodedBaseUrl", args={ + "input": input, + }, mode="stream") + return result def TestOpenAIDummyClient(self, input: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: diff --git a/integ-tests/python/run_tests.sh b/integ-tests/python/run_tests.sh index 3fab89dacd..af2981d78f 100755 --- a/integ-tests/python/run_tests.sh +++ b/integ-tests/python/run_tests.sh @@ -22,3 +22,4 @@ uv run pytest "$@" \ --ignore=tests/test_ontick.py \ --ignore=tests/test_abort_handlers.py \ --ignore=tests/test_abort_handlers_simple.py \ + --ignore=tests/test_connection_pool.py \ diff --git a/integ-tests/python/tests/test_connection_pool.py b/integ-tests/python/tests/test_connection_pool.py new file mode 100644 index 0000000000..e99143fa1c --- /dev/null +++ b/integ-tests/python/tests/test_connection_pool.py @@ -0,0 +1,262 @@ +# Tests to repro this issue: https://github.com/BoundaryML/baml/issues/2594 +# +# The code here is not able to reproduce the issue, it suggests that concurrency +# actually works as expected. The idea is: +# +# 1. Start the Node server in concurrent_server.js that receives a --latency +# flag and responds to any incoming request in that amount of time. +# +# 2. Send 30 requests concurrently from the Python client. If there was a +# problem with the client, the total duration to get all the responses should +# be much longer than the latency of a single response. +# +# That should prove that concurrency either works or does not. +# +# According to the Github issue, the first 6 requests run sequentially, so the +# total duration of the asyncio.gather() call should be at least 6 times the +# latency of a single response. But so far no luck in reproducing the bug. + +import asyncio +import contextlib +import os +import pathlib +import shutil +import socket +import time + +from baml_py import ClientRegistry +import pytest +import pytest_asyncio + +from baml_client import b + + +# These are hardcoded in the Baml client. Can't be fully dynamic because the +# bug is present when URLs are written as strings instead of using the env var. +HOST = "127.0.0.1" +PORT = 9876 + + +# For dynamic clients we can use ports not hardcoded in the Baml client. 
+def find_free_port(): + with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +def port_is_free(host: str, port: int) -> bool: + with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.settimeout(0.25) + return s.connect_ex((host, port)) != 0 + + + +async def wait_for_port(host: str, port: int, timeout_s: float = 15.0): + deadline = asyncio.get_running_loop().time() + timeout_s + while True: + try: + reader, writer = await asyncio.open_connection(host, port) + writer.close() + try: + await writer.wait_closed() + except Exception: + pass + return + except Exception: + if asyncio.get_running_loop().time() > deadline: + raise RuntimeError(f"Port {host}:{port} did not open in time") + await asyncio.sleep(0.05) + + +async def http_health(host: str, port: int, timeout_s: float = 2.0): + deadline = asyncio.get_running_loop().time() + timeout_s + + request = ( + "GET /health HTTP/1.1\r\n" + f"Host: {host}:{port}\r\n" + "Connection: close\r\n" + "\r\n" + ) + + while True: + try: + reader, writer = await asyncio.open_connection(host, port) + writer.write(request.encode("ascii")) + await writer.drain() + data = await reader.read(-1) + writer.close() + try: + await writer.wait_closed() + except Exception: + pass + if b" 200 " in data: + return True + except Exception: + pass + + if asyncio.get_running_loop().time() > deadline: + return False + await asyncio.sleep(0.05) + + +async def terminate_process(proc: asyncio.subprocess.Process): + if proc.returncode is None: + try: + proc.terminate() + except ProcessLookupError: + return + try: + await asyncio.wait_for(proc.wait(), timeout=3) + except asyncio.TimeoutError: + try: + proc.kill() + except ProcessLookupError: + pass + + +async def read_stdout(proc: asyncio.subprocess.Process, buf: list[str]): + if not proc.stdout: + return + try: + while True: + line = await proc.stdout.readline() + if not line: + break + try: + buf.append(line.decode("utf-8", errors="ignore")) + except Exception: + buf.append(repr(line)) + except Exception: + pass + + +@contextlib.asynccontextmanager +async def start_concurrency_test_server(latency: int): + server_js_path = pathlib.Path(__file__).parent.parent.parent / "common" / "concurrent_server.js" + if not server_js_path.exists(): + raise FileNotFoundError(f"Server script not found: {server_js_path}") + + node_bin = shutil.which("node") or shutil.which("nodejs") + if not node_bin: + raise RuntimeError("Cannot find 'node' or 'nodejs' on PATH") + + if not port_is_free(HOST, PORT): + raise RuntimeError(f"Concurrency test expects Port {HOST}:{PORT} to be free but it is not") + + # In case we need additional logic. 
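+    # The hardcoded-base-url client in clients.baml points at "http://127.0.0.1:9876/v1/", so +    # these must stay pinned to HOST/PORT for that test to reach this server. The server also +    # exposes a /health endpoint (e.g. `curl http://127.0.0.1:9876/health`) for manual checks.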
+ host = HOST + port = PORT + + cmd = [node_bin, str(server_js_path), "--host", host, "--port", str(port), "--latency", str(latency)] + + base_url = f"http://{host}:{port}/v1/" + + os.environ["OPENAI_CONCURRENCY_TEST_BASE_URL"] = base_url + env = os.environ.copy() + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + cwd=os.getcwd(), + env=env, + ) + + log_buf: list[str] = [] + proc_stdout_task = asyncio.create_task(read_stdout(proc, log_buf)) + + try: + await wait_for_port(host, port, timeout_s=15.0) + if not await http_health(host, port, timeout_s=2.0): + raise RuntimeError(f"Server at {host}:{port} failed the /health check") + except Exception as e: + await terminate_process(proc) + + try: + await asyncio.wait_for(proc_stdout_task, timeout=0.3) + except Exception: + pass + + logs = "".join(log_buf) + + raise RuntimeError(f"Failed to start Node server: {e}\n--- server output ---\n{logs}") + + try: + yield base_url + finally: + await terminate_process(proc) + + try: + await asyncio.wait_for(proc_stdout_task, timeout=0.3) + except Exception: + pass + + logs = "".join(log_buf) + print(f"--- Concurrency Test Server Output ---\n{logs}") + + +@pytest_asyncio.fixture(scope="module") +async def concurrency_server_url(): + async with start_concurrency_test_server(LATENCY_MS) as base_url: + yield base_url + + +# Time in milliseconds the server takes to process one request. +LATENCY_MS = 500 + +# How many requests to make. +NUM_REQUESTS = 30 + +# Allow some extra time per request for scheduling / OS overhead. +ALLOWED_DEVIATION_MS = 3 * NUM_REQUESTS + +# Expected duration in milliseconds of all concurrent requests. +EXPECTED_DURATION_MS = LATENCY_MS + ALLOWED_DEVIATION_MS + + +async def assert_completes_in_time(baml_requests: list[asyncio.Future]): + start_time = time.perf_counter() + timeout_s = max(5.0, (EXPECTED_DURATION_MS / 1000.0) + 2.0) + + results = await asyncio.wait_for(asyncio.gather(*baml_requests), timeout=timeout_s) + + duration_ms = (time.perf_counter() - start_time) * 1000.0 + + assert len(results) == NUM_REQUESTS + + assert duration_ms <= EXPECTED_DURATION_MS, ( + f"Expected duration <= {EXPECTED_DURATION_MS} ms but got {duration_ms:.2f} ms; " + f"requests may not be running concurrently."
+ ) + + +@pytest.mark.asyncio +async def test_openai_concurrency_client_hardcoded_base_url(concurrency_server_url: str): + requests = [b.TestOpenAIConcurrencyClientHardocodedBaseUrl("test") for _ in range(NUM_REQUESTS)] + + await assert_completes_in_time(requests) + + +@pytest.mark.asyncio +async def test_openai_concurrency_client_env_var_base_url(concurrency_server_url: str): + requests = [b.TestOpenAIConcurrencyClientEnvBaseUrl("test") for _ in range(NUM_REQUESTS)] + + await assert_completes_in_time(requests) + + +@pytest.mark.asyncio +async def test_openai_concurrency_client_registry(concurrency_server_url: str): + cr = ClientRegistry() + cr.add_llm_client("ConcurrencyTestClient", "openai-generic", { + "model": "concurrency-test", + "base_url": concurrency_server_url, + }) + cr.set_primary("ConcurrencyTestClient") + + requests = [b.TestOpenAI("test", {"client_registry": cr}) for _ in range(NUM_REQUESTS)] + + await assert_completes_in_time(requests) \ No newline at end of file diff --git a/integ-tests/react/baml_client/async_client.ts b/integ-tests/react/baml_client/async_client.ts index b96a94c533..259a0f169a 100644 --- a/integ-tests/react/baml_client/async_client.ts +++ b/integ-tests/react/baml_client/async_client.ts @@ -8736,6 +8736,102 @@ export type RecursivePartialNull = MovedRecursivePartialNull } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided - route through streaming if so + if (options.onTick) { + const stream = this.stream.TestOpenAIConcurrencyClientEnvBaseUrl( + input, + __baml_options__ + ); + + return await stream.getFinalResponse(); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = await this.runtime.callFunction( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided - route through streaming if so + if (options.onTick) { + const stream = this.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl( + input, + __baml_options__ + ); + + return await stream.getFinalResponse(); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + const rawEnv = __baml_options__?.env ? 
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = await this.runtime.callFunction( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -24278,6 +24374,138 @@ export type RecursivePartialNull = MovedRecursivePartialNull } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): BamlStream + { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + let collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + + let onTickWrapper: (() => void) | undefined; + + // Create collector and wrap onTick if provided + if (options.onTick) { + const tickCollector = new Collector("on-tick-collector"); + collector = [...collector, tickCollector]; + + onTickWrapper = () => { + const log = tickCollector.last; + if (log) { + try { + options.onTick!("Unknown", log); + } catch (error) { + console.error("Error in onTick callback for TestOpenAIConcurrencyClientEnvBaseUrl", error); + } + } + }; + } + + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.streamFunction( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + undefined, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + onTickWrapper, + ) + return new BamlStream( + raw, + (a): string => a, + (a): string => a, + this.ctxManager.cloneContext(), + options.signal, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): BamlStream + { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + let collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + + let onTickWrapper: (() => void) | undefined; + + // Create collector and wrap onTick if provided + if (options.onTick) { + const tickCollector = new Collector("on-tick-collector"); + collector = [...collector, tickCollector]; + + onTickWrapper = () => { + const log = tickCollector.last; + if (log) { + try { + options.onTick!("Unknown", log); + } catch (error) { + console.error("Error in onTick callback for TestOpenAIConcurrencyClientHardocodedBaseUrl", error); + } + } + }; + } + + const rawEnv = __baml_options__?.env ? 
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.streamFunction( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + undefined, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + onTickWrapper, + ) + return new BamlStream( + raw, + (a): string => a, + (a): string => a, + this.ctxManager.cloneContext(), + options.signal, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/react/baml_client/async_request.ts b/integ-tests/react/baml_client/async_request.ts index a04b4ec23c..d0e6619f92 100644 --- a/integ-tests/react/baml_client/async_request.ts +++ b/integ-tests/react/baml_client/async_request.ts @@ -4541,6 +4541,56 @@ env?: Record } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -10947,6 +10997,56 @@ env?: Record } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const rawEnv = __baml_options__?.env ? 
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/react/baml_client/inlinedbaml.ts b/integ-tests/react/baml_client/inlinedbaml.ts index 467be2794b..f3a78048ff 100644 --- a/integ-tests/react/baml_client/inlinedbaml.ts +++ b/integ-tests/react/baml_client/inlinedbaml.ts @@ -20,7 +20,7 @@ $ pnpm add @boundaryml/baml const fileMap = { - "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing 
failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n 
max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", + "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n 
provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientEnvBaseUrl {\n provider openai-generic\n options {\n base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientHardocodedBaseUrl {\n provider openai-generic\n options {\n base_url \"http://127.0.0.1:9876/v1/\"\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model 
gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient 
Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", "custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. 
Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n \nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}", "fiddle-examples/audio/audio.baml": "function DescribeAudio(audio: audio) -> string {\n client GPT4o\n prompt #\"\n Describe the audio below in 20 words:\n {{ _.role(\"user\") }}\n {{ audio }}\n \"#\n\n}\n\n\n\n\n// chat role user present\nfunction DescribeAudio2(audio: audio) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 1 answer that answer the following command.\n\n Describe this in 5 words:\n {{ audio }}\n \"#\n}\n\ntest TestAudio {\n functions [DescribeAudio]\n args {\n audio { url \"https://www.pacdv.com/sounds/voices/friday-rocks.wav\"}\n }\n}\n\ntest TestAudio2 {\n functions [DescribeAudio2]\n args {\n audio { file \"friday-rocks.wav\" }\n }\n}\n", "fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... 
the json schema ...\n }'\n \"#\n}", @@ -120,7 +120,7 @@ const fileMap = { "test-files/providers/openai-responses-validation.baml": "// OpenAI Responses Provider Validation Tests\n// These tests validate that the openai-responses provider is properly configured\n\n// Test 1: Basic provider recognition\n// This should parse successfully once openai-responses is available\nclient ValidateOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n }\n}\n\n// Test 2: Valid client_response_type values for openai-responses\nclient ValidateResponseTypeOpenAI {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai\"\n }\n}\n\nclient ValidateResponseTypeOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai-responses\"\n }\n}\n\nclient ValidateResponseTypeAnthropic {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"anthropic\"\n }\n}\n\n// Test 3: Provider should be in allowed list\n// This will validate that \"openai-responses\" is included in ClientProvider::allowed_providers()\n\n// Test 4: Default base URL should be correct\nclient ValidateDefaultBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // Should default to https://api.openai.com/v1\n }\n}\n\n// Test 5: Custom base URL should work\nclient ValidateCustomBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n base_url \"https://custom.openai.com/v1\"\n }\n}\n\n// Simple test functions to validate the clients work\nfunction ValidateBasicResponses(input: string) -> string {\n client ValidateOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Say \"success\" if you can read this: {{ input }}\n \"#\n}\n\nfunction ValidateResponseTypes(input: string) -> string {\n client ValidateResponseTypeOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Respond with \"response-type-works\" for: {{ input }}\n \"#\n}\n\n// Validation test suite\ntest ValidateOpenAIResponsesProvider {\n functions [\n ValidateBasicResponses,\n ValidateResponseTypes\n ]\n args {\n input \"test\"\n }\n}", "test-files/providers/openai-responses.baml": "// OpenAI Responses API Provider Tests\n// Tests the new openai-responses provider that uses the OpenAI Responses API\n\n// Basic OpenAI Responses client\nclient OpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n }\n}\n\n// OpenAI Responses client with explicit response type\nclient OpenAIResponsesExplicit {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\n// OpenAI Responses client with custom base URL (for testing)\nclient OpenAIResponsesCustomURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n base_url \"https://api.openai.com/v1\"\n }\n}\n\n// Test basic functionality with responses API\nfunction TestOpenAIResponses(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Write a short haiku about {{ input }}. 
Make it simple and beautiful.\n \"#\n}\n\n// Test with explicit response type configuration\nfunction TestOpenAIResponsesExplicit(input: string) -> string {\n client OpenAIResponsesExplicit\n prompt #\"\n {{ _.role(\"user\") }}\n Create a brief poem about {{ input }}. Keep it under 50 words.\n \"#\n}\n\n// Test with custom base URL\nfunction TestOpenAIResponsesCustomURL(input: string) -> string {\n client OpenAIResponsesCustomURL\n prompt #\"\n {{ _.role(\"user\") }}\n Tell me an interesting fact about {{ input }}.\n \"#\n}\n\n// Test with multi-turn conversation\nfunction TestOpenAIResponsesConversation(topic: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"system\") }}\n You are a helpful assistant that provides concise answers.\n \n {{ _.role(\"user\") }}\n What is {{ topic }}?\n \n {{ _.role(\"assistant\") }}\n {{ topic }} is a fascinating subject. Let me explain briefly.\n \n {{ _.role(\"user\") }}\n Can you give me a simple example?\n \"#\n}\n\n// Test with different model parameter\nclient OpenAIResponsesGPT4 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4\"\n }\n}\n\nfunction TestOpenAIResponsesDifferentModel(input: string) -> string {\n client OpenAIResponsesGPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Explain {{ input }} in one sentence.\n \"#\n}\n\n// Test error handling with invalid configuration\nclient OpenAIResponsesInvalidResponseType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // This should work since openai response type is valid for responses provider\n client_response_type \"openai\"\n }\n}\n\nfunction TestOpenAIResponsesWithOpenAIResponseType(input: string) -> string {\n client OpenAIResponsesInvalidResponseType\n prompt #\"\n {{ _.role(\"user\") }}\n Write about {{ input }}.\n \"#\n}\n\n// Comprehensive test suite for OpenAI Responses\ntest TestOpenAIResponsesProviders {\n functions [\n TestOpenAIResponses,\n TestOpenAIResponsesExplicit,\n TestOpenAIResponsesCustomURL,\n TestOpenAIResponsesConversation,\n TestOpenAIResponsesDifferentModel,\n TestOpenAIResponsesWithOpenAIResponseType\n ]\n args {\n input \"mountains\"\n topic \"machine learning\"\n }\n}\n\n// Test shorthand syntax (this should work but use standard openai, not responses)\nfunction TestOpenAIResponsesShorthand(input: string) -> string {\n client \"openai/gpt-5-mini\"\n prompt #\"\n {{ _.role(\"user\") }}\n What do you think about {{ input }}?\n \"#\n}\n\n// Test to ensure the provider correctly routes to /v1/responses endpoint\n// This is validated by the implementation, not by the test execution\nfunction TestOpenAIResponsesEndpoint(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n This request should go to /v1/responses endpoint, not /v1/chat/completions.\n Respond with a short message about {{ input }}.\n \"#\n}\n\n// Test that demonstrates automatic response type selection\nclient OpenAIResponsesAutoType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n // No explicit client_response_type - should automatically use openai-responses\n }\n}\n\nfunction TestOpenAIResponsesAutoType(input: string) -> string {\n client OpenAIResponsesAutoType\n prompt #\"\n {{ _.role(\"user\") }}\n This client should automatically use openai-responses response type.\n Write a short description of {{ input }}.\n \"#\n}\n\n// Additional test for validation\ntest TestOpenAIResponsesValidation {\n functions [\n 
TestOpenAIResponsesShorthand,\n TestOpenAIResponsesEndpoint,\n TestOpenAIResponsesAutoType,\n TestOpenAIResponsesExplicit,\n TestOpenAIProviderWithResponsesType\n ]\n args {\n input \"artificial intelligence\"\n }\n}\n\n// Test image input/output with OpenAI Responses API\nclient OpenAIResponsesImage {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\nfunction TestOpenAIResponsesImageInput(image: image | string | pdf | audio) -> string {\n client OpenAIResponsesImage\n prompt #\"\n {{ _.role(\"user\") }}\n what is in this content?\n {{ image }}\n \"#\n}\n\n// Test for image analysis\ntest TestOpenAIResponsesImageAnalysis {\n functions [\n TestOpenAIResponsesImageInput\n ]\n args {\n image \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n }\n}\n\n// Test web search with OpenAI Responses API\nclient OpenAIResponsesWebSearch {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"web_search_preview\"\n }\n ]\n }\n}\n\nfunction TestOpenAIResponsesWebSearch(query: string) -> string {\n client OpenAIResponsesWebSearch\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for web search functionality\ntest TestOpenAIResponsesWebSearchTest {\n functions [\n TestOpenAIResponsesWebSearch\n ]\n args {\n query \"What was a positive news story from today?\"\n }\n}\n\n\n// Test function calling with OpenAI Responses API\nclient OpenAIResponsesFunctionCall {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"function\"\n name \"get_current_weather\"\n description \"Get the current weather in a given location\"\n parameters {\n type \"object\"\n properties {\n location {\n type \"string\"\n description \"The city and state, e.g. 
San Francisco, CA\"\n }\n unit {\n type \"string\"\n enum [\"celsius\", \"fahrenheit\"]\n }\n }\n required [\"location\", \"unit\"]\n }\n }\n ]\n tool_choice \"auto\"\n }\n}\n\nfunction TestOpenAIResponsesFunctionCall(query: string) -> string {\n client OpenAIResponsesFunctionCall\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for function calling\ntest TestOpenAIResponsesFunctionCallTest {\n functions [\n TestOpenAIResponsesFunctionCall\n ]\n args {\n query \"What is the weather like in Boston today?\"\n }\n}\n\n// Test using standard openai provider with openai-responses client_response_type\nclient OpenAIWithResponsesType {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\nfunction TestOpenAIProviderWithResponsesType(input: string) -> string {\n client OpenAIWithResponsesType\n prompt #\"\n {{ _.role(\"user\") }}\n This uses the openai provider but with openai-responses client_response_type.\n Write a short summary about {{ input }}.\n \"#\n}\n\n// Test reasoning with OpenAI Responses API\nclient OpenAIResponsesReasoning {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n reasoning{\n effort \"high\"\n }\n }\n}\n\nfunction TestOpenAIResponsesReasoning(problem: string) -> string {\n client OpenAIResponsesReasoning\n prompt #\"\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n// Test for reasoning capability\ntest TestOpenAIResponsesReasoningTest {\n functions [\n TestOpenAIResponsesReasoning\n ]\n args {\n problem \"Solve this step by step: If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled?\"\n }\n}\n\nclient Gpt5 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\n\nfunction TestOpenAIResponsesAllRoles(problem: string) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"system\") }}\n Hi\n {{ _.role(\"developer\") }}\n Hi\n {{ _.role(\"assistant\") }}\n Hi\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n\nfunction TestOpenaiResponsesPdfs(pdf: pdf) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"user\") }}\n Summarize in one sentence the contents of this:\n {{ pdf }}\n \"#\n} \n\ntest TestOpenaiResponsesPdfsTest {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { url \"https://www.berkshirehathaway.com/letters/2024ltr.pdf\" }\n }\n}\n\ntest TestOpenaiResponsesPdfsTestFile {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { file \"../../dummy.pdf\" }\n }\n}\n\n\ntest TestOpenAIResponsesAllRolesTest {\n functions [\n TestOpenAIResponsesAllRoles\n ]\n args {\n problem \"What is the weather like in Boston today?\"\n }\n}", "test-files/providers/openai-with-anthropic-response.baml": "client OpenAIWithAnthropicResponse {\n provider openai-responses\n options {\n model \"gpt-4o\"\n client_response_type \"openai-responses\"\n base_url \"http://localhost:8000\"\n }\n}\n\nfunction OpenAIWithAnthropicResponseHello(s: string) -> string {\n client OpenAIWithAnthropicResponse\n prompt #\"\n Return the string \"Hello, world!\" with {{ s }} included in the response.\n {{ _.role(\"user\") }}\n \"#\n}\n\ntest TestOpenAIWithAnthropicResponse {\n functions [\n OpenAIWithAnthropicResponseHello\n ]\n args {\n s \"Cherry blossoms\"\n }\n}", - "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user 
input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", + "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientEnvBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientHardocodedBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", "test-files/providers/openrouter.baml": "function TestOpenRouterMistralSmall3_1_24b(input: string) -> string {\n client OpenRouterMistralSmall3_1_24b\n prompt #\"\n Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n \"#\n}\n \n \ntest TestName {\n functions [TestOpenRouterMistralSmall3_1_24b]\n args {\n input #\"\n hello world\n \"#\n }\n}\n \n \n\nclient OpenRouterMistralSmall3_1_24b {\n provider \"openai-generic\"\n options {\n base_url \"https://openrouter.ai/api/v1\"\n api_key env.OPENROUTER_API_KEY\n model \"mistralai/mistral-small-3.1-24b-instruct\"\n temperature 0.1\n headers {\n \"HTTP-Referer\" \"https://me.com\" // Optional\n \"X-Title\" \"me\" // Optional\n }\n }\n}", "test-files/providers/strategy.baml": "function TestFallbackStrategy(input: string) -> string {\n client Resilient_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestRoundRobinStrategy(input: string) -> string {\n client Lottery_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n", "test-files/providers/tests.baml": "test TestOpenAIShorthand {\n functions [TestOpenAIShorthand]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestAWS {\n functions [\n TestAws\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestProvider {\n functions [\n TestAnthropic, TestVertex, PromptTestOpenAI, TestAzure, TestOllama, TestGemini, TestGeminiThinking, TestAws,\n TestAwsInvalidRegion,\n TestOpenAIShorthand,\n TestAnthropicShorthand,\n TestAwsInvalidAccessKey,\n TestAwsInvalidProfile,\n TestAwsInvalidSessionToken\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestName {\n functions [TestCaching]\n args {\n input #\"\nIn a near-future society where dreams have become a tradable commodity and shared experience, a lonely and socially awkward teenager named Alex discovers they possess a rare and powerful ability to not only view but also manipulate the dreams of others. Initially thrilled by this newfound power, Alex begins subtly altering the dreams of classmates and family members, helping them overcome fears, boost confidence, or experience fantastical adventures. As Alex's skills grow, so does their influence. They start selling premium dream experiences on the black market, crafting intricate and addictive dreamscapes for wealthy clients. However, the line between dream and reality begins to blur for those exposed to Alex's creations. Some clients struggle to differentiate between their true memories and the artificial ones implanted by Alex's dream manipulation.\n\nComplications arise when a mysterious government agency takes notice of Alex's unique abilities. They offer Alex a chance to use their gift for \"the greater good,\" hinting at applications in therapy, criminal rehabilitation, and even national security. Simultaneously, an underground resistance movement reaches out, warning Alex about the dangers of dream manipulation and the potential for mass control and exploitation. Caught between these opposing forces, Alex must navigate a complex web of ethical dilemmas. They grapple with questions of free will, the nature of consciousness, and the responsibility that comes with having power over people's minds. 
As the consequences of their actions spiral outward, affecting the lives of loved ones and strangers alike, Alex is forced to confront the true nature of their ability and decide how—or if—it should be used.\n\nThe story explores themes of identity, the subconscious mind, the ethics of technology, and the power of imagination. It delves into the potential consequences of a world where our most private thoughts and experiences are no longer truly our own, and examines the fine line between helping others and manipulating them for personal gain or a perceived greater good. The narrative further expands on the societal implications of such abilities, questioning the moral boundaries of altering consciousness and the potential for abuse in a world where dreams can be commodified. It challenges the reader to consider the impact of technology on personal autonomy and the ethical responsibilities of those who wield such power.\n\nAs Alex's journey unfolds, they encounter various individuals whose lives have been touched by their dream manipulations, each presenting a unique perspective on the ethical quandaries at hand. From a classmate who gains newfound confidence to a wealthy client who becomes addicted to the dreamscapes, the ripple effects of Alex's actions are profound and far-reaching. The government agency's interest in Alex's abilities raises questions about the potential for state control and surveillance, while the resistance movement highlights the dangers of unchecked power and the importance of safeguarding individual freedoms.\n\nUltimately, Alex's story is one of self-discovery and moral reckoning, as they must decide whether to embrace their abilities for personal gain, align with the government's vision of a controlled utopia, or join the resistance in their fight for freedom and autonomy. The narrative invites readers to reflect on the nature of reality, the boundaries of human experience, and the ethical implications of a world where dreams are no longer private sanctuaries but shared and manipulated commodities. It also explores the psychological impact on Alex, who must deal with the burden of knowing the intimate fears and desires of others, and the isolation that comes from being unable to share their own dreams without altering them.\n\nThe story further examines the technological advancements that have made dream manipulation possible, questioning the role of innovation in society and the potential for both progress and peril. It considers the societal divide between those who can afford to buy enhanced dream experiences and those who cannot, highlighting issues of inequality and access. As Alex becomes more entangled in the web of their own making, they must confront the possibility that their actions could lead to unintended consequences, not just for themselves but for the fabric of society as a whole.\n\nIn the end, Alex's journey is a cautionary tale about the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. 
And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n\nIn conclusion, this story is a reflection on the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n \"#\n not_cached #\"\n hello world\n \"#\n }\n}", diff --git a/integ-tests/react/baml_client/parser.ts b/integ-tests/react/baml_client/parser.ts index 0fa173148f..5295671d77 100644 --- a/integ-tests/react/baml_client/parser.ts +++ b/integ-tests/react/baml_client/parser.ts @@ -4170,6 +4170,52 @@ export class LlmResponseParser { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientEnvBaseUrl", + llmResponse, + false, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + llmResponse, + false, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( llmResponse: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record } @@ -10064,6 +10110,52 @@ export class LlmStreamParser { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record } + ): string { + try { + const rawEnv = __baml_options__?.env ? 
{ ...process.env, ...__baml_options__.env } : { ...process.env };
+      const env: Record<string, string> = Object.fromEntries(
+        Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][]
+      );
+      return this.runtime.parseLlmResponse(
+        "TestOpenAIConcurrencyClientEnvBaseUrl",
+        llmResponse,
+        true,
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+        env,
+      ) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+
+  TestOpenAIConcurrencyClientHardocodedBaseUrl(
+    llmResponse: string,
+    __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> }
+  ): string {
+    try {
+      const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env };
+      const env: Record<string, string> = Object.fromEntries(
+        Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][]
+      );
+      return this.runtime.parseLlmResponse(
+        "TestOpenAIConcurrencyClientHardocodedBaseUrl",
+        llmResponse,
+        true,
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+        env,
+      ) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+
   TestOpenAIDummyClient(
     llmResponse: string,
     __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> }
diff --git a/integ-tests/react/baml_client/react/hooks.tsx b/integ-tests/react/baml_client/react/hooks.tsx
index 130968f564..e99e200daf 100644
--- a/integ-tests/react/baml_client/react/hooks.tsx
+++ b/integ-tests/react/baml_client/react/hooks.tsx
@@ -9373,6 +9373,106 @@ export function useTestOpenAI(
     return useBamlAction(action, props as HookInput<'TestOpenAI', { stream: false }>)
   }
 }
+/**
+ * A specialized hook for the TestOpenAIConcurrencyClientEnvBaseUrl BAML function that supports both streaming and non‑streaming responses.
+ *
+ * **Input Types:**
+ *
+ * - input: string
+ *
+ *
+ * **Return Type:**
+ * - **Non‑streaming:** string
+ * - **Streaming Partial:** string
+ * - **Streaming Final:** string
+ *
+ * **Usage Patterns:**
+ * 1. **Non‑streaming (Default)**
+ *    - Best for quick responses and simple UI updates.
+ * 2. **Streaming**
+ *    - Ideal for long‑running operations or real‑time feedback.
+ *
+ * **Edge Cases:**
+ * - Ensure robust error handling via `onError`.
+ * - Handle cases where partial data may be incomplete or missing.
+ *
+ * @example
+ * ```tsx
+ * // Basic non‑streaming usage:
+ * const { data, error, isLoading, mutate } = useTestOpenAIConcurrencyClientEnvBaseUrl({ stream: false});
+ *
+ * // Streaming usage:
+ * const { data, streamData, isLoading, error, mutate } = useTestOpenAIConcurrencyClientEnvBaseUrl({
+ *   stream: true | undefined,
+ *   onStreamData: (partial) => console.log('Partial update:', partial),
+ *   onFinalData: (final) => console.log('Final result:', final),
+ *   onError: (err) => console.error('Error:', err),
+ * });
+ * ```
+ */
+export function useTestOpenAIConcurrencyClientEnvBaseUrl(props: HookInput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream: false }>): HookOutput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream: false }>
+export function useTestOpenAIConcurrencyClientEnvBaseUrl(props?: HookInput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream?: true }>): HookOutput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream: true }>
+export function useTestOpenAIConcurrencyClientEnvBaseUrl(
+  props: HookInput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream?: boolean }> = {},
+): HookOutput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream: true }> | HookOutput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream: false }> {
+  let action: ServerAction = Actions.TestOpenAIConcurrencyClientEnvBaseUrl;
+  if (isStreamingProps(props)) {
+    action = StreamingActions.TestOpenAIConcurrencyClientEnvBaseUrl;
+    return useBamlAction(action, props)
+  } else {
+    return useBamlAction(action, props as HookInput<'TestOpenAIConcurrencyClientEnvBaseUrl', { stream: false }>)
+  }
+}
+/**
+ * A specialized hook for the TestOpenAIConcurrencyClientHardocodedBaseUrl BAML function that supports both streaming and non‑streaming responses.
+ *
+ * **Input Types:**
+ *
+ * - input: string
+ *
+ *
+ * **Return Type:**
+ * - **Non‑streaming:** string
+ * - **Streaming Partial:** string
+ * - **Streaming Final:** string
+ *
+ * **Usage Patterns:**
+ * 1. **Non‑streaming (Default)**
+ *    - Best for quick responses and simple UI updates.
+ * 2. **Streaming**
+ *    - Ideal for long‑running operations or real‑time feedback.
+ *
+ * **Edge Cases:**
+ * - Ensure robust error handling via `onError`.
+ * - Handle cases where partial data may be incomplete or missing.
+ *
+ * @example
+ * ```tsx
+ * // Basic non‑streaming usage:
+ * const { data, error, isLoading, mutate } = useTestOpenAIConcurrencyClientHardocodedBaseUrl({ stream: false});
+ *
+ * // Streaming usage:
+ * const { data, streamData, isLoading, error, mutate } = useTestOpenAIConcurrencyClientHardocodedBaseUrl({
+ *   stream: true | undefined,
+ *   onStreamData: (partial) => console.log('Partial update:', partial),
+ *   onFinalData: (final) => console.log('Final result:', final),
+ *   onError: (err) => console.error('Error:', err),
+ * });
+ * ```
+ */
+export function useTestOpenAIConcurrencyClientHardocodedBaseUrl(props: HookInput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream: false }>): HookOutput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream: false }>
+export function useTestOpenAIConcurrencyClientHardocodedBaseUrl(props?: HookInput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream?: true }>): HookOutput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream: true }>
+export function useTestOpenAIConcurrencyClientHardocodedBaseUrl(
+  props: HookInput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream?: boolean }> = {},
+): HookOutput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream: true }> | HookOutput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream: false }> {
+  let action: ServerAction = Actions.TestOpenAIConcurrencyClientHardocodedBaseUrl;
+  if (isStreamingProps(props)) {
+    action = StreamingActions.TestOpenAIConcurrencyClientHardocodedBaseUrl;
+    return useBamlAction(action, props)
+  } else {
+    return useBamlAction(action, props as HookInput<'TestOpenAIConcurrencyClientHardocodedBaseUrl', { stream: false }>)
+  }
+}
 /**
  * A specialized hook for the TestOpenAIDummyClient BAML function that supports both streaming and non‑streaming responses.
  *
diff --git a/integ-tests/react/baml_client/react/server.ts b/integ-tests/react/baml_client/react/server.ts
index 709d1791ec..658c748e24 100644
--- a/integ-tests/react/baml_client/react/server.ts
+++ b/integ-tests/react/baml_client/react/server.ts
@@ -3311,6 +3311,42 @@ export const TestOpenAI = async (
   );
 };
 
+/**
+ * Executes the "TestOpenAIConcurrencyClientEnvBaseUrl" BAML action.
+ *
+ * This server action calls the underlying BAML function "TestOpenAIConcurrencyClientEnvBaseUrl"
+ * with the specified parameters.
+ *
+ * @param { string } input - Input parameter.
+ *
+ * @returns {Promise<string>} A promise that resolves with the result of the action.
+ */
+export const TestOpenAIConcurrencyClientEnvBaseUrl = async (
+  input: string,
+): Promise<string> => {
+  return b.TestOpenAIConcurrencyClientEnvBaseUrl(
+    input,
+  );
+};
+
+/**
+ * Executes the "TestOpenAIConcurrencyClientHardocodedBaseUrl" BAML action.
+ *
+ * This server action calls the underlying BAML function "TestOpenAIConcurrencyClientHardocodedBaseUrl"
+ * with the specified parameters.
+ *
+ * @param { string } input - Input parameter.
+ *
+ * @returns {Promise<string>} A promise that resolves with the result of the action.
+ */
+export const TestOpenAIConcurrencyClientHardocodedBaseUrl = async (
+  input: string,
+): Promise<string> => {
+  return b.TestOpenAIConcurrencyClientHardocodedBaseUrl(
+    input,
+  );
+};
+
 /**
  * Executes the "TestOpenAIDummyClient" BAML action.
 *
diff --git a/integ-tests/react/baml_client/react/server_streaming.ts b/integ-tests/react/baml_client/react/server_streaming.ts
index 5828d57bae..f343e97fe8 100644
--- a/integ-tests/react/baml_client/react/server_streaming.ts
+++ b/integ-tests/react/baml_client/react/server_streaming.ts
@@ -3491,6 +3491,44 @@ export const TestOpenAI = async (
   return Promise.resolve(stream.toStreamable());
 };
 
+/**
+ * Executes the streaming variant of the "TestOpenAIConcurrencyClientEnvBaseUrl" BAML action.
+ *
+ * This action initiates a streaming response by calling the corresponding
+ * BAML stream function. The returned stream yields incremental updates.
+ *
+ * @param { string } input - Input parameter.
+ *
+ * @returns {ReadableStream} A stream that yields incremental updates from the action.
+ */
+export const TestOpenAIConcurrencyClientEnvBaseUrl = async (
+  input: string,
+): Promise<ReadableStream<Uint8Array>> => {
+  const stream = b.stream.TestOpenAIConcurrencyClientEnvBaseUrl(
+    input,
+  );
+  return Promise.resolve(stream.toStreamable());
+};
+
+/**
+ * Executes the streaming variant of the "TestOpenAIConcurrencyClientHardocodedBaseUrl" BAML action.
+ *
+ * This action initiates a streaming response by calling the corresponding
+ * BAML stream function. The returned stream yields incremental updates.
+ *
+ * @param { string } input - Input parameter.
+ *
+ * @returns {ReadableStream} A stream that yields incremental updates from the action.
+ */
+export const TestOpenAIConcurrencyClientHardocodedBaseUrl = async (
+  input: string,
+): Promise<ReadableStream<Uint8Array>> => {
+  const stream = b.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl(
+    input,
+  );
+  return Promise.resolve(stream.toStreamable());
+};
+
 /**
  * Executes the streaming variant of the "TestOpenAIDummyClient" BAML action.
  *
diff --git a/integ-tests/react/baml_client/react/server_streaming_types.ts b/integ-tests/react/baml_client/react/server_streaming_types.ts
index 2edce97076..6d153ad0a2 100644
--- a/integ-tests/react/baml_client/react/server_streaming_types.ts
+++ b/integ-tests/react/baml_client/react/server_streaming_types.ts
@@ -233,6 +233,8 @@ export type StreamingServerTypes = {
   TestOllama: string | null,
   TestOllamaHaiku: Haiku,
   TestOpenAI: string,
+  TestOpenAIConcurrencyClientEnvBaseUrl: string,
+  TestOpenAIConcurrencyClientHardocodedBaseUrl: string,
   TestOpenAIDummyClient: string,
   TestOpenAIGPT4oMini: string,
   TestOpenAIGPT4oMini2: string,
diff --git a/integ-tests/react/baml_client/sync_client.ts b/integ-tests/react/baml_client/sync_client.ts
index c564c458a8..109e47c3be 100644
--- a/integ-tests/react/baml_client/sync_client.ts
+++ b/integ-tests/react/baml_client/sync_client.ts
@@ -7656,6 +7656,90 @@ export class BamlSyncClient {
     }
   }
 
+  TestOpenAIConcurrencyClientEnvBaseUrl(
+    input: string,
+    __baml_options__?: BamlCallOptions
+  ): string {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) }
+      const signal = options.signal;
+
+      if (signal?.aborted) {
+        throw new BamlAbortError('Operation was aborted', signal.reason);
+      }
+
+      // Check if onTick is provided and reject for sync operations
+      if (options.onTick) {
+        throw new Error("onTick is not supported for synchronous functions. Please use the async client instead.");
+      }
+
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : [];
+      const rawEnv = __baml_options__?.env ?
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.callFunctionSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): string { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided and reject for sync operations + if (options.onTick) { + throw new Error("onTick is not supported for synchronous functions. Please use the async client instead."); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.callFunctionSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/react/baml_client/sync_request.ts b/integ-tests/react/baml_client/sync_request.ts index 5c8a450fa9..f2cb3ef120 100644 --- a/integ-tests/react/baml_client/sync_request.ts +++ b/integ-tests/react/baml_client/sync_request.ts @@ -4537,6 +4537,56 @@ export class HttpRequest { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? 
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -10943,6 +10993,56 @@ export class HttpStreamRequest { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/ruby/baml_client/client.rb b/integ-tests/ruby/baml_client/client.rb index 2250291b2e..cd06923a50 100644 --- a/integ-tests/ruby/baml_client/client.rb +++ b/integ-tests/ruby/baml_client/client.rb @@ -4551,6 +4551,56 @@ def TestOpenAI( input: String, baml_options: T::Hash[Symbol, T.any(BamlClient::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]), T::Hash[Symbol, String], T::Hash[String, String])] ).returns(String)} + def TestOpenAIConcurrencyClientEnvBaseUrl( + *varargs, + input:, + baml_options: {} + ) + if varargs.any? 
+ raise ArgumentError.new("TestOpenAIConcurrencyClientEnvBaseUrl may only be called with keyword arguments") + end + + options = @options.merge_options(BamlCallOptions.from_hash(baml_options)) + + result = options.call_function_sync(function_name: "TestOpenAIConcurrencyClientEnvBaseUrl", args: { + input: input, + }) + + parsed = result.parsed_using_types(BamlClient::Types, BamlClient::PartialTypes, false) + # for sorbet we need to cast to the return type since parsed is now the right value + # We just need to tell sorbet that the return type is the right type + parsed.cast_to(String) + end + sig {params( + varargs: T.untyped, + input: String, + baml_options: T::Hash[Symbol, T.any(BamlClient::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]), T::Hash[Symbol, String], T::Hash[String, String])] + ).returns(String)} + def TestOpenAIConcurrencyClientHardocodedBaseUrl( + *varargs, + input:, + baml_options: {} + ) + if varargs.any? + raise ArgumentError.new("TestOpenAIConcurrencyClientHardocodedBaseUrl may only be called with keyword arguments") + end + + options = @options.merge_options(BamlCallOptions.from_hash(baml_options)) + + result = options.call_function_sync(function_name: "TestOpenAIConcurrencyClientHardocodedBaseUrl", args: { + input: input, + }) + + parsed = result.parsed_using_types(BamlClient::Types, BamlClient::PartialTypes, false) + # for sorbet we need to cast to the return type since parsed is now the right value + # We just need to tell sorbet that the return type is the right type + parsed.cast_to(String) + end + sig {params( + varargs: T.untyped, + input: String, + baml_options: T::Hash[Symbol, T.any(BamlClient::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]), T::Hash[Symbol, String], T::Hash[String, String])] + ).returns(String)} def TestOpenAIDummyClient( *varargs, input:, @@ -10237,6 +10287,56 @@ def TestOpenAI( input: String, baml_options: T::Hash[Symbol, T.any(BamlClient::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]), T::Hash[Symbol, String], T::Hash[String, String])] ).returns(Baml::BamlStream[String, String])} + def TestOpenAIConcurrencyClientEnvBaseUrl( + *varargs, + input:, + baml_options: {} + ) + if varargs.any? + raise ArgumentError.new("TestOpenAIConcurrencyClientEnvBaseUrl may only be called with keyword arguments") + end + + options = @options.merge_options(BamlCallOptions.from_hash(baml_options)) + + ctx, result = options.create_sync_stream(function_name: "TestOpenAIConcurrencyClientEnvBaseUrl", args: { + input: input, + }) + + Baml::BamlStream[String, String].new( + ffi_stream: result, + ctx_manager: ctx + ) + end + sig {params( + varargs: T.untyped, + input: String, + baml_options: T::Hash[Symbol, T.any(BamlClient::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]), T::Hash[Symbol, String], T::Hash[String, String])] + ).returns(Baml::BamlStream[String, String])} + def TestOpenAIConcurrencyClientHardocodedBaseUrl( + *varargs, + input:, + baml_options: {} + ) + if varargs.any? 
+ raise ArgumentError.new("TestOpenAIConcurrencyClientHardocodedBaseUrl may only be called with keyword arguments") + end + + options = @options.merge_options(BamlCallOptions.from_hash(baml_options)) + + ctx, result = options.create_sync_stream(function_name: "TestOpenAIConcurrencyClientHardocodedBaseUrl", args: { + input: input, + }) + + Baml::BamlStream[String, String].new( + ffi_stream: result, + ctx_manager: ctx + ) + end + sig {params( + varargs: T.untyped, + input: String, + baml_options: T::Hash[Symbol, T.any(BamlClient::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]), T::Hash[Symbol, String], T::Hash[String, String])] + ).returns(Baml::BamlStream[String, String])} def TestOpenAIDummyClient( *varargs, input:, diff --git a/integ-tests/typescript-esm/baml_client/async_client.ts b/integ-tests/typescript-esm/baml_client/async_client.ts index aa95fdcbcc..c7a1a704bd 100644 --- a/integ-tests/typescript-esm/baml_client/async_client.ts +++ b/integ-tests/typescript-esm/baml_client/async_client.ts @@ -8736,6 +8736,102 @@ export type RecursivePartialNull = MovedRecursivePartialNull } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided - route through streaming if so + if (options.onTick) { + const stream = this.stream.TestOpenAIConcurrencyClientEnvBaseUrl( + input, + __baml_options__ + ); + + return await stream.getFinalResponse(); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = await this.runtime.callFunction( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided - route through streaming if so + if (options.onTick) { + const stream = this.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl( + input, + __baml_options__ + ); + + return await stream.getFinalResponse(); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + const rawEnv = __baml_options__?.env ? 
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = await this.runtime.callFunction( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -24278,6 +24374,138 @@ export type RecursivePartialNull<T> = MovedRecursivePartialNull<T> } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): BamlStream<string, string> + { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + let collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + + let onTickWrapper: (() => void) | undefined; + + // Create collector and wrap onTick if provided + if (options.onTick) { + const tickCollector = new Collector("on-tick-collector"); + collector = [...collector, tickCollector]; + + onTickWrapper = () => { + const log = tickCollector.last; + if (log) { + try { + options.onTick!("Unknown", log); + } catch (error) { + console.error("Error in onTick callback for TestOpenAIConcurrencyClientEnvBaseUrl", error); + } + } + }; + } + + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.streamFunction( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + undefined, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + onTickWrapper, + ) + return new BamlStream<string, string>( + raw, + (a): string => a, + (a): string => a, + this.ctxManager.cloneContext(), + options.signal, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): BamlStream<string, string> + { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + let collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + + let onTickWrapper: (() => void) | undefined; + + // Create collector and wrap onTick if provided + if (options.onTick) { + const tickCollector = new Collector("on-tick-collector"); + collector = [...collector, tickCollector]; + + onTickWrapper = () => { + const log = tickCollector.last; + if (log) { + try { + options.onTick!("Unknown", log); + } catch (error) { + console.error("Error in onTick callback for TestOpenAIConcurrencyClientHardocodedBaseUrl", error); + } + } + }; + } + + const rawEnv = __baml_options__?.env ?
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.streamFunction( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + undefined, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + onTickWrapper, + ) + return new BamlStream<string, string>( + raw, + (a): string => a, + (a): string => a, + this.ctxManager.cloneContext(), + options.signal, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/typescript-esm/baml_client/async_request.ts b/integ-tests/typescript-esm/baml_client/async_request.ts index 758936f3cd..86ab5f96c2 100644 --- a/integ-tests/typescript-esm/baml_client/async_request.ts +++ b/integ-tests/typescript-esm/baml_client/async_request.ts @@ -4541,6 +4541,56 @@ env?: Record<string, string> } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -10947,6 +10997,56 @@ env?: Record<string, string> } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ?
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/typescript-esm/baml_client/inlinedbaml.ts b/integ-tests/typescript-esm/baml_client/inlinedbaml.ts index 467be2794b..f3a78048ff 100644 --- a/integ-tests/typescript-esm/baml_client/inlinedbaml.ts +++ b/integ-tests/typescript-esm/baml_client/inlinedbaml.ts @@ -20,7 +20,7 @@ $ pnpm add @boundaryml/baml const fileMap = { - "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid
resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n 
inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", + "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo 
{\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientEnvBaseUrl {\n provider openai-generic\n options {\n base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientHardocodedBaseUrl {\n provider openai-generic\n options {\n base_url \"http://127.0.0.1:9876/v1/\"\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider 
vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n 
}\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", "custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. 
Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n \nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}", "fiddle-examples/audio/audio.baml": "function DescribeAudio(audio: audio) -> string {\n client GPT4o\n prompt #\"\n Describe the audio below in 20 words:\n {{ _.role(\"user\") }}\n {{ audio }}\n \"#\n\n}\n\n\n\n\n// chat role user present\nfunction DescribeAudio2(audio: audio) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 1 answer that answer the following command.\n\n Describe this in 5 words:\n {{ audio }}\n \"#\n}\n\ntest TestAudio {\n functions [DescribeAudio]\n args {\n audio { url \"https://www.pacdv.com/sounds/voices/friday-rocks.wav\"}\n }\n}\n\ntest TestAudio2 {\n functions [DescribeAudio2]\n args {\n audio { file \"friday-rocks.wav\" }\n }\n}\n", "fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... 
the json schema ...\n }'\n \"#\n}", @@ -120,7 +120,7 @@ const fileMap = { "test-files/providers/openai-responses-validation.baml": "// OpenAI Responses Provider Validation Tests\n// These tests validate that the openai-responses provider is properly configured\n\n// Test 1: Basic provider recognition\n// This should parse successfully once openai-responses is available\nclient ValidateOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n }\n}\n\n// Test 2: Valid client_response_type values for openai-responses\nclient ValidateResponseTypeOpenAI {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai\"\n }\n}\n\nclient ValidateResponseTypeOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai-responses\"\n }\n}\n\nclient ValidateResponseTypeAnthropic {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"anthropic\"\n }\n}\n\n// Test 3: Provider should be in allowed list\n// This will validate that \"openai-responses\" is included in ClientProvider::allowed_providers()\n\n// Test 4: Default base URL should be correct\nclient ValidateDefaultBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // Should default to https://api.openai.com/v1\n }\n}\n\n// Test 5: Custom base URL should work\nclient ValidateCustomBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n base_url \"https://custom.openai.com/v1\"\n }\n}\n\n// Simple test functions to validate the clients work\nfunction ValidateBasicResponses(input: string) -> string {\n client ValidateOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Say \"success\" if you can read this: {{ input }}\n \"#\n}\n\nfunction ValidateResponseTypes(input: string) -> string {\n client ValidateResponseTypeOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Respond with \"response-type-works\" for: {{ input }}\n \"#\n}\n\n// Validation test suite\ntest ValidateOpenAIResponsesProvider {\n functions [\n ValidateBasicResponses,\n ValidateResponseTypes\n ]\n args {\n input \"test\"\n }\n}", "test-files/providers/openai-responses.baml": "// OpenAI Responses API Provider Tests\n// Tests the new openai-responses provider that uses the OpenAI Responses API\n\n// Basic OpenAI Responses client\nclient OpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n }\n}\n\n// OpenAI Responses client with explicit response type\nclient OpenAIResponsesExplicit {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\n// OpenAI Responses client with custom base URL (for testing)\nclient OpenAIResponsesCustomURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n base_url \"https://api.openai.com/v1\"\n }\n}\n\n// Test basic functionality with responses API\nfunction TestOpenAIResponses(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Write a short haiku about {{ input }}. 
Make it simple and beautiful.\n \"#\n}\n\n// Test with explicit response type configuration\nfunction TestOpenAIResponsesExplicit(input: string) -> string {\n client OpenAIResponsesExplicit\n prompt #\"\n {{ _.role(\"user\") }}\n Create a brief poem about {{ input }}. Keep it under 50 words.\n \"#\n}\n\n// Test with custom base URL\nfunction TestOpenAIResponsesCustomURL(input: string) -> string {\n client OpenAIResponsesCustomURL\n prompt #\"\n {{ _.role(\"user\") }}\n Tell me an interesting fact about {{ input }}.\n \"#\n}\n\n// Test with multi-turn conversation\nfunction TestOpenAIResponsesConversation(topic: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"system\") }}\n You are a helpful assistant that provides concise answers.\n \n {{ _.role(\"user\") }}\n What is {{ topic }}?\n \n {{ _.role(\"assistant\") }}\n {{ topic }} is a fascinating subject. Let me explain briefly.\n \n {{ _.role(\"user\") }}\n Can you give me a simple example?\n \"#\n}\n\n// Test with different model parameter\nclient OpenAIResponsesGPT4 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4\"\n }\n}\n\nfunction TestOpenAIResponsesDifferentModel(input: string) -> string {\n client OpenAIResponsesGPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Explain {{ input }} in one sentence.\n \"#\n}\n\n// Test error handling with invalid configuration\nclient OpenAIResponsesInvalidResponseType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // This should work since openai response type is valid for responses provider\n client_response_type \"openai\"\n }\n}\n\nfunction TestOpenAIResponsesWithOpenAIResponseType(input: string) -> string {\n client OpenAIResponsesInvalidResponseType\n prompt #\"\n {{ _.role(\"user\") }}\n Write about {{ input }}.\n \"#\n}\n\n// Comprehensive test suite for OpenAI Responses\ntest TestOpenAIResponsesProviders {\n functions [\n TestOpenAIResponses,\n TestOpenAIResponsesExplicit,\n TestOpenAIResponsesCustomURL,\n TestOpenAIResponsesConversation,\n TestOpenAIResponsesDifferentModel,\n TestOpenAIResponsesWithOpenAIResponseType\n ]\n args {\n input \"mountains\"\n topic \"machine learning\"\n }\n}\n\n// Test shorthand syntax (this should work but use standard openai, not responses)\nfunction TestOpenAIResponsesShorthand(input: string) -> string {\n client \"openai/gpt-5-mini\"\n prompt #\"\n {{ _.role(\"user\") }}\n What do you think about {{ input }}?\n \"#\n}\n\n// Test to ensure the provider correctly routes to /v1/responses endpoint\n// This is validated by the implementation, not by the test execution\nfunction TestOpenAIResponsesEndpoint(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n This request should go to /v1/responses endpoint, not /v1/chat/completions.\n Respond with a short message about {{ input }}.\n \"#\n}\n\n// Test that demonstrates automatic response type selection\nclient OpenAIResponsesAutoType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n // No explicit client_response_type - should automatically use openai-responses\n }\n}\n\nfunction TestOpenAIResponsesAutoType(input: string) -> string {\n client OpenAIResponsesAutoType\n prompt #\"\n {{ _.role(\"user\") }}\n This client should automatically use openai-responses response type.\n Write a short description of {{ input }}.\n \"#\n}\n\n// Additional test for validation\ntest TestOpenAIResponsesValidation {\n functions [\n 
TestOpenAIResponsesShorthand,\n TestOpenAIResponsesEndpoint,\n TestOpenAIResponsesAutoType,\n TestOpenAIResponsesExplicit,\n TestOpenAIProviderWithResponsesType\n ]\n args {\n input \"artificial intelligence\"\n }\n}\n\n// Test image input/output with OpenAI Responses API\nclient OpenAIResponsesImage {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\nfunction TestOpenAIResponsesImageInput(image: image | string | pdf | audio) -> string {\n client OpenAIResponsesImage\n prompt #\"\n {{ _.role(\"user\") }}\n what is in this content?\n {{ image }}\n \"#\n}\n\n// Test for image analysis\ntest TestOpenAIResponsesImageAnalysis {\n functions [\n TestOpenAIResponsesImageInput\n ]\n args {\n image \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n }\n}\n\n// Test web search with OpenAI Responses API\nclient OpenAIResponsesWebSearch {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"web_search_preview\"\n }\n ]\n }\n}\n\nfunction TestOpenAIResponsesWebSearch(query: string) -> string {\n client OpenAIResponsesWebSearch\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for web search functionality\ntest TestOpenAIResponsesWebSearchTest {\n functions [\n TestOpenAIResponsesWebSearch\n ]\n args {\n query \"What was a positive news story from today?\"\n }\n}\n\n\n// Test function calling with OpenAI Responses API\nclient OpenAIResponsesFunctionCall {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"function\"\n name \"get_current_weather\"\n description \"Get the current weather in a given location\"\n parameters {\n type \"object\"\n properties {\n location {\n type \"string\"\n description \"The city and state, e.g. 
San Francisco, CA\"\n }\n unit {\n type \"string\"\n enum [\"celsius\", \"fahrenheit\"]\n }\n }\n required [\"location\", \"unit\"]\n }\n }\n ]\n tool_choice \"auto\"\n }\n}\n\nfunction TestOpenAIResponsesFunctionCall(query: string) -> string {\n client OpenAIResponsesFunctionCall\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for function calling\ntest TestOpenAIResponsesFunctionCallTest {\n functions [\n TestOpenAIResponsesFunctionCall\n ]\n args {\n query \"What is the weather like in Boston today?\"\n }\n}\n\n// Test using standard openai provider with openai-responses client_response_type\nclient OpenAIWithResponsesType {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\nfunction TestOpenAIProviderWithResponsesType(input: string) -> string {\n client OpenAIWithResponsesType\n prompt #\"\n {{ _.role(\"user\") }}\n This uses the openai provider but with openai-responses client_response_type.\n Write a short summary about {{ input }}.\n \"#\n}\n\n// Test reasoning with OpenAI Responses API\nclient OpenAIResponsesReasoning {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n reasoning{\n effort \"high\"\n }\n }\n}\n\nfunction TestOpenAIResponsesReasoning(problem: string) -> string {\n client OpenAIResponsesReasoning\n prompt #\"\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n// Test for reasoning capability\ntest TestOpenAIResponsesReasoningTest {\n functions [\n TestOpenAIResponsesReasoning\n ]\n args {\n problem \"Solve this step by step: If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled?\"\n }\n}\n\nclient Gpt5 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\n\nfunction TestOpenAIResponsesAllRoles(problem: string) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"system\") }}\n Hi\n {{ _.role(\"developer\") }}\n Hi\n {{ _.role(\"assistant\") }}\n Hi\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n\nfunction TestOpenaiResponsesPdfs(pdf: pdf) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"user\") }}\n Summarize in one sentence the contents of this:\n {{ pdf }}\n \"#\n} \n\ntest TestOpenaiResponsesPdfsTest {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { url \"https://www.berkshirehathaway.com/letters/2024ltr.pdf\" }\n }\n}\n\ntest TestOpenaiResponsesPdfsTestFile {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { file \"../../dummy.pdf\" }\n }\n}\n\n\ntest TestOpenAIResponsesAllRolesTest {\n functions [\n TestOpenAIResponsesAllRoles\n ]\n args {\n problem \"What is the weather like in Boston today?\"\n }\n}", "test-files/providers/openai-with-anthropic-response.baml": "client OpenAIWithAnthropicResponse {\n provider openai-responses\n options {\n model \"gpt-4o\"\n client_response_type \"openai-responses\"\n base_url \"http://localhost:8000\"\n }\n}\n\nfunction OpenAIWithAnthropicResponseHello(s: string) -> string {\n client OpenAIWithAnthropicResponse\n prompt #\"\n Return the string \"Hello, world!\" with {{ s }} included in the response.\n {{ _.role(\"user\") }}\n \"#\n}\n\ntest TestOpenAIWithAnthropicResponse {\n functions [\n OpenAIWithAnthropicResponseHello\n ]\n args {\n s \"Cherry blossoms\"\n }\n}", - "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user 
input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", + "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientEnvBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientHardocodedBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", "test-files/providers/openrouter.baml": "function TestOpenRouterMistralSmall3_1_24b(input: string) -> string {\n client OpenRouterMistralSmall3_1_24b\n prompt #\"\n Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n \"#\n}\n \n \ntest TestName {\n functions [TestOpenRouterMistralSmall3_1_24b]\n args {\n input #\"\n hello world\n \"#\n }\n}\n \n \n\nclient OpenRouterMistralSmall3_1_24b {\n provider \"openai-generic\"\n options {\n base_url \"https://openrouter.ai/api/v1\"\n api_key env.OPENROUTER_API_KEY\n model \"mistralai/mistral-small-3.1-24b-instruct\"\n temperature 0.1\n headers {\n \"HTTP-Referer\" \"https://me.com\" // Optional\n \"X-Title\" \"me\" // Optional\n }\n }\n}", "test-files/providers/strategy.baml": "function TestFallbackStrategy(input: string) -> string {\n client Resilient_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestRoundRobinStrategy(input: string) -> string {\n client Lottery_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n", "test-files/providers/tests.baml": "test TestOpenAIShorthand {\n functions [TestOpenAIShorthand]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestAWS {\n functions [\n TestAws\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestProvider {\n functions [\n TestAnthropic, TestVertex, PromptTestOpenAI, TestAzure, TestOllama, TestGemini, TestGeminiThinking, TestAws,\n TestAwsInvalidRegion,\n TestOpenAIShorthand,\n TestAnthropicShorthand,\n TestAwsInvalidAccessKey,\n TestAwsInvalidProfile,\n TestAwsInvalidSessionToken\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestName {\n functions [TestCaching]\n args {\n input #\"\nIn a near-future society where dreams have become a tradable commodity and shared experience, a lonely and socially awkward teenager named Alex discovers they possess a rare and powerful ability to not only view but also manipulate the dreams of others. Initially thrilled by this newfound power, Alex begins subtly altering the dreams of classmates and family members, helping them overcome fears, boost confidence, or experience fantastical adventures. As Alex's skills grow, so does their influence. They start selling premium dream experiences on the black market, crafting intricate and addictive dreamscapes for wealthy clients. However, the line between dream and reality begins to blur for those exposed to Alex's creations. Some clients struggle to differentiate between their true memories and the artificial ones implanted by Alex's dream manipulation.\n\nComplications arise when a mysterious government agency takes notice of Alex's unique abilities. They offer Alex a chance to use their gift for \"the greater good,\" hinting at applications in therapy, criminal rehabilitation, and even national security. Simultaneously, an underground resistance movement reaches out, warning Alex about the dangers of dream manipulation and the potential for mass control and exploitation. Caught between these opposing forces, Alex must navigate a complex web of ethical dilemmas. They grapple with questions of free will, the nature of consciousness, and the responsibility that comes with having power over people's minds. 
As the consequences of their actions spiral outward, affecting the lives of loved ones and strangers alike, Alex is forced to confront the true nature of their ability and decide how—or if—it should be used.\n\nThe story explores themes of identity, the subconscious mind, the ethics of technology, and the power of imagination. It delves into the potential consequences of a world where our most private thoughts and experiences are no longer truly our own, and examines the fine line between helping others and manipulating them for personal gain or a perceived greater good. The narrative further expands on the societal implications of such abilities, questioning the moral boundaries of altering consciousness and the potential for abuse in a world where dreams can be commodified. It challenges the reader to consider the impact of technology on personal autonomy and the ethical responsibilities of those who wield such power.\n\nAs Alex's journey unfolds, they encounter various individuals whose lives have been touched by their dream manipulations, each presenting a unique perspective on the ethical quandaries at hand. From a classmate who gains newfound confidence to a wealthy client who becomes addicted to the dreamscapes, the ripple effects of Alex's actions are profound and far-reaching. The government agency's interest in Alex's abilities raises questions about the potential for state control and surveillance, while the resistance movement highlights the dangers of unchecked power and the importance of safeguarding individual freedoms.\n\nUltimately, Alex's story is one of self-discovery and moral reckoning, as they must decide whether to embrace their abilities for personal gain, align with the government's vision of a controlled utopia, or join the resistance in their fight for freedom and autonomy. The narrative invites readers to reflect on the nature of reality, the boundaries of human experience, and the ethical implications of a world where dreams are no longer private sanctuaries but shared and manipulated commodities. It also explores the psychological impact on Alex, who must deal with the burden of knowing the intimate fears and desires of others, and the isolation that comes from being unable to share their own dreams without altering them.\n\nThe story further examines the technological advancements that have made dream manipulation possible, questioning the role of innovation in society and the potential for both progress and peril. It considers the societal divide between those who can afford to buy enhanced dream experiences and those who cannot, highlighting issues of inequality and access. As Alex becomes more entangled in the web of their own making, they must confront the possibility that their actions could lead to unintended consequences, not just for themselves but for the fabric of society as a whole.\n\nIn the end, Alex's journey is a cautionary tale about the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. 
And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n\nIn conclusion, this story is a reflection on the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n \"#\n not_cached #\"\n hello world\n \"#\n }\n}",
diff --git a/integ-tests/typescript-esm/baml_client/parser.ts b/integ-tests/typescript-esm/baml_client/parser.ts index b51f0ae3f2..421925b6a2 100644 --- a/integ-tests/typescript-esm/baml_client/parser.ts +++ b/integ-tests/typescript-esm/baml_client/parser.ts @@ -4170,6 +4170,52 @@ export class LlmResponseParser { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientEnvBaseUrl", + llmResponse, + false, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + llmResponse, + false, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( llmResponse: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } @@ -10064,6 +10110,52 @@ export class LlmStreamParser { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientEnvBaseUrl", + llmResponse, + true, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + llmResponse, + true, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( llmResponse: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> }
diff --git a/integ-tests/typescript-esm/baml_client/sync_client.ts b/integ-tests/typescript-esm/baml_client/sync_client.ts index 23fcaf42c1..becbf622ac 100644 --- a/integ-tests/typescript-esm/baml_client/sync_client.ts +++ b/integ-tests/typescript-esm/baml_client/sync_client.ts @@ -7656,6 +7656,90 @@ export class BamlSyncClient { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): string { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided and reject for sync operations + if (options.onTick) { + throw new Error("onTick is not supported for synchronous functions. Please use the async client instead."); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.callFunctionSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): string { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided and reject for sync operations + if (options.onTick) { + throw new Error("onTick is not supported for synchronous functions. Please use the async client instead."); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.callFunctionSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions
diff --git a/integ-tests/typescript-esm/baml_client/sync_request.ts b/integ-tests/typescript-esm/baml_client/sync_request.ts index e2e6fb6cba..aba3bce3cc 100644 --- a/integ-tests/typescript-esm/baml_client/sync_request.ts +++ b/integ-tests/typescript-esm/baml_client/sync_request.ts @@ -4537,6 +4537,56 @@ export class HttpRequest { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -10943,6 +10993,56 @@ export class HttpStreamRequest { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions
diff --git a/integ-tests/typescript/baml_client/async_client.ts b/integ-tests/typescript/baml_client/async_client.ts index b96a94c533..259a0f169a 100644 --- a/integ-tests/typescript/baml_client/async_client.ts +++ b/integ-tests/typescript/baml_client/async_client.ts @@ -8736,6 +8736,102 @@ export type RecursivePartialNull<T> = MovedRecursivePartialNull<T> } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<string> { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided - route through streaming if so + if (options.onTick) { + const stream = this.stream.TestOpenAIConcurrencyClientEnvBaseUrl( + input, + __baml_options__ + ); + + return await stream.getFinalResponse(); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = await this.runtime.callFunction( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<string> { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided - route through streaming if so + if (options.onTick) { + const stream = this.stream.TestOpenAIConcurrencyClientHardocodedBaseUrl( + input, + __baml_options__ + ); + + return await stream.getFinalResponse(); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = await this.runtime.callFunction( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( + input: string, + __baml_options__?: BamlCallOptions @@ -24278,6 +24374,138 @@ export type RecursivePartialNull<T> = MovedRecursivePartialNull<T> } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): BamlStream<string, string> + { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + let collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + + let onTickWrapper: (() => void) | undefined; + + // Create collector and wrap onTick if provided + if (options.onTick) { + const tickCollector = new Collector("on-tick-collector"); + collector = [...collector, tickCollector]; + + onTickWrapper = () => { + const log = tickCollector.last; + if (log) { + try { + options.onTick!("Unknown", log); + } catch (error) { + console.error("Error in onTick callback for TestOpenAIConcurrencyClientEnvBaseUrl", error); + } + } + }; + } + + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.streamFunction( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + undefined, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + onTickWrapper, + ) + return new BamlStream( + raw, + (a): string => a, + (a): string => a, + this.ctxManager.cloneContext(), + options.signal, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): BamlStream<string, string> + { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + let collector = options.collector ? (Array.isArray(options.collector) ? options.collector : + [options.collector]) : []; + + let onTickWrapper: (() => void) | undefined; + + // Create collector and wrap onTick if provided + if (options.onTick) { + const tickCollector = new Collector("on-tick-collector"); + collector = [...collector, tickCollector]; + + onTickWrapper = () => { + const log = tickCollector.last; + if (log) { + try { + options.onTick!("Unknown", log); + } catch (error) { + console.error("Error in onTick callback for TestOpenAIConcurrencyClientHardocodedBaseUrl", error); + } + } + }; + } + + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.streamFunction( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + undefined, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + onTickWrapper, + ) + return new BamlStream( + raw, + (a): string => a, + (a): string => a, + this.ctxManager.cloneContext(), + options.signal, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions
diff --git a/integ-tests/typescript/baml_client/async_request.ts b/integ-tests/typescript/baml_client/async_request.ts index a04b4ec23c..d0e6619f92 100644 --- a/integ-tests/typescript/baml_client/async_request.ts +++ b/integ-tests/typescript/baml_client/async_request.ts @@ -4541,6 +4541,56 @@ env?: Record<string, string | undefined> } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -10947,6 +10997,56 @@ env?: Record<string, string | undefined> } } + async TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + + async TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): Promise<HTTPRequest> { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return await this.runtime.buildRequest( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env + ) + } catch (error) { + throw toBamlError(error); + } + } + async TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions
diff --git a/integ-tests/typescript/baml_client/inlinedbaml.ts b/integ-tests/typescript/baml_client/inlinedbaml.ts index 467be2794b..f3a78048ff 100644 --- a/integ-tests/typescript/baml_client/inlinedbaml.ts +++ b/integ-tests/typescript/baml_client/inlinedbaml.ts @@ -20,7 +20,7 @@ $ pnpm add @boundaryml/baml const fileMap = { - "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for 
testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration 
{\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", + "clients.baml": "retry_policy Bar {\n max_retries 3\n strategy {\n type exponential_backoff\n }\n}\n\nretry_policy Foo {\n max_retries 3\n strategy {\n type constant_delay\n delay_ms 100\n }\n}\n\nclient GPT4 {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient GPT4o {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n }\n}\n\n// For integ tests\nclient GPT4oBaseUrlNotSet {\n provider openai\n options {\n model gpt-4o\n api_key env.OPENAI_API_KEY\n base_url env.OPEN_API_BASE_DO_NOT_SET_THIS\n }\n}\n\n\nclient GPT4Turbo {\n retry_policy Bar\n 
provider openai\n options {\n model gpt-4-turbo\n api_key env.OPENAI_API_KEY\n }\n}\n\nretry_policy GPT4oRetry {\n max_retries 2\n strategy {\n type exponential_backoff\n }\n}\n\nclient GPT35 {\n provider openai\n retry_policy GPT4oRetry\n options {\n model \"gpt-4o-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient GPT35LegacyProvider {\n provider openai\n options {\n model \"gpt-3.5-turbo\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientEnvBaseUrl {\n provider openai-generic\n options {\n base_url env.OPENAI_CONCURRENCY_TEST_BASE_URL\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient OpenAIConcurrencyTestClientHardocodedBaseUrl {\n provider openai-generic\n options {\n base_url \"http://127.0.0.1:9876/v1/\"\n model \"concurrency-test\"\n api_key env.OPENAI_API_KEY\n }\n}\n\nclient Ollama {\n provider ollama\n options {\n model llama3.1\n }\n}\n\nclient GPT35Azure {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n // base_url \"https://west-us-azure-baml.openai.azure.com/openai/deployments/gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\n// Azure O1 client without max_tokens (should not add default)\nclient AzureO1 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O1 client with explicit max_tokens (should keep user value)\nclient AzureO1WithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\nclient AzureO1WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o1-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// Azure GPT-35 client with explicit max_tokens (should keep user value)\nclient GPT35AzureWithMaxTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"gpt-35-turbo-default\"\n api_version \"2024-02-01\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure client with invalid resource name (for testing failures)\nclient GPT35AzureFailed {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml-incorrect-suffix\"\n deployment_id \"gpt-35-turbo-default\"\n api_key env.AZURE_OPENAI_API_KEY\n }\n}\n\nclient Gemini {\n provider google-ai\n options {\n model gemini-2.5-flash\n api_key env.GOOGLE_API_KEY\n safetySettings {\n category HARM_CATEGORY_HATE_SPEECH\n threshold BLOCK_LOW_AND_ABOVE\n }\n }\n}\n\nclient Gemini25ProThinking {\n provider google-ai\n retry_policy Constant\n options {\n model \"gemini-2.5-pro\"\n api_key env.GOOGLE_API_KEY\n generationConfig {\n thinkingConfig {\n thinkingBudget 1024\n includeThoughts true\n }\n }\n }\n}\n\nclient GeminiOpenAiGeneric {\n provider \"openai-generic\"\n options {\n base_url \"https://generativelanguage.googleapis.com/v1beta/\"\n model \"gemini-2.5-flash\"\n api_key env.GOOGLE_API_KEY\n }\n}\n\nclient Vertex {\n provider vertex-ai\n options {\n model gemini-2.5-flash\n location us-central1\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient VertexWithQueryParams {\n provider vertex-ai\n options {\n model 
gemini-2.5-flash\n location us-central1\n project_id gloo-ai\n query_params {\n key env.VERTEX_API_KEY\n }\n }\n}\n\nclient VertexClaude {\n provider vertex-ai\n options {\n model \"claude-3-5-sonnet@20240620\"\n location us-east5\n anthropic_version \"vertex-2023-10-16\"\n credentials env.INTEG_TESTS_GOOGLE_APPLICATION_CREDENTIALS_CONTENT\n }\n}\n\n\nclient AwsBedrock {\n provider aws-bedrock\n options {\n inference_configuration {\n max_tokens 2048\n }\n // max_tokens 100000\n // max_completion_tokens 100000\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n //model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-east-1.amazonaws.com/\"\n // region \"us-east-1\"\n // access_key_id env.AWS_ACCESS_KEY_ID\n // secret_access_key env.AWS_SECRET_ACCESS_KEY\n // session_token env.AWS_SESSION_TOKEN\n // session_token null\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidRegion {\n provider aws-bedrock\n options {\n region \"us-invalid-7\"\n inference_configuration {\n max_tokens 100\n }\n // model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n // model_id \"anthropic.claude-3-haiku-20240307-v1:0\"\n model_id \"meta.llama3-8b-instruct-v1:0\"\n // model_id \"mistral.mistral-7b-instruct-v0:2\"\n }\n}\n\nclient AwsBedrockInvalidEndpoint {\n provider aws-bedrock\n options {\n model \"anthropic.claude-3-5-sonnet-20240620-v1:0\"\n endpoint_url \"https://bedrock-runtime.us-doenotexist-3.amazonaws.com\"\n }\n}\n\nclient AwsBedrockInvalidAccessKey {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidProfile {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n profile \"boundaryml-dev-invalid\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient AwsBedrockInvalidSessionToken {\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\n\nclient Invalid{\n provider aws-bedrock\n options {\n model_id \"meta.llama3-8b-instruct-v1:0\"\n region \"us-east-1\"\n access_key_id \"AKIAINVALID12345678\"\n secret_access_key \"abcdef1234567890abcdef1234567890abcdef12\"\n session_token \"invalid-session-token\"\n inference_configuration {\n max_tokens 100\n }\n }\n}\n\nclient Sonnet {\n provider anthropic\n options {\n model claude-3-5-sonnet-20241022\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n\nclient SonnetThinking {\n provider anthropic\n options {\n model \"claude-3-7-sonnet-20250219\"\n api_key env.ANTHROPIC_API_KEY\n max_tokens 2048\n thinking {\n type \"enabled\"\n budget_tokens 1024\n }\n }\n}\n\nclient Claude {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 1000\n }\n}\n\nclient ClaudeWithCaching {\n provider anthropic\n options {\n model claude-3-haiku-20240307\n api_key env.ANTHROPIC_API_KEY\n max_tokens 500\n allowed_role_metadata [\"cache_control\"]\n headers {\n \"anthropic-beta\" \"prompt-caching-2024-07-31\"\n }\n }\n}\n\nclient 
Resilient_SimpleSyntax {\n retry_policy Foo\n provider baml-fallback\n options {\n strategy [\n GPT4Turbo\n GPT35\n Lottery_SimpleSyntax\n ]\n }\n}\n\nclient Lottery_SimpleSyntax {\n provider baml-round-robin\n options {\n start 0\n strategy [\n Claude\n GPT35\n ]\n }\n}\n\nclient TogetherAi {\n provider \"openai-generic\"\n options {\n base_url \"https://api.together.ai/v1\"\n api_key env.TOGETHER_API_KEY\n model \"meta-llama/Llama-3-70b-chat-hf\"\n }\n}\n\n// OpenAI O1 client without max_tokens (should not add default)\nclient OpenAIO1 {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// OpenAI O1 client with explicit max_tokens (should fail)\nclient OpenAIO1WithMaxTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// OpenAI O1 client with explicit max_completion_tokens\nclient OpenAIO1WithMaxCompletionTokens {\n provider openai\n options {\n model \"o1-mini\"\n api_key env.OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n\n// OpenAI GPT-4 client with explicit max_tokens\nclient GPT4WithMaxTokens {\n provider openai\n options {\n model \"gpt-4\"\n api_key env.OPENAI_API_KEY\n max_tokens 1000\n }\n}\n\n// Azure O3 client without max_tokens (should not add default)\nclient AzureO3 {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_tokens null\n }\n}\n\n// Azure O3 client with explicit max_completion_tokens\nclient AzureO3WithMaxCompletionTokens {\n provider azure-openai\n options {\n resource_name \"west-us-azure-baml\"\n deployment_id \"o3-mini\"\n api_version \"2024-08-01-preview\"\n api_key env.AZURE_OPENAI_API_KEY\n max_completion_tokens 1000\n }\n}\n", "custom-task.baml": "class BookOrder {\n orderId string @description(#\"\n The ID of the book order\n \"#)\n title string @description(#\"\n The title of the ordered book\n \"#)\n quantity int @description(#\"\n The quantity of books ordered\n \"#)\n price float @description(#\"\n The price of the book\n \"#)\n}\n\nclass FlightConfirmation {\n confirmationNumber string @description(#\"\n The flight confirmation number\n \"#)\n flightNumber string @description(#\"\n The flight number\n \"#)\n departureTime string @description(#\"\n The scheduled departure time of the flight\n \"#)\n arrivalTime string @description(#\"\n The scheduled arrival time of the flight\n \"#)\n seatNumber string @description(#\"\n The seat number assigned on the flight\n \"#)\n}\n\nclass GroceryReceipt {\n receiptId string @description(#\"\n The ID of the grocery receipt\n \"#)\n storeName string @description(#\"\n The name of the grocery store\n \"#)\n items (string | int | float)[] @description(#\"\n A list of items purchased. 
Each item consists of a name, quantity, and price.\n \"#)\n totalAmount float @description(#\"\n The total amount spent on groceries\n \"#)\n}\n \nclass CustomTaskResult {\n bookOrder BookOrder | null\n flightConfirmation FlightConfirmation | null\n groceryReceipt GroceryReceipt | null\n}\n\nfunction CustomTask(input: string) -> BookOrder | FlightConfirmation | GroceryReceipt {\n client \"openai/gpt-4o-mini\"\n prompt #\"\n Given the input string, extract either an order for a book, a flight confirmation, or a grocery receipt.\n\n {{ ctx.output_format }}\n\n Input:\n \n {{ input}}\n \"#\n}\n\ntest CustomTask {\n functions [CustomTask]\n args {\n input #\"\nDear [Your Name],\n\nThank you for booking with [Airline Name]! We are pleased to confirm your upcoming flight.\n\nFlight Confirmation Details:\n\nBooking Reference: ABC123\nPassenger Name: [Your Name]\nFlight Number: XY789\nDeparture Date: September 15, 2024\nDeparture Time: 10:30 AM\nArrival Time: 1:45 PM\nDeparture Airport: John F. Kennedy International Airport (JFK), New York, NY\nArrival Airport: Los Angeles International Airport (LAX), Los Angeles, CA\nSeat Number: 12A\nClass: Economy\nBaggage Allowance:\n\nChecked Baggage: 1 piece, up to 23 kg\nCarry-On Baggage: 1 piece, up to 7 kg\nImportant Information:\n\nPlease arrive at the airport at least 2 hours before your scheduled departure.\nCheck-in online via our website or mobile app to save time at the airport.\nEnsure that your identification documents are up to date and match the name on your booking.\nContact Us:\n\nIf you have any questions or need to make changes to your booking, please contact our customer service team at 1-800-123-4567 or email us at support@[airline].com.\n\nWe wish you a pleasant journey and thank you for choosing [Airline Name].\n\nBest regards,\n\n[Airline Name] Customer Service\n \"#\n }\n}", "fiddle-examples/audio/audio.baml": "function DescribeAudio(audio: audio) -> string {\n client GPT4o\n prompt #\"\n Describe the audio below in 20 words:\n {{ _.role(\"user\") }}\n {{ audio }}\n \"#\n\n}\n\n\n\n\n// chat role user present\nfunction DescribeAudio2(audio: audio) -> string {\n client GPT4Turbo\n prompt #\"\n {{ _.role(\"user\") }}\n You should return 1 answer that answer the following command.\n\n Describe this in 5 words:\n {{ audio }}\n \"#\n}\n\ntest TestAudio {\n functions [DescribeAudio]\n args {\n audio { url \"https://www.pacdv.com/sounds/voices/friday-rocks.wav\"}\n }\n}\n\ntest TestAudio2 {\n functions [DescribeAudio2]\n args {\n audio { file \"friday-rocks.wav\" }\n }\n}\n", "fiddle-examples/chain-of-thought.baml": "class Email {\n subject string\n body string\n from_address string\n}\n\nenum OrderStatus {\n ORDERED\n SHIPPED\n DELIVERED\n CANCELLED\n}\n\nclass OrderInfo {\n order_status OrderStatus\n tracking_number string?\n estimated_arrival_date string?\n}\n\nfunction GetOrderInfo(email: Email) -> OrderInfo {\n client GPT4\n prompt #\"\n Given the email below:\n\n ```\n from: {{email.from_address}}\n Email Subject: {{email.subject}}\n Email Body: {{email.body}}\n ```\n\n Extract this info from the email in JSON format:\n {{ ctx.output_format }}\n\n Before you output the JSON, please explain your\n reasoning step-by-step. Here is an example on how to do this:\n 'If we think step by step we can see that ...\n therefore the output JSON is:\n {\n ... 
the json schema ...\n }'\n \"#\n}", @@ -120,7 +120,7 @@ const fileMap = { "test-files/providers/openai-responses-validation.baml": "// OpenAI Responses Provider Validation Tests\n// These tests validate that the openai-responses provider is properly configured\n\n// Test 1: Basic provider recognition\n// This should parse successfully once openai-responses is available\nclient ValidateOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n }\n}\n\n// Test 2: Valid client_response_type values for openai-responses\nclient ValidateResponseTypeOpenAI {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai\"\n }\n}\n\nclient ValidateResponseTypeOpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"openai-responses\"\n }\n}\n\nclient ValidateResponseTypeAnthropic {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n client_response_type \"anthropic\"\n }\n}\n\n// Test 3: Provider should be in allowed list\n// This will validate that \"openai-responses\" is included in ClientProvider::allowed_providers()\n\n// Test 4: Default base URL should be correct\nclient ValidateDefaultBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // Should default to https://api.openai.com/v1\n }\n}\n\n// Test 5: Custom base URL should work\nclient ValidateCustomBaseURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n base_url \"https://custom.openai.com/v1\"\n }\n}\n\n// Simple test functions to validate the clients work\nfunction ValidateBasicResponses(input: string) -> string {\n client ValidateOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Say \"success\" if you can read this: {{ input }}\n \"#\n}\n\nfunction ValidateResponseTypes(input: string) -> string {\n client ValidateResponseTypeOpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Respond with \"response-type-works\" for: {{ input }}\n \"#\n}\n\n// Validation test suite\ntest ValidateOpenAIResponsesProvider {\n functions [\n ValidateBasicResponses,\n ValidateResponseTypes\n ]\n args {\n input \"test\"\n }\n}", "test-files/providers/openai-responses.baml": "// OpenAI Responses API Provider Tests\n// Tests the new openai-responses provider that uses the OpenAI Responses API\n\n// Basic OpenAI Responses client\nclient OpenAIResponses {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n }\n}\n\n// OpenAI Responses client with explicit response type\nclient OpenAIResponsesExplicit {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\n// OpenAI Responses client with custom base URL (for testing)\nclient OpenAIResponsesCustomURL {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n base_url \"https://api.openai.com/v1\"\n }\n}\n\n// Test basic functionality with responses API\nfunction TestOpenAIResponses(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n Write a short haiku about {{ input }}. 
Make it simple and beautiful.\n \"#\n}\n\n// Test with explicit response type configuration\nfunction TestOpenAIResponsesExplicit(input: string) -> string {\n client OpenAIResponsesExplicit\n prompt #\"\n {{ _.role(\"user\") }}\n Create a brief poem about {{ input }}. Keep it under 50 words.\n \"#\n}\n\n// Test with custom base URL\nfunction TestOpenAIResponsesCustomURL(input: string) -> string {\n client OpenAIResponsesCustomURL\n prompt #\"\n {{ _.role(\"user\") }}\n Tell me an interesting fact about {{ input }}.\n \"#\n}\n\n// Test with multi-turn conversation\nfunction TestOpenAIResponsesConversation(topic: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"system\") }}\n You are a helpful assistant that provides concise answers.\n \n {{ _.role(\"user\") }}\n What is {{ topic }}?\n \n {{ _.role(\"assistant\") }}\n {{ topic }} is a fascinating subject. Let me explain briefly.\n \n {{ _.role(\"user\") }}\n Can you give me a simple example?\n \"#\n}\n\n// Test with different model parameter\nclient OpenAIResponsesGPT4 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4\"\n }\n}\n\nfunction TestOpenAIResponsesDifferentModel(input: string) -> string {\n client OpenAIResponsesGPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Explain {{ input }} in one sentence.\n \"#\n}\n\n// Test error handling with invalid configuration\nclient OpenAIResponsesInvalidResponseType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4.1\"\n // This should work since openai response type is valid for responses provider\n client_response_type \"openai\"\n }\n}\n\nfunction TestOpenAIResponsesWithOpenAIResponseType(input: string) -> string {\n client OpenAIResponsesInvalidResponseType\n prompt #\"\n {{ _.role(\"user\") }}\n Write about {{ input }}.\n \"#\n}\n\n// Comprehensive test suite for OpenAI Responses\ntest TestOpenAIResponsesProviders {\n functions [\n TestOpenAIResponses,\n TestOpenAIResponsesExplicit,\n TestOpenAIResponsesCustomURL,\n TestOpenAIResponsesConversation,\n TestOpenAIResponsesDifferentModel,\n TestOpenAIResponsesWithOpenAIResponseType\n ]\n args {\n input \"mountains\"\n topic \"machine learning\"\n }\n}\n\n// Test shorthand syntax (this should work but use standard openai, not responses)\nfunction TestOpenAIResponsesShorthand(input: string) -> string {\n client \"openai/gpt-5-mini\"\n prompt #\"\n {{ _.role(\"user\") }}\n What do you think about {{ input }}?\n \"#\n}\n\n// Test to ensure the provider correctly routes to /v1/responses endpoint\n// This is validated by the implementation, not by the test execution\nfunction TestOpenAIResponsesEndpoint(input: string) -> string {\n client OpenAIResponses\n prompt #\"\n {{ _.role(\"user\") }}\n This request should go to /v1/responses endpoint, not /v1/chat/completions.\n Respond with a short message about {{ input }}.\n \"#\n}\n\n// Test that demonstrates automatic response type selection\nclient OpenAIResponsesAutoType {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n // No explicit client_response_type - should automatically use openai-responses\n }\n}\n\nfunction TestOpenAIResponsesAutoType(input: string) -> string {\n client OpenAIResponsesAutoType\n prompt #\"\n {{ _.role(\"user\") }}\n This client should automatically use openai-responses response type.\n Write a short description of {{ input }}.\n \"#\n}\n\n// Additional test for validation\ntest TestOpenAIResponsesValidation {\n functions [\n 
TestOpenAIResponsesShorthand,\n TestOpenAIResponsesEndpoint,\n TestOpenAIResponsesAutoType,\n TestOpenAIResponsesExplicit,\n TestOpenAIProviderWithResponsesType\n ]\n args {\n input \"artificial intelligence\"\n }\n}\n\n// Test image input/output with OpenAI Responses API\nclient OpenAIResponsesImage {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\nfunction TestOpenAIResponsesImageInput(image: image | string | pdf | audio) -> string {\n client OpenAIResponsesImage\n prompt #\"\n {{ _.role(\"user\") }}\n what is in this content?\n {{ image }}\n \"#\n}\n\n// Test for image analysis\ntest TestOpenAIResponsesImageAnalysis {\n functions [\n TestOpenAIResponsesImageInput\n ]\n args {\n image \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n }\n}\n\n// Test web search with OpenAI Responses API\nclient OpenAIResponsesWebSearch {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"web_search_preview\"\n }\n ]\n }\n}\n\nfunction TestOpenAIResponsesWebSearch(query: string) -> string {\n client OpenAIResponsesWebSearch\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for web search functionality\ntest TestOpenAIResponsesWebSearchTest {\n functions [\n TestOpenAIResponsesWebSearch\n ]\n args {\n query \"What was a positive news story from today?\"\n }\n}\n\n\n// Test function calling with OpenAI Responses API\nclient OpenAIResponsesFunctionCall {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n tools [\n {\n type \"function\"\n name \"get_current_weather\"\n description \"Get the current weather in a given location\"\n parameters {\n type \"object\"\n properties {\n location {\n type \"string\"\n description \"The city and state, e.g. 
San Francisco, CA\"\n }\n unit {\n type \"string\"\n enum [\"celsius\", \"fahrenheit\"]\n }\n }\n required [\"location\", \"unit\"]\n }\n }\n ]\n tool_choice \"auto\"\n }\n}\n\nfunction TestOpenAIResponsesFunctionCall(query: string) -> string {\n client OpenAIResponsesFunctionCall\n prompt #\"\n {{ _.role(\"user\") }}\n {{ query }}\n \"#\n}\n\n// Test for function calling\ntest TestOpenAIResponsesFunctionCallTest {\n functions [\n TestOpenAIResponsesFunctionCall\n ]\n args {\n query \"What is the weather like in Boston today?\"\n }\n}\n\n// Test using standard openai provider with openai-responses client_response_type\nclient OpenAIWithResponsesType {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5-mini\"\n client_response_type \"openai-responses\"\n }\n}\n\nfunction TestOpenAIProviderWithResponsesType(input: string) -> string {\n client OpenAIWithResponsesType\n prompt #\"\n {{ _.role(\"user\") }}\n This uses the openai provider but with openai-responses client_response_type.\n Write a short summary about {{ input }}.\n \"#\n}\n\n// Test reasoning with OpenAI Responses API\nclient OpenAIResponsesReasoning {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n reasoning{\n effort \"high\"\n }\n }\n}\n\nfunction TestOpenAIResponsesReasoning(problem: string) -> string {\n client OpenAIResponsesReasoning\n prompt #\"\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n// Test for reasoning capability\ntest TestOpenAIResponsesReasoningTest {\n functions [\n TestOpenAIResponsesReasoning\n ]\n args {\n problem \"Solve this step by step: If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled?\"\n }\n}\n\nclient Gpt5 {\n provider openai-responses\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-5\"\n }\n}\n\n\nfunction TestOpenAIResponsesAllRoles(problem: string) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"system\") }}\n Hi\n {{ _.role(\"developer\") }}\n Hi\n {{ _.role(\"assistant\") }}\n Hi\n {{ _.role(\"user\") }}\n {{ problem }}\n \"#\n}\n\n\nfunction TestOpenaiResponsesPdfs(pdf: pdf) -> string {\n client Gpt5\n prompt #\"\n {{ _.role(\"user\") }}\n Summarize in one sentence the contents of this:\n {{ pdf }}\n \"#\n} \n\ntest TestOpenaiResponsesPdfsTest {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { url \"https://www.berkshirehathaway.com/letters/2024ltr.pdf\" }\n }\n}\n\ntest TestOpenaiResponsesPdfsTestFile {\n functions [\n TestOpenaiResponsesPdfs\n ]\n args {\n pdf { file \"../../dummy.pdf\" }\n }\n}\n\n\ntest TestOpenAIResponsesAllRolesTest {\n functions [\n TestOpenAIResponsesAllRoles\n ]\n args {\n problem \"What is the weather like in Boston today?\"\n }\n}", "test-files/providers/openai-with-anthropic-response.baml": "client OpenAIWithAnthropicResponse {\n provider openai-responses\n options {\n model \"gpt-4o\"\n client_response_type \"openai-responses\"\n base_url \"http://localhost:8000\"\n }\n}\n\nfunction OpenAIWithAnthropicResponseHello(s: string) -> string {\n client OpenAIWithAnthropicResponse\n prompt #\"\n Return the string \"Hello, world!\" with {{ s }} included in the response.\n {{ _.role(\"user\") }}\n \"#\n}\n\ntest TestOpenAIWithAnthropicResponse {\n functions [\n OpenAIWithAnthropicResponseHello\n ]\n args {\n s \"Cherry blossoms\"\n }\n}", - "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user 
input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", + "test-files/providers/openai.baml": "function PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAILegacyProvider(input: string) -> string {\n client GPT35LegacyProvider\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIShorthand(input: string) -> string {\n client \"openai/gpt-4o\"\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n\n\n\n// Test standard GPT-4 (should add default max_tokens)\nfunction TestOpenAI(input: string) -> string {\n client GPT4\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientEnvBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientEnvBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\nfunction TestOpenAIConcurrencyClientHardocodedBaseUrl(input: string) -> string {\n client OpenAIConcurrencyTestClientHardocodedBaseUrl\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku. Make it 50 paragraphs\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestOpenAIO1NoMaxTokens(input: string) -> string {\n client OpenAIO1\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_tokens (should fail)\nfunction TestOpenAIO1WithMaxTokens(input: string) -> string {\n client OpenAIO1WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test O1 model with explicit max_completion_tokens\nfunction TestOpenAIO1WithMaxCompletionTokens(input: string) -> string {\n client OpenAIO1WithMaxCompletionTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test GPT-4 with explicit max_tokens (should keep user value)\nfunction TestOpenAIWithMaxTokens(input: string) -> string {\n client GPT4WithMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\n// Test OpenAI with null max_tokens (should not add default)\nfunction TestOpenAIWithNullMaxTokens(input: string) -> string {\n client OpenAIWithNullMaxTokens\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n Input: {{ input }}\n \"#\n}\n\nclient GPT4oMini {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o-mini\"\n }\n}\n\nfunction TestOpenAIGPT4oMini(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n {{ input }}\n \"#\n}\nfunction TestOpenAIGPT4oMini2(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction TestOpenAIGPT4oMini3(input: string) -> string {\n client GPT4oMini\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\nfunction OpenAIGPT4oMissingBaseUrlEnvVar(input: string) -> string {\n client GPT4oBaseUrlNotSet\n prompt #\"\n {{ _.role(\"user\") }}\n Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n \"#\n}\n\n// Add test cases to verify the behavior\ntest TestOpenAIClients {\n functions [\n TestOpenAI,\n TestOpenAIO1NoMaxTokens,\n TestOpenAIO1WithMaxTokens,\n TestOpenAIWithMaxTokens,\n TestOpenAIO1WithMaxCompletionTokens,\n TestOpenAIWithNullMaxTokens\n ]\n args {\n input #\"\n Cherry blossoms\n \"#\n }\n}\n\nclient OpenAIWithNullMaxTokens {\n provider openai\n options {\n api_key env.OPENAI_API_KEY\n model \"gpt-4o\"\n max_tokens null\n }\n}", "test-files/providers/openrouter.baml": "function TestOpenRouterMistralSmall3_1_24b(input: string) -> string {\n client OpenRouterMistralSmall3_1_24b\n prompt #\"\n Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n \"#\n}\n \n \ntest TestName {\n functions [TestOpenRouterMistralSmall3_1_24b]\n args {\n input #\"\n hello world\n \"#\n }\n}\n \n \n\nclient OpenRouterMistralSmall3_1_24b {\n provider \"openai-generic\"\n options {\n base_url \"https://openrouter.ai/api/v1\"\n api_key env.OPENROUTER_API_KEY\n model \"mistralai/mistral-small-3.1-24b-instruct\"\n temperature 0.1\n headers {\n \"HTTP-Referer\" \"https://me.com\" // Optional\n \"X-Title\" \"me\" // Optional\n }\n }\n}", "test-files/providers/strategy.baml": "function TestFallbackStrategy(input: string) -> string {\n client Resilient_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n\nfunction TestRoundRobinStrategy(input: string) -> string {\n client Lottery_SimpleSyntax\n prompt #\"\n {{ _.role('system') }}\n You are a helpful assistant.\n\n {{ _.role('user') }}\n Write a nice short story about {{ input }}\n \"#\n}\n", "test-files/providers/tests.baml": "test TestOpenAIShorthand {\n functions [TestOpenAIShorthand]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestAWS {\n functions [\n TestAws\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestProvider {\n functions [\n TestAnthropic, TestVertex, PromptTestOpenAI, TestAzure, TestOllama, TestGemini, TestGeminiThinking, TestAws,\n TestAwsInvalidRegion,\n TestOpenAIShorthand,\n TestAnthropicShorthand,\n TestAwsInvalidAccessKey,\n TestAwsInvalidProfile,\n TestAwsInvalidSessionToken\n ]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\ntest TestName {\n functions [TestCaching]\n args {\n input #\"\nIn a near-future society where dreams have become a tradable commodity and shared experience, a lonely and socially awkward teenager named Alex discovers they possess a rare and powerful ability to not only view but also manipulate the dreams of others. Initially thrilled by this newfound power, Alex begins subtly altering the dreams of classmates and family members, helping them overcome fears, boost confidence, or experience fantastical adventures. As Alex's skills grow, so does their influence. They start selling premium dream experiences on the black market, crafting intricate and addictive dreamscapes for wealthy clients. However, the line between dream and reality begins to blur for those exposed to Alex's creations. Some clients struggle to differentiate between their true memories and the artificial ones implanted by Alex's dream manipulation.\n\nComplications arise when a mysterious government agency takes notice of Alex's unique abilities. They offer Alex a chance to use their gift for \"the greater good,\" hinting at applications in therapy, criminal rehabilitation, and even national security. Simultaneously, an underground resistance movement reaches out, warning Alex about the dangers of dream manipulation and the potential for mass control and exploitation. Caught between these opposing forces, Alex must navigate a complex web of ethical dilemmas. They grapple with questions of free will, the nature of consciousness, and the responsibility that comes with having power over people's minds. 
As the consequences of their actions spiral outward, affecting the lives of loved ones and strangers alike, Alex is forced to confront the true nature of their ability and decide how—or if—it should be used.\n\nThe story explores themes of identity, the subconscious mind, the ethics of technology, and the power of imagination. It delves into the potential consequences of a world where our most private thoughts and experiences are no longer truly our own, and examines the fine line between helping others and manipulating them for personal gain or a perceived greater good. The narrative further expands on the societal implications of such abilities, questioning the moral boundaries of altering consciousness and the potential for abuse in a world where dreams can be commodified. It challenges the reader to consider the impact of technology on personal autonomy and the ethical responsibilities of those who wield such power.\n\nAs Alex's journey unfolds, they encounter various individuals whose lives have been touched by their dream manipulations, each presenting a unique perspective on the ethical quandaries at hand. From a classmate who gains newfound confidence to a wealthy client who becomes addicted to the dreamscapes, the ripple effects of Alex's actions are profound and far-reaching. The government agency's interest in Alex's abilities raises questions about the potential for state control and surveillance, while the resistance movement highlights the dangers of unchecked power and the importance of safeguarding individual freedoms.\n\nUltimately, Alex's story is one of self-discovery and moral reckoning, as they must decide whether to embrace their abilities for personal gain, align with the government's vision of a controlled utopia, or join the resistance in their fight for freedom and autonomy. The narrative invites readers to reflect on the nature of reality, the boundaries of human experience, and the ethical implications of a world where dreams are no longer private sanctuaries but shared and manipulated commodities. It also explores the psychological impact on Alex, who must deal with the burden of knowing the intimate fears and desires of others, and the isolation that comes from being unable to share their own dreams without altering them.\n\nThe story further examines the technological advancements that have made dream manipulation possible, questioning the role of innovation in society and the potential for both progress and peril. It considers the societal divide between those who can afford to buy enhanced dream experiences and those who cannot, highlighting issues of inequality and access. As Alex becomes more entangled in the web of their own making, they must confront the possibility that their actions could lead to unintended consequences, not just for themselves but for the fabric of society as a whole.\n\nIn the end, Alex's journey is a cautionary tale about the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. 
And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n\nIn conclusion, this story is a reflection on the power of dreams and the responsibilities that come with wielding such influence. It serves as a reminder of the importance of ethical considerations in the face of technological advancement and the need to balance innovation with humanity. The story leaves readers pondering the true cost of a world where dreams are no longer sacred, and the potential for both wonder and danger in the uncharted territories of the mind. But it's also a story about the power of imagination and the potential for change, even in a world where our deepest thoughts are no longer our own. And it's a story about the power of choice, and the importance of fighting for the freedom to dream.\n \"#\n not_cached #\"\n hello world\n \"#\n }\n}", diff --git a/integ-tests/typescript/baml_client/parser.ts b/integ-tests/typescript/baml_client/parser.ts index 0fa173148f..5295671d77 100644 --- a/integ-tests/typescript/baml_client/parser.ts +++ b/integ-tests/typescript/baml_client/parser.ts @@ -4170,6 +4170,52 @@ export class LlmResponseParser { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientEnvBaseUrl", + llmResponse, + false, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + llmResponse, + false, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( llmResponse: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } @@ -10064,6 +10110,52 @@ export class LlmStreamParser { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ?
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientEnvBaseUrl", + llmResponse, + true, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + llmResponse: string, + __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } + ): string { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.parseLlmResponse( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + llmResponse, + true, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + env, + ) as string + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( llmResponse: string, __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, env?: Record<string, string | undefined> } diff --git a/integ-tests/typescript/baml_client/sync_client.ts b/integ-tests/typescript/baml_client/sync_client.ts index c564c458a8..109e47c3be 100644 --- a/integ-tests/typescript/baml_client/sync_client.ts +++ b/integ-tests/typescript/baml_client/sync_client.ts @@ -7656,6 +7656,90 @@ export class BamlSyncClient { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): string { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided and reject for sync operations + if (options.onTick) { + throw new Error("onTick is not supported for synchronous functions. Please use the async client instead."); + } + + const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.callFunctionSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): string { + try { + const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } + const signal = options.signal; + + if (signal?.aborted) { + throw new BamlAbortError('Operation was aborted', signal.reason); + } + + // Check if onTick is provided and reject for sync operations + if (options.onTick) { + throw new Error("onTick is not supported for synchronous functions. Please use the async client instead."); + } + + const collector = options.collector ? (Array.isArray(options.collector) ?
options.collector : [options.collector]) : []; + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + const raw = this.runtime.callFunctionSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + options.tb?.__tb(), + options.clientRegistry, + collector, + options.tags || {}, + env, + signal, + options.events, + ) + return raw.parsed(false) as string + } catch (error: any) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions diff --git a/integ-tests/typescript/baml_client/sync_request.ts b/integ-tests/typescript/baml_client/sync_request.ts index 5c8a450fa9..f2cb3ef120 100644 --- a/integ-tests/typescript/baml_client/sync_request.ts +++ b/integ-tests/typescript/baml_client/sync_request.ts @@ -4537,6 +4537,56 @@ export class HttpRequest { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + false, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions @@ -10943,6 +10993,56 @@ export class HttpStreamRequest { } } + TestOpenAIConcurrencyClientEnvBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ? { ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientEnvBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + + TestOpenAIConcurrencyClientHardocodedBaseUrl( + input: string, + __baml_options__?: BamlCallOptions + ): HTTPRequest { + try { + const rawEnv = __baml_options__?.env ?
{ ...process.env, ...__baml_options__.env } : { ...process.env }; + const env: Record<string, string> = Object.fromEntries( + Object.entries(rawEnv).filter(([_, value]) => value !== undefined) as [string, string][] + ); + return this.runtime.buildRequestSync( + "TestOpenAIConcurrencyClientHardocodedBaseUrl", + { + "input": input + }, + this.ctxManager.cloneContext(), + __baml_options__?.tb?.__tb(), + __baml_options__?.clientRegistry, + true, + env, + ) + } catch (error) { + throw toBamlError(error); + } + } + TestOpenAIDummyClient( input: string, __baml_options__?: BamlCallOptions