katanemo · Spherrrical · May 18, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
@@ -434,17 +434,14 @@ impl StreamContext {
             }
             Ok(streaming_chunk)
         } else {
-            if body_size == 0 {
-                return Err(Action::Continue);
-            }
             debug!(
                 "request_id={}: upstream response complete, streaming=false body_size={}",
                 self.request_identifier(),
                 body_size
             );
-            match self.get_http_response_body(0, body_size) {
-                Some(body) => Ok(body),
-                None => {
+            match self.get_http_response_body(0, usize::MAX) {
+                Some(body) if !body.is_empty() => Ok(body),
+                _ => {
                     warn!(
                         "request_id={}: non streaming response body empty",
                         self.request_identifier()
@@ -1173,7 +1170,14 @@ impl HttpContext for StreamContext {
         }
 
         let current_time = get_current_time().unwrap();
-        if end_of_stream && body_size == 0 {
+
+        // Non-streaming upstream responses may arrive in multiple chunks; pause so the host
+        // buffers them instead of forwarding each one, and parse only the complete body.
+        if !self.streaming_response && !end_of_stream {
+            return Action::Pause;
+        }
+
+        if end_of_stream && body_size == 0 && self.streaming_response {
             debug!(
                 "request_id={}: response body complete, total_bytes={}",
                 self.request_identifier(),
@@ -1194,15 +1198,20 @@ impl HttpContext for StreamContext {
                 );
 
                 // For error responses, forward the upstream error directly without parsing
-                if body_size > 0 {
-                    if let Ok(body) = self.read_raw_response_body(body_size) {
+                if let Ok(body) = self.read_raw_response_body(body_size) {
+                    if !body.is_empty() {
                         debug!(
                             "request_id={}: upstream error body: {}",
                             self.request_identifier(),
                             String::from_utf8_lossy(&body)
                         );
                         // Forward the error response as-is
-                        self.set_http_response_body(0, body_size, &body);
+                        let replace_size = if body_size > 0 {
+                            body_size
+                        } else {
+                            body.len()
+                        };
+                        self.set_http_response_body(0, replace_size, &body);
                     }
                 }
                 return Action::Continue;
@@ -1232,6 +1241,19 @@ impl HttpContext for StreamContext {
             Err(action) => return action,
         };
 
+        if !self.streaming_response && body.is_empty() {
+            if end_of_stream {
+                self.handle_end_of_request_metrics_and_traces(current_time);
+            }
+            return Action::Continue;
+        }
+
+        let replace_size = if body_size > 0 {
+            body_size
+        } else {
+            body.len()
+        };
+
         debug!(
             "request_id={}: upstream raw response, body_size={} content={}",
             self.request_identifier(),
@@ -1243,14 +1265,14 @@ impl HttpContext for StreamContext {
         if self.streaming_response {
             match self.handle_streaming_response(&body, provider_id) {
                 Ok(serialized_body) => {
-                    self.set_http_response_body(0, body_size, &serialized_body);
+                    self.set_http_response_body(0, replace_size, &serialized_body);
                 }
                 Err(action) => return action,
             }
         } else {
             match self.handle_non_streaming_response(&body, provider_id) {
                 Ok(serialized_body) => {
-                    self.set_http_response_body(0, body_size, &serialized_body);
+                    self.set_http_response_body(0, replace_size, &serialized_body);
                 }
                 Err(action) => return action,
             }

diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh
@@ -19,7 +19,8 @@ run_hurl_with_retries() {
   local max_attempts=1
   local attempt=1
 
-  if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
+  if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
+    || [ "$demo_name" = "advanced/currency_exchange" ]; then
     max_attempts=3
   fi
 

diff --git a/docs/source/resources/includes/plano_config_full_reference.yaml b/docs/source/resources/includes/plano_config_full_reference.yaml
@@ -47,6 +47,14 @@ model_providers:
     http_host: api.custom-provider.com
     access_key: $CUSTOM_API_KEY
 
+  # headers: optional map of extra HTTP headers sent on upstream requests (after auth).
+  # Use for provider-specific requirements such as User-Agent, org IDs, or account headers.
+  - model: moonshotai/kimi-for-coding
+    access_key: $MOONSHOTAI_API_KEY
+    base_url: https://api.kimi.com/coding/v1
+    headers:
+      User-Agent: "KimiCLI/1.3"
+
 # Model aliases - use friendly names instead of full provider model names
 model_aliases:
   fast-llm:

diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
@@ -88,6 +88,18 @@ listeners:
     port: 443
     protocol: https
     provider_interface: openai
+  - access_key: $MOONSHOTAI_API_KEY
+    base_url: https://api.kimi.com/coding/v1
+    base_url_path_prefix: /coding/v1
+    cluster_name: moonshotai_api.kimi.com
+    endpoint: api.kimi.com
+    headers:
+      User-Agent: KimiCLI/1.3
+    model: kimi-for-coding
+    name: moonshotai/kimi-for-coding
+    port: 443
+    protocol: https
+    provider_interface: moonshotai
   name: model_1
   output_filters:
   - input_guards
@@ -144,6 +156,18 @@ model_providers:
   port: 443
   protocol: https
   provider_interface: openai
+- access_key: $MOONSHOTAI_API_KEY
+  base_url: https://api.kimi.com/coding/v1
+  base_url_path_prefix: /coding/v1
+  cluster_name: moonshotai_api.kimi.com
+  endpoint: api.kimi.com
+  headers:
+    User-Agent: KimiCLI/1.3
+  model: kimi-for-coding
+  name: moonshotai/kimi-for-coding
+  port: 443
+  protocol: https
+  provider_interface: moonshotai
 - internal: true
   model: Plano-Orchestrator
   name: plano-orchestrator