Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions crates/llm_gateway/src/stream_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -434,17 +434,14 @@ impl StreamContext {
}
Ok(streaming_chunk)
} else {
if body_size == 0 {
return Err(Action::Continue);
}
debug!(
"request_id={}: upstream response complete, streaming=false body_size={}",
self.request_identifier(),
body_size
);
match self.get_http_response_body(0, body_size) {
Some(body) => Ok(body),
None => {
match self.get_http_response_body(0, usize::MAX) {
Some(body) if !body.is_empty() => Ok(body),
_ => {
warn!(
"request_id={}: non streaming response body empty",
self.request_identifier()
Expand Down Expand Up @@ -1173,7 +1170,14 @@ impl HttpContext for StreamContext {
}

let current_time = get_current_time().unwrap();
if end_of_stream && body_size == 0 {

// Non-streaming upstream responses may arrive in multiple chunks; wait for the
// full buffered body before parsing.
if !self.streaming_response && !end_of_stream {
return Action::Continue;
}

if end_of_stream && body_size == 0 && self.streaming_response {
debug!(
"request_id={}: response body complete, total_bytes={}",
self.request_identifier(),
Expand All @@ -1194,15 +1198,20 @@ impl HttpContext for StreamContext {
);

// For error responses, forward the upstream error directly without parsing
if body_size > 0 {
if let Ok(body) = self.read_raw_response_body(body_size) {
if let Ok(body) = self.read_raw_response_body(body_size) {
if !body.is_empty() {
debug!(
"request_id={}: upstream error body: {}",
self.request_identifier(),
String::from_utf8_lossy(&body)
);
// Forward the error response as-is
self.set_http_response_body(0, body_size, &body);
let replace_size = if body_size > 0 {
body_size
} else {
body.len()
};
self.set_http_response_body(0, replace_size, &body);
}
}
return Action::Continue;
Expand Down Expand Up @@ -1232,6 +1241,19 @@ impl HttpContext for StreamContext {
Err(action) => return action,
};

if !self.streaming_response && body.is_empty() {
if end_of_stream {
self.handle_end_of_request_metrics_and_traces(current_time);
}
return Action::Continue;
}

let replace_size = if body_size > 0 {
body_size
} else {
body.len()
};

debug!(
"request_id={}: upstream raw response, body_size={} content={}",
self.request_identifier(),
Expand All @@ -1243,14 +1265,14 @@ impl HttpContext for StreamContext {
if self.streaming_response {
match self.handle_streaming_response(&body, provider_id) {
Ok(serialized_body) => {
self.set_http_response_body(0, body_size, &serialized_body);
self.set_http_response_body(0, replace_size, &serialized_body);
}
Err(action) => return action,
}
} else {
match self.handle_non_streaming_response(&body, provider_id) {
Ok(serialized_body) => {
self.set_http_response_body(0, body_size, &serialized_body);
self.set_http_response_body(0, replace_size, &serialized_body);
}
Err(action) => return action,
}
Expand Down
3 changes: 2 additions & 1 deletion demos/shared/test_runner/run_demo_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ run_hurl_with_retries() {
local max_attempts=1
local attempt=1

if [ "$demo_name" = "llm_routing/preference_based_routing" ]; then
if [ "$demo_name" = "llm_routing/preference_based_routing" ] \
|| [ "$demo_name" = "advanced/currency_exchange" ]; then
max_attempts=3
fi

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ model_providers:
http_host: api.custom-provider.com
access_key: $CUSTOM_API_KEY

# headers: optional map of extra HTTP headers sent on upstream requests (after auth).
# Use for provider-specific requirements such as User-Agent, org IDs, or account headers.
- model: moonshotai/kimi-for-coding
access_key: $MOONSHOTAI_API_KEY
base_url: https://api.kimi.com/coding/v1
headers:
User-Agent: "KimiCLI/1.3"

# Model aliases - use friendly names instead of full provider model names
model_aliases:
fast-llm:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ listeners:
port: 443
protocol: https
provider_interface: openai
- access_key: $MOONSHOTAI_API_KEY
base_url: https://api.kimi.com/coding/v1
base_url_path_prefix: /coding/v1
cluster_name: moonshotai_api.kimi.com
endpoint: api.kimi.com
headers:
User-Agent: KimiCLI/1.3
model: kimi-for-coding
name: moonshotai/kimi-for-coding
port: 443
protocol: https
provider_interface: moonshotai
name: model_1
output_filters:
- input_guards
Expand Down Expand Up @@ -144,6 +156,18 @@ model_providers:
port: 443
protocol: https
provider_interface: openai
- access_key: $MOONSHOTAI_API_KEY
base_url: https://api.kimi.com/coding/v1
base_url_path_prefix: /coding/v1
cluster_name: moonshotai_api.kimi.com
endpoint: api.kimi.com
headers:
User-Agent: KimiCLI/1.3
model: kimi-for-coding
name: moonshotai/kimi-for-coding
port: 443
protocol: https
provider_interface: moonshotai
- internal: true
model: Plano-Orchestrator
name: plano-orchestrator
Expand Down
Loading