diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request.go b/internal/translator/codex/openai/chat-completions/codex_openai_request.go index 6cc701e707..14f92af4e8 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go @@ -121,13 +121,94 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b case "tool": // Handle tool response messages as top-level function_call_output objects toolCallID := m.Get("tool_call_id").String() - content := m.Get("content").String() + content := m.Get("content") // Create function_call_output object funcOutput := []byte(`{}`) funcOutput, _ = sjson.SetBytes(funcOutput, "type", "function_call_output") funcOutput, _ = sjson.SetBytes(funcOutput, "call_id", toolCallID) - funcOutput, _ = sjson.SetBytes(funcOutput, "output", content) + + // Handle content: can be string, array, or any other JSON value. + // Always set output to avoid dropping tool payloads for null/non-array content. + if content.Type == gjson.String { + funcOutput, _ = sjson.SetBytes(funcOutput, "output", content.String()) + } else if content.IsArray() { + // Build output array from content items + outputArr := []byte(`[]`) + items := content.Array() + for j := 0; j < len(items); j++ { + it := items[j] + t := it.Get("type").String() + switch t { + case "text": + outputPart := []byte(`{}`) + outputPart, _ = sjson.SetBytes(outputPart, "type", "input_text") + outputPart, _ = sjson.SetBytes(outputPart, "text", it.Get("text").String()) + outputArr, _ = sjson.SetRawBytes(outputArr, "-1", outputPart) + case "image_url": + // Handle image_url content in tool message + imageURL := it.Get("image_url.url").String() + fileID := it.Get("image_url.file_id").String() + if imageURL != "" || fileID != "" { + outputPart := []byte(`{}`) + outputPart, _ = sjson.SetBytes(outputPart, "type", "input_image") + if imageURL != "" { + outputPart, _ = sjson.SetBytes(outputPart, "image_url", imageURL) + } + if fileID != "" { + outputPart, _ = sjson.SetBytes(outputPart, "file_id", fileID) + } + if detail := it.Get("image_url.detail").String(); detail != "" { + outputPart, _ = sjson.SetBytes(outputPart, "detail", detail) + } + outputArr, _ = sjson.SetRawBytes(outputArr, "-1", outputPart) + } else { + outputPart := []byte(`{}`) + outputPart, _ = sjson.SetBytes(outputPart, "type", "input_text") + outputPart, _ = sjson.SetBytes(outputPart, "text", it.Raw) + outputArr, _ = sjson.SetRawBytes(outputArr, "-1", outputPart) + } + case "file": + // Handle file content in tool message + fileID := it.Get("file.file_id").String() + fileData := it.Get("file.file_data").String() + filename := it.Get("file.filename").String() + fileUrl := it.Get("file.file_url").String() + if fileID != "" || fileData != "" || filename != "" || fileUrl != "" { + outputPart := []byte(`{}`) + outputPart, _ = sjson.SetBytes(outputPart, "type", "input_file") + if fileID != "" { + outputPart, _ = sjson.SetBytes(outputPart, "file_id", fileID) + } + if fileData != "" { + outputPart, _ = sjson.SetBytes(outputPart, "file_data", fileData) + } + if filename != "" { + outputPart, _ = sjson.SetBytes(outputPart, "filename", filename) + } + if fileUrl != "" { + outputPart, _ = sjson.SetBytes(outputPart, "file_url", fileUrl) + } + outputArr, _ = sjson.SetRawBytes(outputArr, "-1", outputPart) + } + + default: + // Fallback: convert unknown types to string representation + outputPart := []byte(`{}`) + outputPart, _ = sjson.SetBytes(outputPart, "type", "input_text") + outputPart, _ = sjson.SetBytes(outputPart, "text", it.Raw) + outputArr, _ = sjson.SetRawBytes(outputArr, "-1", outputPart) + } + } + funcOutput, _ = sjson.SetRawBytes(funcOutput, "output", outputArr) + } else { + fallbackOutput := content.Raw + if fallbackOutput == "" { + fallbackOutput = content.String() + } + funcOutput, _ = sjson.SetBytes(funcOutput, "output", fallbackOutput) + } + out, _ = sjson.SetRawBytes(out, "input.-1", funcOutput) default: @@ -177,16 +258,33 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b if u := it.Get("image_url.url"); u.Exists() { part, _ = sjson.SetBytes(part, "image_url", u.String()) } + if fid := it.Get("image_url.file_id").String(); fid != "" { + part, _ = sjson.SetBytes(part, "file_id", fid) + } + if detail := it.Get("image_url.detail").String(); detail != "" { + part, _ = sjson.SetBytes(part, "detail", detail) + } msg, _ = sjson.SetRawBytes(msg, "content.-1", part) } case "file": if role == "user" { + fileID := it.Get("file.file_id").String() fileData := it.Get("file.file_data").String() + fileURL := it.Get("file.file_url").String() filename := it.Get("file.filename").String() - if fileData != "" { + + if fileData != "" || fileURL != "" { part := []byte(`{}`) part, _ = sjson.SetBytes(part, "type", "input_file") - part, _ = sjson.SetBytes(part, "file_data", fileData) + if fileID != "" { + part, _ = sjson.SetBytes(part, "file_id", fileID) + } + if fileData != "" { + part, _ = sjson.SetBytes(part, "file_data", fileData) + } + if fileURL != "" { + part, _ = sjson.SetBytes(part, "file_url", fileURL) + } if filename != "" { part, _ = sjson.SetBytes(part, "filename", filename) } diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go index 84c8dad2cc..8c8d3c7371 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go @@ -176,6 +176,333 @@ func TestToolCallWithContent(t *testing.T) { } } +func TestToolCallOutputWithImageContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Show me the generated image result."}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_img_1", + "type": "function", + "function": { + "name": "render_image", + "arguments": "{}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_img_1", + "content": [ + {"type": "text", "text": "Rendered image attached."}, + {"type": "image_url", "image_url": {"url": "https://example.com/generated.png"}} + ] + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "render_image", + "description": "Render image", + "parameters": {"type": "object", "properties": {}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + if len(items) != 3 { + t.Fatalf("expected 3 input items, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + output := items[2].Get("output") + if !output.IsArray() { + t.Fatalf("expected tool output to be an array, got: %s", output.Raw) + } + + parts := output.Array() + if len(parts) != 2 { + t.Fatalf("expected 2 output parts, got %d: %s", len(parts), output.Raw) + } + + if parts[0].Get("type").String() != "input_text" { + t.Errorf("part 0: expected type 'input_text', got '%s'", parts[0].Get("type").String()) + } + if parts[0].Get("text").String() != "Rendered image attached." { + t.Errorf("part 0: unexpected text '%s'", parts[0].Get("text").String()) + } + if parts[1].Get("type").String() != "input_image" { + t.Errorf("part 1: expected type 'input_image', got '%s'", parts[1].Get("type").String()) + } + if parts[1].Get("image_url").String() != "https://example.com/generated.png" { + t.Errorf("part 1: unexpected image_url '%s'", parts[1].Get("image_url").String()) + } +} + +func TestToolCallOutputWithImageContentWithoutSourceFallsBackToText(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Show me the generated image result."}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_img_1", + "type": "function", + "function": { + "name": "render_image", + "arguments": "{}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_img_1", + "content": [ + {"type": "image_url", "image_url": {"detail": "high"}} + ] + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "render_image", + "description": "Render image", + "parameters": {"type": "object", "properties": {}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + parts := gjson.Get(result, "input.2.output").Array() + if len(parts) != 1 { + t.Fatalf("expected 1 output part, got %d: %s", len(parts), gjson.Get(result, "input.2.output").Raw) + } + if parts[0].Get("type").String() != "input_text" { + t.Errorf("part 0: expected type 'input_text', got '%s'", parts[0].Get("type").String()) + } + if parts[0].Get("text").String() != `{"type": "image_url", "image_url": {"detail": "high"}}` { + t.Errorf("part 0: unexpected fallback text '%s'", parts[0].Get("text").String()) + } +} + +func TestToolCallOutputWithFileContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Process these files"}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_file_1", + "type": "function", + "function": {"name": "process_files", "arguments": "{}"} + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_file_1", + "content": [ + {"type": "text", "text": "Files processed"}, + {"type": "file", "file": {"file_id": "file-abc", "filename": "result.txt"}}, + {"type": "file", "file": {"file_url": "https://example.com/output.pdf", "filename": "output.pdf"}}, + {"type": "file", "file": {"file_data": "YW5vdGhlciBmaWxl", "filename": "data.bin"}} + ] + } + ], + "tools": [ + { + "type": "function", + "function": {"name": "process_files", "description": "Process", "parameters": {"type": "object", "properties": {}}} + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + if len(items) != 3 { + t.Fatalf("expected 3 input items, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + output := items[2].Get("output") + if !output.IsArray() { + t.Fatalf("expected tool output to be an array, got: %s", output.Raw) + } + + parts := output.Array() + if len(parts) != 4 { + t.Fatalf("expected 4 output parts, got %d: %s", len(parts), output.Raw) + } + + if parts[0].Get("type").String() != "input_text" { + t.Errorf("part 0: expected input_text, got %s", parts[0].Get("type").String()) + } + + if parts[1].Get("type").String() != "input_file" { + t.Errorf("part 1: expected input_file, got %s", parts[1].Get("type").String()) + } + if parts[1].Get("file_id").String() != "file-abc" { + t.Errorf("part 1: expected file_id file-abc, got %s", parts[1].Get("file_id").String()) + } + if parts[1].Get("filename").String() != "result.txt" { + t.Errorf("part 1: expected filename result.txt, got %s", parts[1].Get("filename").String()) + } + + if parts[2].Get("file_url").String() != "https://example.com/output.pdf" { + t.Errorf("part 2: expected file_url, got %s", parts[2].Get("file_url").String()) + } + if parts[2].Get("filename").String() != "output.pdf" { + t.Errorf("part 2: expected filename output.pdf, got %s", parts[2].Get("filename").String()) + } + + if parts[3].Get("file_data").String() != "YW5vdGhlciBmaWxl" { + t.Errorf("part 3: expected file_data, got %s", parts[3].Get("file_data").String()) + } + if parts[3].Get("filename").String() != "data.bin" { + t.Errorf("part 3: expected filename data.bin, got %s", parts[3].Get("filename").String()) + } +} + +func TestToolCallOutputWithNullContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Test null tool output"}, + { + "role": "assistant", + "content": null, + "tool_calls": [{"id": "call_null", "type": "function", "function": {"name": "test", "arguments": "{}"}}] + }, + { + "role": "tool", + "tool_call_id": "call_null", + "content": null + } + ], + "tools": [{"type": "function", "function": {"name": "test", "description": "Test", "parameters": {"type": "object", "properties": {}}}}] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + output := items[2].Get("output") + if !output.Exists() { + t.Fatalf("expected output field to exist for null tool content: %s", items[2].Raw) + } + if output.String() != "null" { + t.Fatalf("expected null tool content to fall back to string 'null', got: %s", output.Raw) + } +} + +func TestToolCallOutputWithObjectContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Test object tool output"}, + { + "role": "assistant", + "content": null, + "tool_calls": [{"id": "call_obj", "type": "function", "function": {"name": "test", "arguments": "{}"}}] + }, + { + "role": "tool", + "tool_call_id": "call_obj", + "content": {"status":"ok","count":2} + } + ], + "tools": [{"type": "function", "function": {"name": "test", "description": "Test", "parameters": {"type": "object", "properties": {}}}}] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + output := items[2].Get("output") + if !output.Exists() { + t.Fatalf("expected output field to exist for object tool content: %s", items[2].Raw) + } + if output.String() != `{"status":"ok","count":2}` { + t.Fatalf("expected object tool content to fall back to serialized JSON, got: %s", output.Raw) + } +} + +func TestToolCallOutputWithUnknownType(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Test"}, + { + "role": "assistant", + "content": null, + "tool_calls": [{"id": "call_x", "type": "function", "function": {"name": "test", "arguments": "{}"}}] + }, + { + "role": "tool", + "tool_call_id": "call_x", + "content": [ + {"type": "text", "text": "normal text"}, + {"type": "unknown_type", "foo": "bar", "nested": {"a": 1}}, + {"type": "another_unknown", "value": 42} + ] + } + ], + "tools": [{"type": "function", "function": {"name": "test", "description": "Test", "parameters": {"type": "object", "properties": {}}}}] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + output := items[2].Get("output") + parts := output.Array() + + if len(parts) != 3 { + t.Fatalf("expected 3 output parts, got %d: %s", len(parts), output.Raw) + } + + // normal text + if parts[0].Get("type").String() != "input_text" { + t.Errorf("part 0: expected input_text, got %s", parts[0].Get("type").String()) + } + + // unknown type -> fallback to string + if parts[1].Get("type").String() != "input_text" { + t.Errorf("part 1: expected input_text (fallback), got %s", parts[1].Get("type").String()) + } + // should contain the raw JSON + fallbackText := parts[1].Get("text").String() + if !gjson.Valid(fallbackText) { + t.Errorf("part 1: fallback text should be valid JSON, got: %s", fallbackText) + } + + // another unknown + if parts[2].Get("type").String() != "input_text" { + t.Errorf("part 2: expected input_text (fallback), got %s", parts[2].Get("type").String()) + } +} + // Parallel tool calls: assistant invokes 3 tools at once, all call_ids // and outputs must be translated and paired correctly. func TestMultipleToolCalls(t *testing.T) { @@ -595,6 +922,144 @@ func TestCallIDsMatchBetweenCallAndOutput(t *testing.T) { } } +func TestUserFileContentTranslated(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Analyze these files"}, + {"type": "file", "file": {"file_url": "https://example.com/b.pdf", "filename": "b.pdf"}}, + {"type": "file", "file": {"file_data": "ZmlsZSBjb250ZW50", "filename": "c.txt"}} + ] + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + if len(items) != 1 { + t.Fatalf("expected 1 input item, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + content := items[0].Get("content").Array() + if len(content) != 3 { + t.Fatalf("expected 3 content parts, got %d: %s", len(content), items[0].Get("content").Raw) + } + + if content[0].Get("type").String() != "input_text" { + t.Errorf("part 0: expected input_text, got %s", content[0].Get("type").String()) + } + + if content[1].Get("type").String() != "input_file" { + t.Errorf("part 1: expected input_file, got %s", content[1].Get("type").String()) + } + if content[1].Get("file_url").String() != "https://example.com/b.pdf" { + t.Errorf("part 1: expected file_url https://example.com/b.pdf, got %s", content[1].Get("file_url").String()) + } + if content[1].Get("filename").String() != "b.pdf" { + t.Errorf("part 1: expected filename b.pdf, got %s", content[1].Get("filename").String()) + } + + if content[2].Get("file_data").String() != "ZmlsZSBjb250ZW50" { + t.Errorf("part 2: expected file_data ZmlsZSBjb250ZW50, got %s", content[2].Get("file_data").String()) + } + if content[2].Get("filename").String() != "c.txt" { + t.Errorf("part 2: expected filename c.txt, got %s", content[2].Get("filename").String()) + } +} + +func TestUserFileContentSkipsFilenameOnlyAndFileIDOnlyParts(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Analyze these files"}, + {"type": "file", "file": {"filename": "missing-source.pdf"}}, + {"type": "file", "file": {"file_id": "file-123", "filename": "a.txt"}}, + {"type": "file", "file": {"file_url": "https://example.com/b.pdf", "filename": "b.pdf"}} + ] + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + if len(items) != 1 { + t.Fatalf("expected 1 input item, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + content := items[0].Get("content").Array() + if len(content) != 2 { + t.Fatalf("expected 2 content parts, got %d: %s", len(content), items[0].Get("content").Raw) + } + + if content[0].Get("type").String() != "input_text" { + t.Errorf("part 0: expected input_text, got %s", content[0].Get("type").String()) + } + if content[1].Get("type").String() != "input_file" { + t.Errorf("part 1: expected input_file, got %s", content[1].Get("type").String()) + } + if content[1].Get("file_url").String() != "https://example.com/b.pdf" { + t.Errorf("part 1: expected file_url https://example.com/b.pdf, got %s", content[1].Get("file_url").String()) + } + if content[1].Get("filename").String() != "b.pdf" { + t.Errorf("part 1: expected filename b.pdf, got %s", content[1].Get("filename").String()) + } +} + +func TestUserImageWithDetail(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Analyze this image"}, + {"type": "image_url", "image_url": {"url": "https://example.com/img.png", "detail": "high"}}, + {"type": "image_url", "image_url": {"file_id": "file-img-123", "detail": "low"}} + ] + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + content := items[0].Get("content").Array() + + if len(content) != 3 { + t.Fatalf("expected 3 content parts, got %d", len(content)) + } + + // image with url and detail + if content[1].Get("type").String() != "input_image" { + t.Errorf("part 1: expected input_image, got %s", content[1].Get("type").String()) + } + if content[1].Get("image_url").String() != "https://example.com/img.png" { + t.Errorf("part 1: expected image_url, got %s", content[1].Get("image_url").String()) + } + if content[1].Get("detail").String() != "high" { + t.Errorf("part 1: expected detail high, got %s", content[1].Get("detail").String()) + } + + // image with file_id and detail + if content[2].Get("file_id").String() != "file-img-123" { + t.Errorf("part 2: expected file_id, got %s", content[2].Get("file_id").String()) + } + if content[2].Get("detail").String() != "low" { + t.Errorf("part 2: expected detail low, got %s", content[2].Get("detail").String()) + } +} + // Tools array should carry over to the Responses format output. func TestToolsDefinitionTranslated(t *testing.T) { input := []byte(`{