diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
index f54a4a027f..464c26efab 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -181,6 +181,69 @@ func TestChatCompletionsToResponses_ImageURL(t *testing.T) {
 	assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL)
 }
 
+func TestChatCompletionsToResponses_EmptyBase64ImageURLSkipped(t *testing.T) {
+	content := `[{"type":"text","text":"Describe this"},{"type":"image_url","image_url":{"url":"data:image/png;base64,"}}]`
+	req := &ChatCompletionsRequest{
+		Model: "gpt-4o",
+		Messages: []ChatMessage{
+			{Role: "user", Content: json.RawMessage(content)},
+		},
+	}
+	resp, err := ChatCompletionsToResponses(req)
+	require.NoError(t, err)
+
+	var items []ResponsesInputItem
+	require.NoError(t, json.Unmarshal(resp.Input, &items))
+	require.Len(t, items, 1)
+
+	var parts []ResponsesContentPart
+	require.NoError(t, json.Unmarshal(items[0].Content, &parts))
+	require.Len(t, parts, 1)
+	assert.Equal(t, "input_text", parts[0].Type)
+	assert.Equal(t, "Describe this", parts[0].Text)
+}
+
+func TestChatCompletionsToResponses_WhitespaceOnlyBase64ImageURLSkipped(t *testing.T) {
+	content := `[{"type":"text","text":"Describe this"},{"type":"image_url","image_url":{"url":"data:image/png;base64, "}}]`
+	req := &ChatCompletionsRequest{
+		Model: "gpt-4o",
+		Messages: []ChatMessage{
+			{Role: "user", Content: json.RawMessage(content)},
+		},
+	}
+	resp, err := ChatCompletionsToResponses(req)
+	require.NoError(t, err)
+
+	var items []ResponsesInputItem
+	require.NoError(t, json.Unmarshal(resp.Input, &items))
+	require.Len(t, items, 1)
+
+	var parts []ResponsesContentPart
+	require.NoError(t, json.Unmarshal(items[0].Content, &parts))
+	require.Len(t, parts, 1)
+	assert.Equal(t, "input_text", parts[0].Type)
+	assert.Equal(t, "Describe this", parts[0].Text)
+}
+
+func TestIsEmptyBase64DataURI(t *testing.T) {
+	cases := []struct {
+		raw  string
+		want bool
+	}{
+		{"data:image/png;base64,", true},
+		{"data:image/png;base64, ", true},
+		{"data:image/png;charset=utf-8;base64,", true},
+		{"data:;base64,", true},
+		{"data:image/png;base64,abc123", false},
+		{"data:image/png,;base64,", false},
+		{"https://example.com/cat.png", false},
+		{"", false},
+	}
+	for _, tc := range cases {
+		assert.Equal(t, tc.want, isEmptyBase64DataURI(tc.raw), "raw=%q", tc.raw)
+	}
+}
+
 func TestChatCompletionsToResponses_SystemArrayContent(t *testing.T) {
 	req := &ChatCompletionsRequest{
 		Model: "gpt-4o",
diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
index 6cdd012a49..dc157a6d3c 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -339,7 +339,7 @@ func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesCont
 				})
 			}
 		case "image_url":
-			if p.ImageURL != nil && p.ImageURL.URL != "" {
+			if p.ImageURL != nil && p.ImageURL.URL != "" && !isEmptyBase64DataURI(p.ImageURL.URL) {
 				responseParts = append(responseParts, ResponsesContentPart{
 					Type:     "input_image",
 					ImageURL: p.ImageURL.URL,
@@ -350,6 +350,19 @@ func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesCont
 	return responseParts
 }
 
+// isEmptyBase64DataURI reports whether raw is a base64 data URI
+// ("data:[<mediatype>][;param=value...];base64,<payload>") whose payload is
+// empty or whitespace-only. Everything before the first comma is the header,
+// so media-type parameters such as ";charset=utf-8" are tolerated. Non-data
+// URLs and data URIs that are not base64-encoded are never reported as empty.
+func isEmptyBase64DataURI(raw string) bool {
+	header, payload, found := strings.Cut(raw, ",")
+	if !found || !strings.HasPrefix(header, "data:") || !strings.HasSuffix(header, ";base64") {
+		return false
+	}
+	return strings.TrimSpace(payload) == ""
+}
+
 func flattenChatContentParts(parts []ChatContentPart) string {
 	var textParts []string
 	for _, p := range parts {
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 0a95961512..ff0a8968e9 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -1931,6 +1931,11 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 		}
 	}
 
+	if sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap(reqBody) {
+		bodyModified = true
+		disablePatch()
+	}
+
 	// Re-serialize body only if modified
 	if bodyModified {
 		serializedByPatch := false
@@ -2358,6 +2363,14 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
 		reqStream = gjson.GetBytes(body, "stream").Bool()
 	}
 
+	sanitizedBody, sanitized, err := sanitizeEmptyBase64InputImagesInOpenAIBody(body)
+	if err != nil {
+		return nil, err
+	}
+	if sanitized {
+		body = sanitizedBody
+	}
+
 	logger.LegacyPrintf("service.openai_gateway",
 		"[OpenAI 自动透传] 命中自动透传分支: account=%d name=%s type=%s model=%s stream=%v",
 		account.ID,
@@ -4691,6 +4704,138 @@ func normalizeOpenAIServiceTier(raw string) *string {
 	}
 }
 
+// sanitizeEmptyBase64InputImagesInOpenAIBody strips input_image parts whose
+// image_url is a base64 data URI with an empty payload from a raw JSON request
+// body. It returns the body to forward, whether anything was removed, and an
+// error when the body cannot be parsed or re-serialized. The JSON is decoded
+// only if the body contains both "image_url" (quoted) and base64, substrings;
+// otherwise the original bytes are returned untouched.
+func sanitizeEmptyBase64InputImagesInOpenAIBody(body []byte) ([]byte, bool, error) {
+	if len(body) == 0 || !bytes.Contains(body, []byte(`"image_url"`)) || !bytes.Contains(body, []byte(`base64,`)) {
+		return body, false, nil
+	}
+
+	var reqBody map[string]any
+	if err := json.Unmarshal(body, &reqBody); err != nil {
+		return body, false, fmt.Errorf("sanitize request body: %w", err)
+	}
+	if !sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap(reqBody) {
+		return body, false, nil
+	}
+	normalized, err := json.Marshal(reqBody)
+	if err != nil {
+		return body, false, fmt.Errorf("serialize sanitized request body: %w", err)
+	}
+	return normalized, true, nil
+}
+
+// sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap applies the empty
+// base64 image sanitization to reqBody["input"] in place and reports whether
+// the map was modified. A nil map or a missing "input" key is a no-op.
+func sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap(reqBody map[string]any) bool {
+	if reqBody == nil {
+		return false
+	}
+	input, ok := reqBody["input"]
+	if !ok {
+		return false
+	}
+	normalizedInput, changed := sanitizeEmptyBase64InputImagesInOpenAIInput(input)
+	if !changed {
+		return false
+	}
+	reqBody["input"] = normalizedInput
+	return true
+}
+
+// sanitizeEmptyBase64InputImagesInOpenAIInput filters a Responses "input"
+// array. It drops top-level input_image items and content parts whose image_url
+// is an empty base64 data URI, and drops an item entirely when removing such
+// parts leaves its content empty. Surviving items are updated in place.
+// Non-array input, non-object items, and items without array content pass
+// through unchanged. The second result reports whether anything was removed.
+func sanitizeEmptyBase64InputImagesInOpenAIInput(input any) (any, bool) {
+	items, ok := input.([]any)
+	if !ok {
+		return input, false
+	}
+
+	normalizedItems := make([]any, 0, len(items))
+	changed := false
+	for _, item := range items {
+		itemMap, ok := item.(map[string]any)
+		if !ok {
+			normalizedItems = append(normalizedItems, item)
+			continue
+		}
+		if shouldDropEmptyBase64InputImagePart(itemMap) {
+			changed = true
+			continue
+		}
+		content, ok := itemMap["content"]
+		if !ok {
+			normalizedItems = append(normalizedItems, itemMap)
+			continue
+		}
+		parts, ok := content.([]any)
+		if !ok {
+			normalizedItems = append(normalizedItems, itemMap)
+			continue
+		}
+
+		normalizedParts := make([]any, 0, len(parts))
+		itemChanged := false
+		for _, part := range parts {
+			if shouldDropEmptyBase64InputImagePart(part) {
+				changed = true
+				itemChanged = true
+				continue
+			}
+			normalizedParts = append(normalizedParts, part)
+		}
+		if itemChanged {
+			if len(normalizedParts) == 0 {
+				// Every part was an empty image; drop the whole item.
+				continue
+			}
+			itemMap["content"] = normalizedParts
+		}
+		normalizedItems = append(normalizedItems, itemMap)
+	}
+	if !changed {
+		return input, false
+	}
+	return normalizedItems, true
+}
+
+// shouldDropEmptyBase64InputImagePart reports whether part is a JSON object
+// of type "input_image" whose image_url string is an empty base64 data URI.
+func shouldDropEmptyBase64InputImagePart(part any) bool {
+	partMap, ok := part.(map[string]any)
+	if !ok {
+		return false
+	}
+	typeValue, _ := partMap["type"].(string)
+	if strings.TrimSpace(typeValue) != "input_image" {
+		return false
+	}
+	imageURL, _ := partMap["image_url"].(string)
+	return isEmptyBase64DataURI(imageURL)
+}
+
+// isEmptyBase64DataURI reports whether raw is a base64 data URI
+// ("data:[<mediatype>][;param=value...];base64,<payload>") whose payload is
+// empty or whitespace-only. Everything before the first comma is the header,
+// so media-type parameters such as ";charset=utf-8" are tolerated. Non-data
+// URLs and data URIs that are not base64-encoded are never reported as empty.
+func isEmptyBase64DataURI(raw string) bool {
+	header, payload, found := strings.Cut(raw, ",")
+	if !found || !strings.HasPrefix(header, "data:") || !strings.HasSuffix(header, ";base64") {
+		return false
+	}
+	return strings.TrimSpace(payload) == ""
+}
+
 func getOpenAIRequestBodyMap(c *gin.Context, body []byte) (map[string]any, error) {
 	if c != nil {
 		if cached, ok := c.Get(OpenAIParsedRequestBodyKey); ok {
diff --git a/backend/internal/service/openai_gateway_service_hotpath_test.go b/backend/internal/service/openai_gateway_service_hotpath_test.go
index f73c06c5e1..234dee00cf 100644
--- a/backend/internal/service/openai_gateway_service_hotpath_test.go
+++ b/backend/internal/service/openai_gateway_service_hotpath_test.go
@@ -1,6 +1,7 @@
 package service
 
 import (
+	"encoding/json"
 	"net/http/httptest"
 	"testing"
 
@@ -139,3 +140,67 @@ func TestGetOpenAIRequestBodyMap_WriteBackContextCache(t *testing.T) {
 	require.True(t, ok)
 	require.Equal(t, got, cachedMap)
 }
+
+func TestSanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap(t *testing.T) {
+	var reqBody map[string]any
+	require.NoError(t, json.Unmarshal([]byte(`{
+		"model":"gpt-5.4",
+		"input":[
+			{"role":"user","content":[
+				{"type":"input_text","text":"Describe this"},
+				{"type":"input_image","image_url":"data:image/png;base64, "},
+				{"type":"input_image","image_url":"data:image/png;base64,abc123"}
+			]},
+			{"role":"user","content":[
+				{"type":"input_image","image_url":"data:image/png;base64,"}
+			]},
+			{"type":"input_image","image_url":"data:image/png;base64,"},
+			{"type":"input_image","image_url":"data:image/png;base64,top-level-valid"}
+		]
+	}`), &reqBody))
+
+	require.True(t, sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap(reqBody))
+
+	normalized, err := json.Marshal(reqBody)
+	require.NoError(t, err)
+	require.JSONEq(t, `{
+		"model":"gpt-5.4",
+		"input":[
+			{"role":"user","content":[
+				{"type":"input_text","text":"Describe this"},
+				{"type":"input_image","image_url":"data:image/png;base64,abc123"}
+			]},
+			{"type":"input_image","image_url":"data:image/png;base64,top-level-valid"}
+		]
+	}`, string(normalized))
+}
+
+func TestSanitizeEmptyBase64InputImagesInOpenAIBody(t *testing.T) {
+	body, changed, err := sanitizeEmptyBase64InputImagesInOpenAIBody([]byte(`{
+		"model":"gpt-5.4",
+		"stream":true,
+		"input":[
+			{"role":"user","content":[
+				{"type":"input_text","text":"Describe this"},
+				{"type":"input_image","image_url":"data:image/png;base64,"}
+			]}
+		]
+	}`))
+	require.NoError(t, err)
+	require.True(t, changed)
+	require.JSONEq(t, `{
+		"model":"gpt-5.4",
+		"stream":true,
+		"input":[
+			{"role":"user","content":[
+				{"type":"input_text","text":"Describe this"}
+			]}
+		]
+	}`, string(body))
+}
+
+func TestIsEmptyBase64DataURI_MediaTypeParameters(t *testing.T) {
+	require.True(t, isEmptyBase64DataURI("data:image/png;charset=utf-8;base64, "))
+	require.False(t, isEmptyBase64DataURI("data:image/png;charset=utf-8;base64,abc123"))
+	require.False(t, isEmptyBase64DataURI("data:image/png,;base64,"))
+}