Skip to content

Commit af07517

Browse files
sunyuhan1998 authored and ilayaperumalg committed
fix: GH-3557 - Fixed the issue where the audio filename was lost when sending requests to the OpenAI /transcriptions and /translations endpoints.
Auto-cherry-pick to 1.0.x
Fixes #3557
Signed-off-by: Sun Yuhan <[email protected]>
1 parent a84256a commit af07517

File tree

3 files changed

+39
-13
lines changed

3 files changed

+39
-13
lines changed

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,8 +167,10 @@ OpenAiAudioApi.TranscriptionRequest createRequest(AudioTranscriptionPrompt trans
167167
}
168168
}
169169

170+
Resource instructions = transcriptionPrompt.getInstructions();
170171
return OpenAiAudioApi.TranscriptionRequest.builder()
171-
.file(toBytes(transcriptionPrompt.getInstructions()))
172+
.file(toBytes(instructions))
173+
.fileName(instructions.getFilename())
172174
.responseFormat(options.getResponseFormat())
173175
.prompt(options.getPrompt())
174176
.temperature(options.getTemperature())

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ public <T> ResponseEntity<T> createTranscription(TranscriptionRequest requestBod
160160

161161
@Override
162162
public String getFilename() {
163-
return "audio.webm";
163+
return requestBody.fileName();
164164
}
165165
});
166166
multipartBody.add("model", requestBody.model());
@@ -206,7 +206,7 @@ public <T> ResponseEntity<T> createTranslation(TranslationRequest requestBody, C
206206

207207
@Override
208208
public String getFilename() {
209-
return "audio.webm";
209+
return requestBody.fileName();
210210
}
211211
});
212212
multipartBody.add("model", requestBody.model());
@@ -496,6 +496,7 @@ public SpeechRequest build() {
496496
* Transcription</a>
497497
*
498498
* @param file The audio file to transcribe. Must be a valid audio file type.
499+
* @param fileName The audio file name.
499500
* @param model ID of the model to use. Only whisper-1 is currently available.
500501
* @param language The language of the input audio. Supplying the input language in
501502
* ISO-639-1 format will improve accuracy and latency.
@@ -517,6 +518,7 @@ public SpeechRequest build() {
517518
public record TranscriptionRequest(
518519
// @formatter:off
519520
@JsonProperty("file") byte[] file,
521+
@JsonProperty("fileName") String fileName,
520522
@JsonProperty("model") String model,
521523
@JsonProperty("language") String language,
522524
@JsonProperty("prompt") String prompt,
@@ -554,6 +556,8 @@ public static class Builder {
554556

555557
private byte[] file;
556558

559+
private String fileName;
560+
557561
private String model = WhisperModel.WHISPER_1.getValue();
558562

559563
private String language;
@@ -571,6 +575,11 @@ public Builder file(byte[] file) {
571575
return this;
572576
}
573577

578+
public Builder fileName(String fileName) {
579+
this.fileName = fileName;
580+
return this;
581+
}
582+
574583
public Builder model(String model) {
575584
this.model = model;
576585
return this;
@@ -603,11 +612,12 @@ public Builder granularityType(GranularityType granularityType) {
603612

604613
public TranscriptionRequest build() {
605614
Assert.notNull(this.file, "file must not be null");
615+
Assert.notNull(this.fileName, "fileName must not be null");
606616
Assert.hasText(this.model, "model must not be empty");
607617
Assert.notNull(this.responseFormat, "response_format must not be null");
608618

609-
return new TranscriptionRequest(this.file, this.model, this.language, this.prompt, this.responseFormat,
610-
this.temperature, this.granularityType);
619+
return new TranscriptionRequest(this.file, this.fileName, this.model, this.language, this.prompt,
620+
this.responseFormat, this.temperature, this.granularityType);
611621
}
612622

613623
}
@@ -619,6 +629,7 @@ public TranscriptionRequest build() {
619629
*
620630
* @param file The audio file object (not file name) to translate, in one of these
621631
* formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
632+
* @param fileName The audio file name.
622633
* @param model ID of the model to use. Only whisper-1 is currently available.
623634
* @param prompt An optional text to guide the model's style or continue a previous
624635
* audio segment. The prompt should be in English.
@@ -633,6 +644,7 @@ public TranscriptionRequest build() {
633644
public record TranslationRequest(
634645
// @formatter:off
635646
@JsonProperty("file") byte[] file,
647+
@JsonProperty("fileName") String fileName,
636648
@JsonProperty("model") String model,
637649
@JsonProperty("prompt") String prompt,
638650
@JsonProperty("response_format") TranscriptResponseFormat responseFormat,
@@ -647,6 +659,8 @@ public static class Builder {
647659

648660
private byte[] file;
649661

662+
private String fileName;
663+
650664
private String model = WhisperModel.WHISPER_1.getValue();
651665

652666
private String prompt;
@@ -660,6 +674,11 @@ public Builder file(byte[] file) {
660674
return this;
661675
}
662676

677+
public Builder fileName(String fileName) {
678+
this.fileName = fileName;
679+
return this;
680+
}
681+
663682
public Builder model(String model) {
664683
this.model = model;
665684
return this;
@@ -685,7 +704,7 @@ public TranslationRequest build() {
685704
Assert.hasText(this.model, "model must not be empty");
686705
Assert.notNull(this.responseFormat, "response_format must not be null");
687706

688-
return new TranslationRequest(this.file, this.model, this.prompt, this.responseFormat,
707+
return new TranslationRequest(this.file, this.fileName, this.model, this.prompt, this.responseFormat,
689708
this.temperature);
690709
}
691710

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioApiIT.java

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -63,24 +63,29 @@ void speechTranscriptionAndTranslation() throws IOException {
6363
FileCopyUtils.copy(speech, new File("target/speech.mp3"));
6464

6565
StructuredResponse translation = this.audioApi
66-
.createTranslation(
67-
TranslationRequest.builder().model(WhisperModel.WHISPER_1.getValue()).file(speech).build(),
68-
StructuredResponse.class)
66+
.createTranslation(TranslationRequest.builder()
67+
.model(WhisperModel.WHISPER_1.getValue())
68+
.file(speech)
69+
.fileName("speech.mp3")
70+
.build(), StructuredResponse.class)
6971
.getBody();
7072

7173
assertThat(translation.text().replaceAll(",", "")).isEqualTo("Hello my name is Chris and I love Spring AI.");
7274

7375
StructuredResponse transcriptionEnglish = this.audioApi
74-
.createTranscription(
75-
TranscriptionRequest.builder().model(WhisperModel.WHISPER_1.getValue()).file(speech).build(),
76-
StructuredResponse.class)
76+
.createTranscription(TranscriptionRequest.builder()
77+
.model(WhisperModel.WHISPER_1.getValue())
78+
.file(speech)
79+
.fileName("speech.mp3")
80+
.build(), StructuredResponse.class)
7781
.getBody();
7882

7983
assertThat(transcriptionEnglish.text().replaceAll(",", ""))
8084
.isEqualTo("Hello my name is Chris and I love Spring AI.");
8185

8286
StructuredResponse transcriptionDutch = this.audioApi
83-
.createTranscription(TranscriptionRequest.builder().file(speech).language("nl").build(),
87+
.createTranscription(
88+
TranscriptionRequest.builder().file(speech).fileName("speech.mp3").language("nl").build(),
8489
StructuredResponse.class)
8590
.getBody();
8691

0 commit comments

Comments
 (0)