@@ -31,7 +31,7 @@ struct ContentView: View {
31
31
@State private var availableModels : [ String ] = [ ]
32
32
@State private var availableLanguages : [ String ] = [ ]
33
33
@State private var disabledModels : [ String ] = WhisperKit . recommendedModels ( ) . disabled
34
-
34
+ @ AppStorage ( " promptText " ) private var promptText : String ?
35
35
@AppStorage ( " selectedAudioInput " ) private var selectedAudioInput : String = " No Audio Input "
36
36
@AppStorage ( " selectedModel " ) private var selectedModel : String = WhisperKit . recommendedModels ( ) . default
37
37
@AppStorage ( " selectedTab " ) private var selectedTab : String = " Transcribe "
@@ -765,6 +765,11 @@ struct ContentView: View {
765
765
766
766
var settingsForm : some View {
767
767
List {
768
+
769
+
770
+
771
+
772
+
768
773
HStack {
769
774
Text ( " Show Timestamps " )
770
775
InfoButton ( " Toggling this will include/exclude timestamps in both the UI and the prefill tokens. \n Either <|notimestamps|> or <|0.00|> will be forced based on this setting unless \" Prompt Prefill \" is de-selected. " )
@@ -817,6 +822,14 @@ struct ContentView: View {
817
822
}
818
823
. padding ( . horizontal)
819
824
. padding ( . bottom)
825
+
826
+ TextField ( " Enter prompt text " , text: Binding (
827
+ get: { self . promptText ?? " " } ,
828
+ set: { self . promptText = $0. isEmpty ? nil : $0 }
829
+ ) )
830
+ . textFieldStyle ( . roundedBorder)
831
+ . padding ( . horizontal)
832
+ . padding ( . bottom)
820
833
821
834
VStack {
822
835
Text ( " Starting Temperature: " )
@@ -1303,7 +1316,7 @@ struct ContentView: View {
1303
1316
let task : DecodingTask = selectedTask == " transcribe " ? . transcribe : . translate
1304
1317
let seekClip : [ Float ] = [ lastConfirmedSegmentEndSeconds]
1305
1318
1306
- let options = DecodingOptions (
1319
+ var options = DecodingOptions (
1307
1320
verbose: true ,
1308
1321
task: task,
1309
1322
language: languageCode,
@@ -1318,6 +1331,19 @@ struct ContentView: View {
1318
1331
clipTimestamps: seekClip,
1319
1332
chunkingStrategy: chunkingStrategy
1320
1333
)
1334
+
1335
+ // Prompt
1336
+ if let promptText = promptText {
1337
+ guard whisperKit. tokenizer != nil else {
1338
+ throw WhisperError . tokenizerUnavailable ( )
1339
+ }
1340
+
1341
+ if promptText. count > 0 , let tokenizer = whisperKit. tokenizer {
1342
+ options. promptTokens = tokenizer. encode ( text: " " + promptText. trimmingCharacters ( in: . whitespaces) ) . filter { $0 < tokenizer. specialTokens. specialTokenBegin }
1343
+ options. usePrefillPrompt = true
1344
+ }
1345
+ }
1346
+
1321
1347
1322
1348
// Early stopping checks
1323
1349
let decodingCallback : ( ( TranscriptionProgress ) -> Bool ? ) = { ( progress: TranscriptionProgress ) in
@@ -1542,7 +1568,7 @@ struct ContentView: View {
1542
1568
print ( selectedLanguage)
1543
1569
print ( languageCode)
1544
1570
1545
- let options = DecodingOptions (
1571
+ var options = DecodingOptions (
1546
1572
verbose: true ,
1547
1573
task: task,
1548
1574
language: languageCode,
@@ -1556,6 +1582,18 @@ struct ContentView: View {
1556
1582
wordTimestamps: true , // required for eager mode
1557
1583
firstTokenLogProbThreshold: - 1.5 // higher threshold to prevent fallbacks from running too often
1558
1584
)
1585
+
1586
+ // Prompt
1587
+ if let promptText = promptText {
1588
+ guard whisperKit. tokenizer != nil else {
1589
+ throw WhisperError . tokenizerUnavailable ( )
1590
+ }
1591
+
1592
+ if promptText. count > 0 , let tokenizer = whisperKit. tokenizer {
1593
+ options. promptTokens = tokenizer. encode ( text: " " + promptText. trimmingCharacters ( in: . whitespaces) ) . filter { $0 < tokenizer. specialTokens. specialTokenBegin }
1594
+ options. usePrefillPrompt = true
1595
+ }
1596
+ }
1559
1597
1560
1598
// Early stopping checks
1561
1599
let decodingCallback : ( ( TranscriptionProgress ) -> Bool ? ) = { progress in
0 commit comments