@@ -169,6 +169,7 @@ def whisper(
169
169
ocr_provider : str = "advanced" ,
170
170
line_splitter_tolerance : float = 0.4 ,
171
171
horizontal_stretch_factor : float = 1.0 ,
172
+ encoding : str = "utf-8"
172
173
) -> dict :
173
174
"""
174
175
Sends a request to the LLMWhisperer API to process a document.
@@ -190,6 +191,7 @@ def whisper(
190
191
ocr_provider (str, optional): The OCR provider. Can be "advanced" or "basic". Defaults to "advanced".
191
192
line_splitter_tolerance (float, optional): The line splitter tolerance. Defaults to 0.4.
192
193
horizontal_stretch_factor (float, optional): The horizontal stretch factor. Defaults to 1.0.
194
+ encoding (str, optional): The character encoding used to decode the text of the API response. Defaults to "utf-8".
193
195
194
196
Returns:
195
197
dict: The response from the API as a dictionary.
@@ -268,6 +270,7 @@ def generate():
268
270
prepared = req .prepare ()
269
271
s = requests .Session ()
270
272
response = s .send (prepared , timeout = self .api_timeout , stream = should_stream )
273
+ response .encoding = encoding
271
274
if response .status_code != 200 and response .status_code != 202 :
272
275
message = json .loads (response .text )
273
276
message ["status_code" ] = response .status_code
@@ -318,7 +321,7 @@ def whisper_status(self, whisper_hash: str) -> dict:
318
321
message ["status_code" ] = response .status_code
319
322
return message
320
323
321
- def whisper_retrieve (self , whisper_hash : str ) -> dict :
324
+ def whisper_retrieve (self , whisper_hash : str , encoding : str = "utf-8" ) -> dict :
322
325
"""Retrieves the result of the whisper operation from the LLMWhisperer
323
326
API.
324
327
@@ -329,6 +332,7 @@ def whisper_retrieve(self, whisper_hash: str) -> dict:
329
332
330
333
Args:
331
334
whisper_hash (str): The hash of the whisper operation.
335
+ encoding (str, optional): The character encoding used to decode the text of the API response. Defaults to "utf-8".
332
336
333
337
Returns:
334
338
dict: A dictionary containing the status code and the extracted text from the whisper operation.
@@ -345,6 +349,7 @@ def whisper_retrieve(self, whisper_hash: str) -> dict:
345
349
prepared = req .prepare ()
346
350
s = requests .Session ()
347
351
response = s .send (prepared , timeout = self .api_timeout )
352
+ response .encoding = encoding
348
353
if response .status_code != 200 :
349
354
err = json .loads (response .text )
350
355
err ["status_code" ] = response .status_code
0 commit comments