Skip to content

Commit 0075b70

Browse files
authored
Merge pull request #15 from Zipstack/v2
feat: LLMW V2 client changes
2 parents 0395464 + 3e44930 commit 0075b70

24 files changed

+3823
-22
lines changed

.github/workflows/ci_test.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ jobs:
3535
- name: Create test env
3636
shell: bash
3737
run: |
38-
cp tests/sample.env tests/.env
39-
sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" tests/.env
38+
cp sample.env .env
39+
sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" .env
4040
4141
- name: Run tox
4242
id: tox

.pre-commit-config.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ repos:
1717
exclude_types:
1818
- "markdown"
1919
- id: end-of-file-fixer
20+
exclude: "tests/test_data/.*"
2021
- id: check-yaml
2122
args: [--unsafe]
2223
- id: check-added-large-files
@@ -65,9 +66,7 @@ repos:
6566
args: [--max-line-length=120]
6667
exclude: |
6768
(?x)^(
68-
.*migrations/.*\.py|
69-
unstract-core/tests/.*|
70-
pkgs/unstract-flags/src/unstract/flags/evaluation_.*\.py|
69+
tests/test_data/.*|
7170
)$
7271
- repo: https://github.com/pycqa/isort
7372
rev: 5.13.2

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
LLMs are powerful, but their output is only as good as the input you provide. LLMWhisperer is a technology that presents data from complex documents (different designs and formats) to LLMs in a way that they can best understand. LLMWhisperer features include Layout Preserving Mode, auto-switching between native text and OCR modes, and proper representation of radio buttons and checkboxes in PDF forms as raw text, among others. You can now extract raw text from complex PDF documents or images without having to worry about whether the document is a native text document, a scanned image, or just a picture taken with a smartphone. Extraction of raw text from invoices, purchase orders, bank statements, etc. works easily for structured data extraction with LLMs powered by LLMWhisperer's Layout Preserving mode.
99

10-
Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llm_whisperer/python_client/llm_whisperer_python_client_intro)
10+
Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llmwhisperer/index.html)
1111

1212
## Features
1313

pyproject.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies = [
1010
"requests>=2",
1111
]
1212
readme = "README.md"
13-
urls = { Homepage = "https://llmwhisperer.unstract.com", Source = "https://github.com/Zipstack/llm-whisperer-python-client" }
13+
urls = { Homepage = "https://unstract.com/llmwhisperer/", Source = "https://github.com/Zipstack/llm-whisperer-python-client" }
1414
license = {text = "AGPL v3"}
1515
authors = [
1616
{name = "Zipstack Inc", email = "[email protected]"},
@@ -69,7 +69,7 @@ includes = ["src"]
6969
package-dir = "src"
7070

7171
[tool.pytest.ini_options]
72-
env_files = ["tests/.env"]
72+
env_files = [".env"]
7373
addopts = "-s"
7474
log_level = "INFO"
7575
log_cli = true
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
2+
LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
23
LLMWHISPERER_LOG_LEVEL=DEBUG
34
LLMWHISPERER_API_KEY=

src/unstract/llmwhisperer/__init__.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
__version__ = "0.22.0"
1+
__version__ = "0.23.0"
22

33
from .client import LLMWhispererClient # noqa: F401
4+
from .client_v2 import LLMWhispererClientV2 # noqa: F401
45

56

6-
def get_sdk_version():
7+
def get_llmw_py_client_version():
78
"""Returns the SDK version."""
89
return __version__

src/unstract/llmwhisperer/client.py

+5-11
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,7 @@ class LLMWhispererClient:
5858
client's activities and errors.
5959
"""
6060

61-
formatter = logging.Formatter(
62-
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
63-
)
61+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
6462
logger = logging.getLogger(__name__)
6563
log_stream_handler = logging.StreamHandler()
6664
log_stream_handler.setFormatter(formatter)
@@ -117,9 +115,7 @@ def __init__(
117115
self.api_key = os.getenv("LLMWHISPERER_API_KEY", "")
118116
else:
119117
self.api_key = api_key
120-
self.logger.debug(
121-
"api_key set to %s", LLMWhispererUtils.redact_key(self.api_key)
122-
)
118+
self.logger.debug("api_key set to %s", LLMWhispererUtils.redact_key(self.api_key))
123119

124120
self.api_timeout = api_timeout
125121

@@ -169,7 +165,7 @@ def whisper(
169165
ocr_provider: str = "advanced",
170166
line_splitter_tolerance: float = 0.4,
171167
horizontal_stretch_factor: float = 1.0,
172-
encoding: str = "utf-8"
168+
encoding: str = "utf-8",
173169
) -> dict:
174170
"""
175171
Sends a request to the LLMWhisperer API to process a document.
@@ -240,12 +236,10 @@ def whisper(
240236
should_stream = False
241237
if url == "":
242238
if stream is not None:
243-
244239
should_stream = True
245240

246241
def generate():
247-
for chunk in stream:
248-
yield chunk
242+
yield from stream
249243

250244
req = requests.Request(
251245
"POST",
@@ -269,7 +263,7 @@ def generate():
269263
req = requests.Request("POST", api_url, params=params, headers=self.headers)
270264
prepared = req.prepare()
271265
s = requests.Session()
272-
response = s.send(prepared, timeout=self.api_timeout, stream=should_stream)
266+
response = s.send(prepared, timeout=timeout, stream=should_stream)
273267
response.encoding = encoding
274268
if response.status_code != 200 and response.status_code != 202:
275269
message = json.loads(response.text)

0 commit comments

Comments
 (0)