-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrequirements.txt
More file actions
63 lines (59 loc) · 1.81 KB
/
requirements.txt
File metadata and controls
63 lines (59 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Core libraries
playwright>=1.54.0
selenium>=4.40.0
rich>=14.1.0
flask-socketio>=5.5.1
Flask-Limiter>=3.8.0
gunicorn>=23.0.0
# kombu: message-queue abstraction used by python-socketio for PostgreSQL/SQLAlchemy backends.
# Enables SOCKETIO_USE_DB_QUEUE=true (SQLAlchemy/PostgreSQL broker).
kombu>=5.6.2
python-dotenv>=1.2.1
selectolax>=0.3.32
langdetect>=1.0.9
azure_identity>=1.24.0
# Pinned for Python 3.12 stability (spaCy stack)
spacy==3.8.11
datasets==2.19.2
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
# Optional CAPTCHA visual bypass (install separately when needed)
# seleniumbase>=4.40.8
# File handling
pandas==2.2.3
numpy==2.2.6
pytesseract>=0.3.13
pdf2image>=1.17.0
PyMuPDF>=1.26.5
sqlalchemy>=2.0.42
psycopg2>=2.9.10
openpyxl>=3.1.5
pdfminer.six>=20221105
camelot-py[cv]>=1.0.9
ghostscript>=0.8.1
# opencv-python is already pulled in by camelot-py[cv]; the explicit entry below is redundant but may be kept if you prefer
opencv-python>=4.12.0.88
# CLI and ML utilities
rapidfuzz>=3.13.0
matplotlib>=3.10.5
sentence_transformers>=5.1.0
torch==2.9.1
--extra-index-url https://download.pytorch.org/whl/cpu
nltk>=3.9.1
# For JSON and structured parsing
# ⚠️ CRITICAL (applies to the orjson pin below, not to jsonschema):
# orjson 3.9.5 is compatible with Python 3.12+.
# Python 3.13 has ABI-breaking changes; DO NOT upgrade without testing.
# See: https://github.com/ijl/orjson/issues/564 (3.13 support tracked)
jsonschema>=4.22.0
orjson==3.9.5
prometheus_client>=0.16.0
# For browser fingerprint masking (via Playwright)
# fake-useragent>=2.2.0
# NLP and ML enhancements
spacy-lookups-data>=1.0.5
# shap>=0.48.0
# flask-login (user session management) is not used in the current version;
# it is listed here as an optional future implementation.
# flask-login>=0.6.2
# flask-wtf (form handling) is not used in the current version;
# it is listed here as an optional future implementation.
# flask-wtf>=1.0.0