diff --git a/poetry.lock b/poetry.lock index 12525474..9db17fa9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -152,7 +152,7 @@ files = [ name = "anyio" version = "4.4.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, @@ -905,7 +905,7 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"] name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, @@ -937,7 +937,7 @@ profile = ["gprof2dot (>=2022.7.29)"] name = "dirtyjson" version = "1.0.8" description = "JSON decoder for Python that can extract data from the muck" -optional = true +optional = false python-versions = "*" files = [ {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, @@ -1089,7 +1089,7 @@ torchvision = ">=0.5" name = "environs" version = "9.5.0" description = "simplified environment variable parsing" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"}, @@ -1448,7 +1448,7 @@ test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", name = "greenlet" version = "3.1.0" description = "Lightweight in-process concurrent programming" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "greenlet-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a814dc3100e8a046ff48faeaa909e80cdb358411a3d6dd5293158425c684eda8"}, @@ -1527,7 +1527,7 @@ test = ["objgraph", "psutil"] name = "grpcio" version = "1.66.1" description = "HTTP/2-based RPC framework" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "grpcio-1.66.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:4877ba180591acdf127afe21ec1c7ff8a5ecf0fe2600f0d3c50e8c4a1cbc6492"}, @@ -1585,7 +1585,7 @@ protobuf = ["grpcio-tools (>=1.66.1)"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, @@ -1606,7 +1606,7 @@ files = [ name = "httpcore" version = "1.0.5" description = "A minimal low-level HTTP client." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, @@ -1627,7 +1627,7 @@ trio = ["trio (>=0.22.0,<0.26.0)"] name = "httpx" version = "0.27.2" description = "The next generation HTTP client." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, @@ -2019,7 +2019,7 @@ i18n = ["Babel (>=2.7)"] name = "joblib" version = "1.4.2" description = "Lightweight pipelining with Python functions" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, @@ -2067,7 +2067,7 @@ attrs = ">=19.2.0" name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, @@ -2081,7 +2081,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "3.0.0" description = "Identify specific nodes in a JSON document (RFC 6901)" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, @@ -2343,7 +2343,7 @@ files = [ name = "langchain-core" version = "0.2.40" description = "Building applications with LLMs through composability" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "langchain_core-0.2.40-py3-none-any.whl", hash = "sha256:71fff5cafa4b9c82a3a716e985f071383be452c35d8cc3169b3a393e6857fc99"}, @@ -2366,7 +2366,7 @@ typing-extensions = ">=4.7" name = "langchain-huggingface" version = "0.0.3" description = "An integration package connecting Hugging Face and LangChain" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "langchain_huggingface-0.0.3-py3-none-any.whl", hash = "sha256:d6827adf3c7c8fcc0bca8c43c7e900c3bf68af9a1532a83d4b8ace137e02887e"}, @@ -2384,7 +2384,7 @@ transformers = ">=4.39.0" name = "langchain-milvus" version = "0.1.5" description = "An integration package connecting Milvus and LangChain" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "langchain_milvus-0.1.5-py3-none-any.whl", hash = "sha256:74aa487738afde4c3e1346433ef26f9556e599826161562b308d3357d86529fd"}, @@ -2403,7 +2403,7 @@ scipy = [ name = "langchain-text-splitters" version = "0.2.4" description = "LangChain text splitting utilities" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "langchain_text_splitters-0.2.4-py3-none-any.whl", hash = "sha256:2702dee5b7cbdd595ccbe43b8d38d01a34aa8583f4d6a5a68ad2305ae3e7b645"}, @@ -2417,7 +2417,7 @@ langchain-core = ">=0.2.38,<0.3.0" name = "langsmith" version = "0.1.121" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "langsmith-0.1.121-py3-none-any.whl", hash = "sha256:fdb1ac8a671d3904201bfeea197d87bded46a10d08f1034af464211872e29893"}, @@ -2502,7 +2502,7 @@ files = [ name = "llama-index-core" version = "0.11.10" description = "Interface between LLMs and your data" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "llama_index_core-0.11.10-py3-none-any.whl", hash = "sha256:2dddd7cb4ccee89fdbbddd62e5fe3c7ae7fc431130e0a0a7155daee052874191"}, @@ -2536,7 +2536,7 @@ wrapt = "*" name = "llama-index-embeddings-huggingface" version = "0.3.1" description = "llama-index embeddings huggingface integration" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "llama_index_embeddings_huggingface-0.3.1-py3-none-any.whl", hash = "sha256:71708240b1aec183c80f20d531b39a75d0cce774586e11bb0798f3ecb270749c"}, @@ -2552,7 +2552,7 @@ sentence-transformers = ">=2.6.1" name = "llama-index-llms-huggingface-api" version = "0.2.0" description = "llama-index llms huggingface api integration" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "llama_index_llms_huggingface_api-0.2.0-py3-none-any.whl", hash = "sha256:ed12bcf6986becee1dc5f7644ff22aa96070de3b463add22c3b6ceabc2bd2f43"}, @@ -2567,7 +2567,7 @@ llama-index-core = ">=0.11.0,<0.12.0" name = "llama-index-vector-stores-milvus" version = "0.2.3" description = "llama-index vector_stores milvus integration" -optional = true +optional = false python-versions = "<4.0,>=3.8.1" files = [ {file = "llama_index_vector_stores_milvus-0.2.3-py3-none-any.whl", hash = "sha256:287c3b2b8d886eac11b07db3ddf31b92dee55ac4f00fe7dc047879e2f7d79d67"}, @@ -2947,7 +2947,7 @@ pandas = "*" name = "milvus-lite" version = "2.4.10" description = "A lightweight version of Milvus wrapped with Python." -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"}, @@ -2963,7 +2963,7 @@ tqdm = "*" name = "minijinja" version = "2.2.0" description = "An experimental Python binding of the Rust MiniJinja template engine." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "minijinja-2.2.0-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e4154fcf72e81be01c2733b770e6cb3e584851cb2fa73c58e347b04967d3d7c0"}, @@ -3311,7 +3311,7 @@ test = ["codecov (>=2.0.5)", "coverage (>=4.2)", "flake8 (>=3.0.4)", "pytest (>= name = "nltk" version = "3.9.1" description = "Natural Language Toolkit" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, @@ -3635,7 +3635,7 @@ numpy = [ name = "orjson" version = "3.10.7" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "orjson-3.10.7-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:74f4544f5a6405b90da8ea724d15ac9c36da4d72a738c64685003337401f5c12"}, @@ -4408,7 +4408,7 @@ testutils = ["gitpython (>3)"] name = "pymilvus" version = "2.4.6" description = "Python Sdk for Milvus" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pymilvus-2.4.6-py3-none-any.whl", hash = "sha256:b4c43472edc313b845d313be50610e19054e6954b2c5c3b515565c596c2d3d97"}, @@ -5009,7 +5009,7 @@ rpds-py = ">=0.7.0" name = "regex" version = "2024.9.11" description = "Alternative regular expression module, to replace re." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, @@ -5310,7 +5310,7 @@ files = [ name = "safetensors" version = "0.4.5" description = "" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7"}, @@ -5490,7 +5490,7 @@ test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=7.0)", "pytest- name = "scikit-learn" version = "1.5.2" description = "A set of python modules for machine learning and data mining" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, @@ -5611,7 +5611,7 @@ files = [ name = "sentence-transformers" version = "3.1.0" description = "Multilingual text embeddings" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "sentence_transformers-3.1.0-py3-none-any.whl", hash = "sha256:94ff19c478ada7340ee8bd5ed46db1e322f8c98a16b04a337634956ac9d4530d"}, @@ -5866,7 +5866,7 @@ files = [ name = "sniffio" version = "1.3.1" description = "Sniff out which async library your code is running under" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, @@ -5877,7 +5877,7 @@ files = [ name = "sqlalchemy" version = "2.0.35" description = "Database Abstraction Library" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "SQLAlchemy-2.0.35-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:67219632be22f14750f0d1c70e62f204ba69d28f62fd6432ba05ab295853de9b"}, @@ -6014,7 +6014,7 @@ widechars = ["wcwidth"] name = "tenacity" version = "8.5.0" description = "Retry code until it succeeds" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, @@ -6029,7 +6029,7 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] name = "threadpoolctl" version = "3.5.0" description = "threadpoolctl" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, @@ -6062,7 +6062,7 @@ zarr = ["fsspec", "zarr"] name = "tiktoken" version = "0.7.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, @@ -6125,7 +6125,7 @@ files = [ name = "tokenizers" version = "0.19.1" description = "" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"}, @@ -6511,7 +6511,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, name = "transformers" version = "4.44.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" -optional = true +optional = false python-versions = ">=3.8.0" files = [ {file = "transformers-4.44.2-py3-none-any.whl", hash = "sha256:1c02c65e7bfa5e52a634aff3da52138b583fc6f263c1f28d547dc144ba3d412d"}, @@ -6610,11 +6610,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -6731,7 +6726,7 @@ files = [ name = "ujson" version = "5.10.0" description = "Ultra fast JSON encoder and decoder for Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, @@ -7222,10 +7217,7 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] -[extras] -examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitters", "llama-index-embeddings-huggingface", "llama-index-llms-huggingface-api", "llama-index-vector-stores-milvus", "python-dotenv"] - [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "d6ede0493d8d2d0e250ba391d9ad32ced98541fbd4795b2b955d6f640736b3bc" +content-hash = "f8a98ea2e66c8fdf190e0e33b074fbe9399fb90508427aad01a78ed4f4749d17" diff --git a/pyproject.toml b/pyproject.toml index b70658e5..7ad287c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,20 @@ keywords= ["docling", "convert", "document", "pdf", "layout model", "segmentatio packages = [{include = "docling"}] [tool.poetry.dependencies] +############## +# constraints: +############## +torch = [ + {version = "^2.2.2", optional = true, markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'"}, + {version = "~2.2.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"} +] +torchvision = [ + {version = "^0", optional = true, markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'"}, + {version = "~0.17.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"} +] +###################### +# actual dependencies: +###################### python = "^3.10" pydantic = "^2.0.0" docling-core = "^1.6.2" @@ -37,29 +51,6 @@ certifi = ">=2024.7.4" rtree = "^1.3.0" scipy = "^1.14.1" pyarrow = "^16.1.0" - -######### -# extras: -######### -python-dotenv = { version = "^1.0.1", optional = true } -llama-index-embeddings-huggingface = { version = "^0.3.1", optional = true } -llama-index-llms-huggingface-api = { version = "^0.2.0", optional = true } -llama-index-vector-stores-milvus = { version = "^0.2.1", optional = true } -langchain-huggingface = { version = "^0.0.3", optional = true} -langchain-milvus = { version = "^0.1.4", optional = true } -langchain-text-splitters = { version = "^0.2.4", optional = true } - -############## -# constraints: -############## -torch = [ - {version = "^2.2.2", optional = true, markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'"}, - {version = "~2.2.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"} -] -torchvision = [ - {version = "^0", optional = true, markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'"}, - {version = "~0.17.2", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'"} -] typer = "^0.12.5" [tool.poetry.group.dev.dependencies] @@ -82,20 +73,13 @@ nbqa = "^1.9.0" [tool.poetry.group.examples.dependencies] datasets = "^2.21.0" - -[tool.poetry.extras] -examples = [ - "python-dotenv", - # LlamaIndex examples: - "llama-index-embeddings-huggingface", - "llama-index-llms-huggingface-api", - "llama-index-vector-stores-milvus", - # LangChain examples: - "langchain-huggingface", - "langchain-milvus", - "langchain-text-splitters", -] - +python-dotenv = "^1.0.1" +llama-index-embeddings-huggingface = "^0.3.1" +llama-index-llms-huggingface-api = "^0.2.0" +llama-index-vector-stores-milvus = "^0.2.1" +langchain-huggingface = "^0.0.3" +langchain-milvus = "^0.1.4" +langchain-text-splitters = "^0.2.4" [tool.poetry.scripts] docling = "docling.cli.main:app"