diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..b6cd8a584 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +# Ignore files that are already ignored by git +.gitignore + +scripts/ +tests/ +examples/ +*.md +*.pyc +.dockerignore +Dockerfile diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..952eee2b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,138 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# .vscode files +.vscode/* + +# Pycharm +.idea/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +myvenv/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# macOS .DS_Store files +.DS_Store \ No newline at end of file diff --git a/.well-known/ai-plugin.json b/.well-known/ai-plugin.json new file mode 100644 index 000000000..adf33a33f --- /dev/null +++ b/.well-known/ai-plugin.json @@ -0,0 +1,19 @@ +{ + "schema_version": "v1", + "name_for_model": "retrieval", + "name_for_human": "Retrieval Plugin", + "description_for_model": "Plugin for searching through the user's documents (such as files, emails, and more) to find answers to questions and retrieve relevant information. 
Use it whenever a user asks something that might be found in their personal information.", + "description_for_human": "Search through your documents", + "auth": { + "type": "user_http", + "authorization_type": "bearer" + }, + "api": { + "type": "openapi", + "url": "https://your-app-url.com/.well-known/openapi.yaml", + "has_user_authentication": false + }, + "logo_url": "https://your-app-url.com/.well-known/logo.png", + "contact_email": "hello@contact.com", + "legal_info_url": "hello@legal.com" +} \ No newline at end of file diff --git a/.well-known/logo.png b/.well-known/logo.png new file mode 100644 index 000000000..af562f798 Binary files /dev/null and b/.well-known/logo.png differ diff --git a/.well-known/openapi.yaml b/.well-known/openapi.yaml new file mode 100644 index 000000000..13ba380ac --- /dev/null +++ b/.well-known/openapi.yaml @@ -0,0 +1,196 @@ +openapi: 3.0.2 +info: + title: Retrieval Plugin API + description: A retrieval API for querying and filtering documents based on natural language queries and metadata + version: 1.0.0 + servers: + - url: https://your-app-url.com +paths: + /query: + post: + summary: Query + description: This endpoint accepts an array of search query objects, each containing a natural language query string ("query") and an optional metadata filter ("filter"). Filters can help refine search results based on criteria such as document source or time period, but are not necessary in most cases. You can send multiple queries to compare information from different sources or break down complex questions into sub-questions. If you receive a ResponseTooLargeError, try splitting up the queries into multiple calls to this endpoint. + operationId: query_query_post + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/QueryRequest" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/QueryResponse" + "422": + description: Validation Error + content: + application/json: + schema: + $ref: "#/components/schemas/HTTPValidationError" + security: + - HTTPBearer: [] +components: + schemas: + DocumentChunkMetadata: + title: DocumentChunkMetadata + type: object + properties: + source: + $ref: "#/components/schemas/Source" + source_id: + title: Source Id + type: string + url: + title: Url + type: string + created_at: + title: Created At + type: string + author: + title: Author + type: string + document_id: + title: Document Id + type: string + DocumentChunkWithScore: + title: DocumentChunkWithScore + required: + - text + - metadata + - score + type: object + properties: + id: + title: Id + type: string + text: + title: Text + type: string + metadata: + $ref: "#/components/schemas/DocumentChunkMetadata" + embedding: + title: Embedding + type: array + items: + type: number + score: + title: Score + type: number + DocumentMetadataFilter: + title: DocumentMetadataFilter + type: object + properties: + document_id: + title: Document Id + type: string + source: + $ref: "#/components/schemas/Source" + source_id: + title: Source Id + type: string + author: + title: Author + type: string + start_date: + title: Start Date + type: string + end_date: + title: End Date + type: string + HTTPValidationError: + title: HTTPValidationError + type: object + properties: + detail: + title: Detail + type: array + items: + $ref: "#/components/schemas/ValidationError" + Query: + title: Query + required: + - query + type: object + properties: + query: + title: Query + type: string + 
filter: + $ref: "#/components/schemas/DocumentMetadataFilter" + top_k: + title: Top K + type: integer + default: 3 + QueryRequest: + title: QueryRequest + required: + - queries + type: object + properties: + queries: + title: Queries + type: array + items: + $ref: "#/components/schemas/Query" + QueryResponse: + title: QueryResponse + required: + - results + type: object + properties: + results: + title: Results + type: array + items: + $ref: "#/components/schemas/QueryResult" + QueryResult: + title: QueryResult + required: + - query + - results + type: object + properties: + query: + title: Query + type: string + results: + title: Results + type: array + items: + $ref: "#/components/schemas/DocumentChunkWithScore" + Source: + title: Source + enum: + - email + - file + - chat + type: string + description: An enumeration. + ValidationError: + title: ValidationError + required: + - loc + - msg + - type + type: object + properties: + loc: + title: Location + type: array + items: + anyOf: + - type: string + - type: integer + msg: + title: Message + type: string + type: + title: Error Type + type: string + securitySchemes: + HTTPBearer: + type: http + scheme: bearer diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..dbd5ec9de --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ + +FROM python:3.10 as requirements-stage + +WORKDIR /tmp + +RUN pip install poetry + +COPY ./pyproject.toml ./poetry.lock* /tmp/ + + +RUN poetry export -f requirements.txt --output requirements.txt --without-hashes + +FROM python:3.10 + +WORKDIR /code + +COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt + +RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt + +COPY . /code/ + +# Heroku uses PORT, Azure App Services uses WEBSITES_PORT, Fly.io uses 8080 by default +CMD ["sh", "-c", "uvicorn server.main:app --host 0.0.0.0 --port ${PORT:-${WEBSITES_PORT:-8080}}"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..b3841f631 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 OpenAI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..3aaafbf26 --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +# Heroku +# make heroku-login +# make heroku-push + +HEROKU_APP = + +heroku-push: + docker buildx build --platform linux/amd64 -t ${HEROKU_APP} . 
+ docker tag ${HEROKU_APP} registry.heroku.com/${HEROKU_APP}/web + docker push registry.heroku.com/${HEROKU_APP}/web + heroku container:release web -a ${HEROKU_APP} + +heroku-login: + heroku container:login diff --git a/README.md b/README.md index ec4ea085c..c883491bd 100644 --- a/README.md +++ b/README.md @@ -1 +1,823 @@ -# chatgpt-retrieval-plugin \ No newline at end of file +# ChatGPT Retrieval Plugin + +## Introduction + +The ChatGPT Retrieval Plugin repository provides a flexible solution for semantic search and retrieval of personal or organizational documents using natural language queries. The repository is organized into several directories: + +| Directory | Description | +| ----------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| [`datastore`](/datastore) | Contains the core logic for storing and querying document embeddings using various vector database providers. | +| [`examples`](/examples) | Includes example configurations, authentication methods, and provider-specific examples. | +| [`models`](/models) | Contains the data models used by the plugin, such as document and metadata models. | +| [`scripts`](/scripts) | Provides scripts for processing and uploading documents from different data sources. | +| [`server`](/server) | Houses the main FastAPI server implementation. | +| [`services`](/services) | Contains utility services for tasks like chunking, metadata extraction, and PII detection. | +| [`tests`](/tests) | Includes integration tests for various vector database providers. | +| [`.well-known`](/.well-known) | Stores the plugin manifest file and OpenAPI schema, which define the plugin configuration and API specification. | + +This README provides detailed information on how to set up, develop, and deploy the ChatGPT Retrieval Plugin. + +## Table of Contents + +- [About](#about) + - [Plugins](#plugins) + - [Retrieval Plugin](#retrieval-plugin) + - [Memory Feature](#memory-feature) + - [Security](#security) + - [API Endpoints](#api-endpoints) +- [Development](#development) + - [Setup](#setup) + - [General Environment Variables](#general-environment-variables) + - [Choosing a Vector Database](#choosing-a-vector-database) + - [Pinecone](#pinecone) + - [Weaviate](#weaviate) + - [Zilliz](#zilliz) + - [Milvus](#milvus) + - [Qdrant](#qdrant) + - [Redis](#redis) + - [Running the API Locally](#running-the-api-locally) + - [Personalization](#personalization) + - [Authentication Methods](#authentication-methods) +- [Deployment](#deployment) + - [Deploying to Fly.io](#deploying-to-flyio) + - [Deploying to Heroku](#deploying-to-heroku) + - [Other Deployment Options](#other-deployment-options) +- [Webhooks](#webhooks) +- [Scripts](#scripts) +- [Limitations](#limitations) +- [Contributors](#contributors) +- [Future Directions](#future-directions) + +## About + +### Plugins + +Plugins are chat extensions designed specifically for language models like ChatGPT, enabling them to access up-to-date information, run computations, or interact with third-party services in response to a user's request. They unlock a wide range of potential use cases and enhance the capabilities of language models. + +Developers can create a plugin by exposing an API through their website and providing a standardized manifest file that describes the API. ChatGPT consumes these files and allows the AI models to make calls to the API defined by the developer. 
+ +A plugin consists of: + +- An API +- An API schema (OpenAPI JSON or YAML format) +- A manifest (JSON file) that defines relevant metadata for the plugin + +The Retrieval Plugin already contains all of these components. Read the Chat Plugins blogpost [here](https://openai.com/blog/chat-plugins), and find the docs [here](https://platform.openai.com/docs/plugins/introduction). + +### Retrieval Plugin + +This is a plugin for ChatGPT that enables semantic search and retrieval of personal or organizational documents. It allows users to obtain the most relevant document snippets from their data sources, such as files, notes, or emails, by asking questions or expressing needs in natural language. Enterprises can make their internal documents available to their employees through ChatGPT using this plugin. + +The plugin uses OpenAI's `text-embedding-ada-002` embeddings model to generate embeddings of document chunks, and then stores and queries them using a vector database on the backend. As an open-source and self-hosted solution, developers can deploy their own Retrieval Plugin and register it with ChatGPT. The Retrieval Plugin supports several vector database providers, allowing developers to choose their preferred one from a list. + +A FastAPI server exposes the plugin's endpoints for upserting, querying, and deleting documents. Users can refine their search results by using metadata filters by source, date, author, or other criteria. The plugin can be hosted on any cloud platform that supports Docker containers, such as Fly.io, Heroku or Azure Container Apps. To keep the vector database updated with the latest documents, the plugin can process and store documents from various data sources continuously, using incoming webhooks to the upsert and delete endpoints. Tools like [Zapier](https://zapier.com) or [Make](https://www.make.com) can help configure the webhooks based on events or schedules. + +### Memory Feature + +A notable feature of the Retrieval Plugin is its capacity to provide ChatGPT with memory. By utilizing the plugin's upsert endpoint, ChatGPT can save snippets from the conversation to the vector database for later reference (only when prompted to do so by the user). This functionality contributes to a more context-aware chat experience by allowing ChatGPT to remember and retrieve information from previous conversations. Learn how to configure the Retrieval Plugin with memory [here](/examples/memory). + +### Security + +The Retrieval Plugin allows ChatGPT to search a vector database of content, and then add the best results into the ChatGPT session. This means it doesn’t have any external effects, and the main risk consideration is data authorization and privacy. Developers should only add content into their Retrieval Plugin that they have authorization for and that they are fine with appearing in users’ ChatGPT sessions. You can choose from a number of different authentication methods to secure the plugin (more information [here](#authentication-methods)). + +### API Endpoints + +The Retrieval Plugin is built using FastAPI, a web framework for building APIs with Python. FastAPI allows for easy development, validation, and documentation of API endpoints. Find the FastAPI documentation [here](https://fastapi.tiangolo.com/). + +One of the benefits of using FastAPI is the automatic generation of interactive API documentation with Swagger UI. 
When the API is running locally, Swagger UI at `/docs` can be used to interact with the API endpoints, test their functionality, and view the expected request and response models. + +The plugin exposes the following endpoints for upserting, querying, and deleting documents from the vector database. All requests and responses are in JSON format, and require a valid bearer token as an authorization header. + +- `/upsert`: This endpoint allows uploading one or more documents and storing their text and metadata in the vector database. The documents are split into chunks of around 200 tokens, each with a unique ID. The endpoint expects a list of documents in the request body, each with a `text` field, and optional `id` and `metadata` fields. The `metadata` field can contain the following optional subfields: `source`, `source_id`, `url`, `created_at`, and `author`. The endpoint returns a list of the IDs of the inserted documents (an ID is generated if not initially provided). + +- `/upsert-file`: This endpoint allows uploading a single file (PDF, TXT, DOCX, PPTX, or MD) and store its text and metadata in the vector database. The file is converted to plain text and split into chunks of around 200 tokens, each with a unique ID. The endpoint returns a list containing the generated id of the inserted file. + +- `/query`: This endpoint allows querying the vector database using one or more natural language queries and optional metadata filters. The endpoint expects a list of queries in the request body, each with a `query` and optional `filter` and `top_k` fields. The `filter` field should contain a subset of the following subfields: `source`, `source_id`, `document_id`, `url`, `created_at`, and `author`. The `top_k` field specifies how many results to return for a given query, and the default value is 3. The endpoint returns a list of objects that each contain a list of the most relevant document chunks for the given query, along with their text, metadata and similarity scores. + +- `/delete`: This endpoint allows deleting one or more documents from the vector database using their IDs, a metadata filter, or a delete_all flag. The endpoint expects at least one of the following parameters in the request body: `ids`, `filter`, or `delete_all`. The `ids` parameter should be a list of document IDs to delete; all document chunks for the document with these IDS will be deleted. The `filter` parameter should contain a subset of the following subfields: `source`, `source_id`, `document_id`, `url`, `created_at`, and `author`. The `delete_all` parameter should be a boolean indicating whether to delete all documents from the vector database. The endpoint returns a boolean indicating whether the deletion was successful. + +The detailed specifications and examples of the request and response models can be found by running the app locally and navigating to http://0.0.0.0:8000/openapi.json, or in the OpenAPI schema [here](/.well-known/openapi.yaml). Note that the OpenAPI schema only contains the `/query` endpoint, because that is the only function that ChatGPT needs to access. This way, ChatGPT can use the plugin only to retrieve relevant documents based on natural language queries or needs. However, if developers want to also give ChatGPT the ability to remember things for later, they can use the `/upsert` endpoint to save snippets from the conversation to the vector database. An example of a manifest and OpenAPI schema that give ChatGPT access to the `/upsert` endpoint can be found [here](/examples/memory). 
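For reference, a request to the `/query` endpoint might look like the following sketch (the URL and query text are placeholders, the `filter` and `top_k` fields are optional, and the command assumes your bearer token is stored in the `BEARER_TOKEN` environment variable):

```bash
curl -X POST https://your-app-url.com/query \
  -H "Authorization: Bearer $BEARER_TOKEN" \
  -H "Content-type: application/json" \
  -d '{"queries": [{"query": "What is our refund policy?", "filter": {"source": "file"}, "top_k": 3}]}'
```

The response contains one result object per query, each with a list of the most relevant document chunks along with their text, metadata, and similarity scores.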
+ +To include custom metadata fields, edit the `DocumentMetadata` and `DocumentMetadataFilter` data models [here](/models/models.py), and update the OpenAPI schema [here](/.well-known/openapi.yaml). You can update this easily by running the app locally, copying the json found at http://0.0.0.0:8000/sub/openapi.json, and converting it to YAML format with [Swagger Editor](https://editor.swagger.io/). Alternatively, you can replace the `openapi.yaml` file with an `openapi.json` file. + +## Development + +### Setup + +This app uses Python 3.10, and [poetry](https://python-poetry.org/) for dependency management. + +Install Python 3.10 on your machine if it isn't already installed. It can be downloaded from the official [Python website](https://www.python.org/downloads/) or with a package manager like `brew` or `apt`, depending on your system. + +Clone the repository from GitHub: + +``` +git clone https://github.com/openai/chatgpt-retrieval-plugin.git +``` + +Navigate to the cloned repository directory: + +``` +cd /path/to/chatgpt-retrieval-plugin +``` + +Install poetry: + +``` +pip install poetry +``` + +Create a new virtual environment that uses Python 3.10: + +``` +poetry env use python3.10 +poetry shell +``` + +Install app dependencies using poetry: + +``` +poetry install +``` + +**Note:** If adding dependencies in the `pyproject.toml`, make sure to run `poetry lock` and `poetry install`. + +#### General Environment Variables + +The API requires the following environment variables to work: + +| Name | Required | Description | +| ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `DATASTORE` | Yes | This specifies the vector database provider you want to use to store and query embeddings. You can choose from `pinecone`, `weaviate`, `zilliz`, `milvus`, `qdrant`, or `redis`. | +| `BEARER_TOKEN` | Yes | This is a secret token that you need to authenticate your requests to the API. You can generate one using any tool or method you prefer, such as [jwt.io](https://jwt.io/). | +| `OPENAI_API_KEY` | Yes | This is your OpenAI API key that you need to generate embeddings using the `text-embedding-ada-002` model. You can get an API key by creating an account on [OpenAI](https://openai.com/). | + +### Choosing a Vector Database + +The plugin supports several vector database providers, each with different features, performance, and pricing. Depending on which one you choose, you will need to use a different docker file and set different environment variables. The following sections provide detailed information and instructions on using each vector database provider. + +#### Pinecone + +[Pinecone](https://www.pinecone.io) is a managed vector database built for speed, scale, and shipping to production sooner. To use Pinecone as your vector database provider, first get an API key by [signing up for an account](https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard. + +The app will create a Pinecone index for you automatically when you run it for the first time. Just pick a name for your index and set it as an environment variable. 
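For example, with placeholder values (a sketch; see the table below for the full list of variables):

```bash
export DATASTORE=pinecone
export PINECONE_API_KEY=your_pinecone_api_key
export PINECONE_ENVIRONMENT=us-west1-gcp
export PINECONE_INDEX=my-retrieval-index
```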
+ +Environment Variables: + +| Name | Required | Description | +| ---------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------- | +| `DATASTORE` | Yes | Datastore name, set this to `pinecone` | +| `BEARER_TOKEN` | Yes | Your secret token for authenticating requests to the API | +| `OPENAI_API_KEY` | Yes | Your OpenAI API key for generating embeddings with the `text-embedding-ada-002` model | +| `PINECONE_API_KEY` | Yes | Your Pinecone API key, found in the [Pinecone console](https://app.pinecone.io/) | +| `PINECONE_ENVIRONMENT` | Yes | Your Pinecone environment, found in the [Pinecone console](https://app.pinecone.io/), e.g. `us-west1-gcp`, `us-east-1-aws`, etc. | +| `PINECONE_INDEX` | Yes | Your chosen Pinecone index name. **Note:** Index name must consist of lower case alphanumeric characters or '-' | + +If you want to create your own index with custom configurations, you can do so using the Pinecone SDK, API, or web interface ([see docs](https://docs.pinecone.io/docs/manage-indexes)). Make sure to use a dimensionality of 1536 for the embeddings and avoid indexing on the text field in the metadata, as this will reduce the performance significantly. + +```python +# Creating index with Pinecone SDK - use only if you wish to create the index manually. + +import os, pinecone + +pinecone.init(api_key=os.environ['PINECONE_API_KEY'], + environment=os.environ['PINECONE_ENVIRONMENT']) + +pinecone.create_index(name=os.environ['PINECONE_INDEX'], + dimension=1536, + metric='cosine', + metadata_config={ + "indexed": ['source', 'source_id', 'url', 'created_at', 'author', 'document_id']}) +``` + +#### Weaviate + +##### Set up a Weaviate Instance + +Weaviate is an open-source vector search engine designed to scale seamlessly into billions of data objects. This implementation supports hybrid search out-of-the-box (meaning it will perform better for keyword searches). + +You can run Weaviate in 4 ways: + +- **SaaS** – with [Weaviate Cloud Services (WCS)](https://weaviate.io/pricing). + + WCS is a fully managed service that takes care of hosting, scaling, and updating your Weaviate instance. You can try it out for free with a sandbox that lasts for 30 days. + + To set up a SaaS Weaviate instance with WCS: + + 1. Navigate to [Weaviate Cloud Console](https://console.weaviate.io/). + 2. Register or sign in to your WCS account. + 3. Create a new cluster with the following settings: + - `Name` – a unique name for your cluster. The name will become part of the URL used to access this instance. + - `Subscription Tier` – Sandbox for a free trial, or contact [hello@weaviate.io](mailto:hello@weaviate.io) for other options. + - `Weaviate Version` - The latest version by default. + - `OIDC Authentication` – Enabled by default. This requires a username and password to access your instance. + 4. Wait for a few minutes until your cluster is ready. You will see a green tick ✔️ when it's done. Copy your cluster URL. + +- **Hybrid SaaS** + + > If you need to keep your data on-premise for security or compliance reasons, Weaviate also offers a Hybrid SaaS option: Weaviate runs within your cloud instances, but the cluster is managed remotely by Weaviate. This gives you the benefits of a managed service without sending data to an external party. + + The Weaviate Hybrid SaaS is a custom solution. If you are interested in this option, please reach out to [hello@weaviate.io](mailto:hello@weaviate.io). 
+ +- **Self-hosted** – with a Docker container + + To set up a Weaviate instance with Docker: + + 1. Download a `docker-compose.yml` file with this `curl` command: + + ``` + curl -o docker-compose.yml "https://configuration.weaviate.io/v2/docker-compose/docker-compose.yml?modules=standalone&runtime=docker-compose&weaviate_version=v1.18.0" + ``` + + Alternatively, you can use Weaviate's docker compose [configuration tool](https://weaviate.io/developers/weaviate/installation/docker-compose) to generate your own `docker-compose.yml` file. + + 2. Run `docker-compose up -d` to spin up a Weaviate instance. + + > To shut it down, run `docker-compose down`. + +- **Self-hosted** – with a Kubernetes cluster + + To configure a self-hosted instance with Kubernetes, follow Weaviate's [documentation](https://weaviate.io/developers/weaviate/installation/kubernetes). + +##### Configure Weaviate Environment Variables + +You need to set some environment variables to connect to your Weaviate instance. + +**Retrieval App Environment Variables** + +| Name | Required | Description | +| ---------------- | -------- | -------------------------------------- | +| `DATASTORE` | Yes | Datastore name. Set this to `weaviate` | +| `BEARER_TOKEN` | Yes | Your secret token | +| `OPENAI_API_KEY` | Yes | Your OpenAI API key | + +**Weaviate Datastore Environment Variables** + +| Name | Required | Description | Default | +| ---------------- | -------- | ------------------------------------------------------------------ | ------------------ | +| `WEAVIATE_HOST` | Optional | Your Weaviate instance host address (see notes below) | `http://127.0.0.1` | +| `WEAVIATE_PORT` | Optional | Your Weaviate port number | 8080 | +| `WEAVIATE_INDEX` | Optional | Your chosen Weaviate class/collection name to store your documents | OpenAIDocument | + +> For **WCS instances**, set `WEAVIATE_HOST` to `https://(wcs-instance-name).weaviate.network`. For example: `https://my-project.weaviate.network/`. + +> For **self-hosted instances**, if your instance is not at 127.0.0.1:8080, set `WEAVIATE_HOST` and `WEAVIATE_PORT` accordingly. For example: `WEAVIATE_HOST=http://localhost/` and `WEAVIATE_PORT=4040`. + +**Weaviate Auth Environment Variables** + +If you enabled OIDC authentication for your Weaviate instance (recommended for WCS instances), set the following environment variables. If you enabled anonymous access, skip this section. + +| Name | Required | Description | +| ------------------- | -------- | ------------------------------ | +| `WEAVIATE_USERNAME` | Yes | Your OIDC or WCS username | +| `WEAVIATE_PASSWORD` | Yes | Your OIDC or WCS password | +| `WEAVIATE_SCOPES` | Optional | Space-separated list of scopes | + +Learn more about [authentication in Weaviate](https://weaviate.io/developers/weaviate/configuration/authentication#overview) and the [Python client authentication](https://weaviate-python-client.readthedocs.io/en/stable/weaviate.auth.html). + +**Weaviate Batch Import Environment Variables** + +Weaviate uses a batching mechanism to perform operations in bulk. This makes importing and updating your data faster and more efficient. 
You can adjust the batch settings with these optional environment variables: + +| Name | Required | Description | Default | +| -------------------------------- | -------- | ------------------------------------------------------------ | ------- | +| `WEAVIATE_BATCH_SIZE` | Optional | Number of insert/updates per batch operation | 20 | +| `WEAVIATE_BATCH_DYNAMIC` | Optional | Lets the batch process decide the batch size | False | +| `WEAVIATE_BATCH_TIMEOUT_RETRIES` | Optional | Number of retry-on-timeout attempts | 3 | +| `WEAVIATE_BATCH_NUM_WORKERS` | Optional | The max number of concurrent threads to run batch operations | 1 | + +> **Note:** The optimal `WEAVIATE_BATCH_SIZE` depends on the available resources (RAM, CPU). A higher value means faster bulk operations, but also higher demand for RAM and CPU. If you experience failures during the import process, reduce the batch size. + +> Setting `WEAVIATE_BATCH_SIZE` to `None` means no limit to the batch size. All insert or update operations would be sent to Weaviate in a single operation. This might be risky, as you lose control over the batch size. + +Learn more about [batch configuration in Weaviate](https://weaviate.io/developers/weaviate/client-libraries/python#batch-configuration). + +#### Zilliz + +Zilliz is a managed cloud-native vector database designed for the billion scale. Zilliz offers many key features, such as: + +- Multiple indexing algorithms +- Multiple distance metrics +- Scalar filtering +- Time travel searches +- Rollback and with snapshots +- Full RBAC +- 99.9% uptime +- Separated storage and compute +- Multi-language SDK's + +Find more information [here](www.zilliz.com). + +**Self Hosted vs SaaS** + +Zilliz is a SaaS database, but offers an open source solution, Milvus. Both options offer fast searches at the billion scale, but Zilliz handles data management for you. It automatically scales compute and storage resources and creates optimal indexes for your data. See the comparison [here](https://zilliz.com/doc/about_zilliz_cloud). + +##### Deploying the Database + +Zilliz Cloud is deployable in a few simple steps. First, create an account [here](https://cloud.zilliz.com/signup). Once you have an account set up, follow the guide [here](https://zilliz.com/doc/quick_start) to setup a database and get the parameters needed for this application. + +Environment Variables: + +| Name | Required | Description | +| ------------------- | -------- | ------------------------------------------------- | +| `DATASTORE` | Yes | Datastore name, set to `zilliz` | +| `BEARER_TOKEN` | Yes | Your secret token | +| `OPENAI_API_KEY` | Yes | Your OpenAI API key | +| `ZILLIZ_COLLECTION` | Optional | Zilliz collection name. Defaults to a random UUID | +| `ZILLIZ_URI` | Yes | URI for the Zilliz instance | +| `ZILLIZ_USER` | Yes | Zilliz username | +| `ZILLIZ_PASSWORD` | Yes | Zilliz password | + +#### Running Zilliz Integration Tests + +A suite of integration tests is available to verify the Zilliz integration. To run the tests, create a Zilliz database and update the environment variables. + +Then, launch the test suite with this command: + +```bash +pytest ./tests/datastore/providers/zilliz/test_zilliz_datastore.py +``` + +#### Milvus + +Milvus is the open-source, cloud-native vector database that scales to billions of vectors. It's the open-source version of Zilliz. 
It supports: + +- Various indexing algorithms and distance metrics +- Scalar filtering and time travel searches +- Rollback and snapshots +- Multi-language SDKs +- Storage and compute separation +- Cloud scalability +- A developer-first community with multi-language support + +Visit the [Github](https://github.com/milvus-io/milvus) to learn more. + +##### Deploying the Database + +You can deploy and manage Milvus using Docker Compose, Helm, K8's Operator, or Ansible. Follow the instructions [here](https://milvus.io/docs) to get started. + +Environment Variables: + +| Name | Required | Description | +| ------------------- | -------- | ------------------------------------------------------ | +| `DATASTORE` | Yes | Datastore name, set to `milvus` | +| `BEARER_TOKEN` | Yes | Your bearer token | +| `OPENAI_API_KEY` | Yes | Your OpenAI API key | +| `MILVUS_COLLECTION` | Optional | Milvus collection name, defaults to a random UUID | +| `MILVUS_HOST` | Optional | Milvus host IP, defaults to `localhost` | +| `MILVUS_PORT` | Optional | Milvus port, defaults to `19530` | +| `MILVUS_USER` | Optional | Milvus username if RBAC is enabled, defaults to `None` | +| `MILVUS_PASSWORD` | Optional | Milvus password if required, defaults to `None` | + +#### Running Milvus Integration Tests + +A suite of integration tests is available to verify the Milvus integration. To run the tests, run the milvus docker compose found in the examples folder. + +Then, launch the test suite with this command: + +```bash +pytest ./tests/datastore/providers/milvus/test_milvus_datastore.py +``` + +#### Qdrant + +Qdrant is a vector database that can store documents and vector embeddings. It can run as a self-hosted version or a managed [Qdrant Cloud](https://cloud.qdrant.io/) +solution. The configuration is almost identical for both options, except for the API key that [Qdrant Cloud](https://cloud.qdrant.io/) provides. + +Environment Variables: + +| Name | Required | Description | Default | +| ------------------- | -------- | ----------------------------------------------------------- | ------------------ | +| `DATASTORE` | Yes | Datastore name, set to `qdrant` | | +| `BEARER_TOKEN` | Yes | Secret token | | +| `OPENAI_API_KEY` | Yes | OpenAI API key | | +| `QDRANT_URL` | Yes | Qdrant instance URL | `http://localhost` | +| `QDRANT_PORT` | Optional | TCP port for Qdrant HTTP communication | `6333` | +| `QDRANT_GRPC_PORT` | Optional | TCP port for Qdrant GRPC communication | `6334` | +| `QDRANT_API_KEY` | Optional | Qdrant API key for [Qdrant Cloud](https://cloud.qdrant.io/) | | +| `QDRANT_COLLECTION` | Optional | Qdrant collection name | `document_chunks` | + +##### Qdrant Cloud + +For a hosted [Qdrant Cloud](https://cloud.qdrant.io/) version, provide the Qdrant instance +URL and the API key from the [Qdrant Cloud UI](https://cloud.qdrant.io/). + +**Example:** + +```bash +QDRANT_URL="https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io" +QDRANT_API_KEY="" +``` + +The other parameters are optional and can be changed if needed. + +##### Self-hosted Qdrant Instance + +For a self-hosted version, use Docker containers or the official Helm chart for deployment. The only +required parameter is the `QDRANT_URL` that points to the Qdrant server URL. + +**Example:** + +```bash +QDRANT_URL="http://YOUR_HOST.example.com:6333" +``` + +The other parameters are optional and can be changed if needed. + +##### Running Qdrant Integration Tests + +A suite of integration tests verifies the Qdrant integration. 
To run it, start a local Qdrant instance in a Docker container. + +```bash +docker run -p "6333:6333" -p "6334:6334" qdrant/qdrant:v1.0.3 +``` + +Then, launch the test suite with this command: + +```bash +pytest ./tests/datastore/providers/test_qdrant_datastore.py +``` + +#### Redis + +Use Redis as a low-latency vector engine by creating a Redis database with the [Redis Stack docker container](/examples/docker/redis/docker-compose.yml). For a hosted/managed solution, try [Redis Cloud](https://app.redislabs.com/#/). + +- The database needs the RediSearch module (v 2.6 ++), which is included in the self-hosted docker compose above. +- Run the App with the Redis docker image: `docker compose up -d` in [this dir](/examples/docker/redis/). +- The app automatically creates a Redis vector search index on the first run. Optionally, create a custom index with a specific name and set it as an environment variable (see below). +- To enable more hybrid searching capabilities, adjust the document schema [here](/datastore/providers/redis_datastore.py). + +Environment Variables: + +| Name | Required | Description | Default | +| ----------------------- | -------- | ---------------------------------------------------------------------------------------------------------------------- | ----------- | +| `DATASTORE` | Yes | Datastore name, set to `redis` | | +| `BEARER_TOKEN` | Yes | Secret token | | +| `OPENAI_API_KEY` | Yes | OpenAI API key | | +| `REDIS_HOST` | Optional | Redis host url | `localhost` | +| `REDIS_PORT` | Optional | Redis port | `6379` | +| `REDIS_PASSWORD` | Optional | Redis password | none | +| `REDIS_INDEX_NAME` | Optional | Redis vector index name | `index` | +| `REDIS_DOC_PREFIX` | Optional | Redis key prefix for the index | `doc` | +| `REDIS_DISTANCE_METRIC` | Optional | Vector similarity distance metric | `COSINE` | +| `REDIS_INDEX_TYPE` | Optional | [Vector index algorithm type](https://redis.io/docs/stack/search/reference/vectors/#creation-attributes-per-algorithm) | `FLAT` | + +### Running the API locally + +To run the API locally, you first need to set the requisite environment variables with the `export` command: + +``` +export DATASTORE= +export BEARER_TOKEN= +export OPENAI_API_KEY= + +``` + +Start the API with: + +``` +poetry run start +``` + +Append `docs` to the URL shown in the terminal and open it in a browser to access the API documentation and try out the endpoints (i.e. http://0.0.0.0:8000/docs). Make sure to enter your bearer token and test the API endpoints. + +**Note:** If you add new dependencies to the pyproject.toml file, you need to run `poetry lock` and `poetry install` to update the lock file and install the new dependencies. + +### Personalization + +You can personalize the Retrieval Plugin for your own use case by doing the following: + +- **Replace the logo**: Replace the image in [logo.png](/.well-known/logo.png) with your own logo. + +- **Edit the data models**: Edit the `DocumentMetadata` and `DocumentMetadataFilter` data models in [models.py](/models/models.py) to add custom metadata fields. Update the OpenAPI schema in [openapi.yaml](/.well-known/openapi.yaml) accordingly. To update the OpenAPI schema more easily, you can run the app locally, then navigate to `http://0.0.0.0:8000/sub/openapi.json` and copy the contents of the webpage. Then go to [Swagger Editor](https://editor.swagger.io/) and paste in the JSON to convert it to a YAML format. You could also replace the openapi.yaml file with an openapi.json file in the [.well-known](/.well-known) folder. 
+ +- **Change the plugin name, description, and usage instructions**: Update the plugin name, user-facing description, and usage instructions for the model. You can either edit the descriptions in the [main.py](/server/main.py) file or update the [openapi.yaml](/.well-known/openapi.yaml) file. Follow the same instructions as in the previous step to update the OpenAPI schema. + +- **Enable ChatGPT to save information from conversations**: See the instructions in the [memory example folder](/examples/memory). + +### Authentication Methods + +You can choose from four options for authenticating requests to your plugin: + +1. **No Authentication**: Anyone can add your plugin and use its API without any credentials. This option is suitable if you are only exposing documents that are not sensitive or already public. It provides no security for your data. If using this method, copy the contents of this [main.py](/examples/authentication-methods/no-auth/main.py) into the [actual main.py file](/server/main.py). Example manifest [here](/examples/authentication-methods/no-auth/ai-plugin.json). + +2. **HTTP Bearer**: You can use a secret token as a header to authorize requests to your plugin. There are two variants of this option: + + - **User Level** (default for this implementation): Each user who adds your plugin to ChatGPT must provide the bearer token when adding the plugin. You can generate and distribute these tokens using any tool or method you prefer, such as [jwt.io](https://jwt.io/). This method provides better security as each user has to enter the shared access token. If you require a unique access token for each user, you will need to implement this yourself in the [main.py](/server/main.py) file. Example manifest [here](/examples/authentication-methods/user-http/ai-plugin.json). + + - **Service Level**: Anyone can add your plugin and use its API without credentials, but you must add a bearer token when registering the plugin. When you install your plugin, you need to add your bearer token, and will then receive a token from ChatGPT that you must include in your hosted manifest file. Your token will be used by ChatGPT to authorize requests to your plugin on behalf of all users who add it. This method is more convenient for users, but it may be less secure as all users share the same token and do not need to add a token to install the plugin. Example manifest [here](/examples/authentication-methods/service-http/ai-plugin.json). + +3. **OAuth**: Users must go through an OAuth flow to add your plugin. You can use an OAuth provider to authenticate users who add your plugin and grant them access to your API. This method offers the highest level of security and control, as users authenticate through a trusted third-party provider. However, you will need to implement the OAuth flow yourself in the [main.py](/server/main.py) file and provide the necessary parameters in your manifest file. Example manifest [here](/examples/authentication-methods/oauth/ai-plugin.json). + +Consider the benefits and drawbacks of each authentication method before choosing the one that best suits your use case and security requirements. If you choose to use a method different to the default (User Level HTTP), make sure to update the manifest file [here](/.well-known/ai-plugin.json). + +## Deployment + +You can deploy your app to different cloud providers, depending on your preferences and requirements. 
However, regardless of the provider you choose, you will need to update two files in your app: [openapi.yaml](/.well-known/openapi.yaml) and [ai-plugin.json](/.well-known/ai-plugin.json). As outlined above, these files define the API specification and the AI plugin configuration for your app, respectively. You need to change the url field in both files to match the address of your deployed app. + +Before deploying your app, you might want to remove unused dependencies from your [pyproject.toml](/pyproject.toml) file to reduce the size of your app and improve its performance. Depending on the vector database provider you choose, you can remove the packages that are not needed for your specific provider. + +Here are the packages you can remove for each vector database provider: + +- **Pinecone:** Remove `weaviate-client`, `pymilvus`, `qdrant-client`, and `redis`. +- **Weaviate:** Remove `pinecone-client`, `pymilvus`, `qdrant-client`, and `redis`. +- **Zilliz:** Remove `pinecone-client`, `weaviate-client`, `qdrant-client`, and `redis`. +- **Milvus:** Remove `pinecone-client`, `weaviate-client`, `qdrant-client`, and `redis`. +- **Qdrant:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, and `redis`. +- **Redis:** Remove `pinecone-client`, `weaviate-client`, `pymilvus`, and `qdrant-client`. + +After removing the unnecessary packages from the `pyproject.toml` file, you don't need to run `poetry lock` and `poetry install` manually. The provided Dockerfile takes care of installing the required dependencies using the `requirements.txt` file generated by the `poetry export` command. + +Once you have deployed your app, consider uploading an initial batch of documents using one of [these scripts](/scripts) or by calling the `/upsert` endpoint, for example: + +```bash +curl -X POST https://your-app-url.com/upsert \ + -H "Authorization: Bearer " \ + -H "Content-type: application/json" \ + -d '{"documents": [{"id": "doc1", "text": "Hello world", "metadata": {"source_id": "12345", "source": "file"}}, {"text": "How are you?", "metadata": {"source_id": "23456"}}]}' +``` + +### Deploying to Fly.io + +To deploy the Docker container from this repository to Fly.io, follow +these steps: + +Install Docker on your local machine if it is not already installed. + +Install the [Fly.io CLI](https://fly.io/docs/getting-started/installing-flyctl/) on your local machine. + +Clone the repository from GitHub: + +``` +git clone https://github.com/openai/chatgpt-retrieval-plugin.git +``` + +Navigate to the cloned repository directory: + +``` +cd path/to/chatgpt-retrieval-plugin +``` + +Log in to the Fly.io CLI: + +``` +flyctl auth login +``` + +Create and launch your Fly.io app: + +``` +flyctl launch +``` + +Follow the instructions in your terminal: + +- Choose your app name +- Choose your app region +- Don't add any databases +- Don't deploy yet (if you do, the first deploy might fail as the environment variables are not yet set) + +Set the required environment variables: + +``` +flyctl secrets set DATASTORE=your_datastore \ +OPENAI_API_KEY=your_openai_api_key \ +BEARER_TOKEN=your_bearer_token \ + +``` + +Alternatively, you could set environment variables in the [Fly.io Console](https://fly.io/dashboard). + +At this point, you can change the plugin url in your plugin manifest file [here](/.well-known/ai-plugin.json), and in your OpenAPI schema [here](/.well-known/openapi.yaml) to the url for your Fly.io app, which will be https://.fly.dev. 
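One way to update the placeholder URL in both files is with `sed` (a sketch; `your-app-name` stands for the Fly.io app name you chose, and the GNU `sed -i` syntax is shown — on macOS use `sed -i ''` instead):

```bash
# Replace the placeholder URL with your deployed app's URL in the manifest and OpenAPI schema
APP_URL="https://your-app-name.fly.dev"
sed -i "s|https://your-app-url.com|${APP_URL}|g" .well-known/ai-plugin.json .well-known/openapi.yaml
```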
+ +Deploy your app with: + +``` +flyctl deploy +``` + +After completing these steps, your Docker container should be deployed to Fly.io and running with the necessary environment variables set. You can view your app by running: + +``` +flyctl open +``` + +which will open your app url. You should be able to find the OpenAPI schema at `/.well-known/openapi.yaml` and the manifest at `/.well-known/ai-plugin.json`. + +To view your app logs: + +``` +flyctl logs +``` + +Now, make sure you have changed the plugin url in your plugin manifest file [here](/.well-known/ai-plugin.json), and in your OpenAPI schema [here](/.well-known/openapi.yaml), and redeploy with `flyctl deploy`. This url will be `https://.fly.dev`. + +**Debugging tips:** +Fly.io uses port 8080 by default. + +If your app fails to deploy, check if the environment variables are set correctly, and then check if your port is configured correctly. You could also try using the [`-e` flag](https://fly.io/docs/flyctl/launch/) with the `flyctl launch` command to set the environment variables at launch. + +### Deploying to Heroku + +To deploy the Docker container from this repository to Heroku and set the required environment variables, follow these steps: + +Install Docker on your local machine if it is not already installed. + +Install the [Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) on your local machine. + +Clone the repository from GitHub: + +``` +git clone https://github.com/openai/chatgpt-retrieval-plugin.git +``` + +Navigate to the cloned repository directory: + +``` +cd path/to/chatgpt-retrieval-plugin +``` + +Log in to the Heroku CLI: + +``` +heroku login +``` + +Create a Heroku app: + +``` +heroku create [app-name] +``` + +Log in to the Heroku Container Registry: + +``` +heroku container:login +``` + +Alternatively, you can use a command from the Makefile to log in to the Heroku Container Registry by running: + +``` +make heroku-login +``` + +Build the Docker image using the Dockerfile: + +``` +docker buildx build --platform linux/amd64 -t [image-name] . +``` + +(Replace `[image-name]` with the name you want to give your Docker image) + +Push the Docker image to the Heroku Container Registry, and release the newly pushed image to your Heroku app. + +``` +docker tag [image-name] registry.heroku.com/[app-name]/web +docker push registry.heroku.com/[app-name]/web +heroku container:release web -a [app-name] +``` + +Alternatively, you can use a command from the to push the Docker image to the Heroku Container Registry by running: + +``` +make heroku-push +``` + +**Note:** You will need to edit the Makefile and replace `` with your actual app name. + +Set the required environment variables for your Heroku app: + +``` +heroku config:set DATASTORE=your_datastore \ +OPENAI_API_KEY=your_openai_api_key \ +BEARER_TOKEN=your_bearer_token \ + \ +-a [app-name] +``` + +You could also set environment variables in the [Heroku Console](https://dashboard.heroku.com/apps). + +After completing these steps, your Docker container should be deployed to Heroku and running with the necessary environment variables set. You can view your app by running: + +``` +heroku open -a [app-name] +``` + +which will open your app url. You should be able to find the OpenAPI schema at `/.well-known/openapi.yaml` and the manifest at `/.well-known/ai-plugin.json`. 
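For example, you can fetch both files directly to confirm they are being served (replace `[app-name]` with your actual app name before running):

```bash
curl https://[app-name].herokuapp.com/.well-known/ai-plugin.json
curl https://[app-name].herokuapp.com/.well-known/openapi.yaml
```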
+ +To view your app logs: + +``` +heroku logs --tail -a [app-name] +``` + +Now make sure to change the plugin url in your plugin manifest file [here](/.well-known/ai-plugin.json), and in your OpenAPI schema [here](/.well-known/openapi.yaml), and redeploy with `make heroku-push`. This url will be `https://.herokuapp.com`. + +### Other Deployment Options + +Some possible other options for deploying the app are: + +- Azure Container Apps: This is a cloud platform that allows you to deploy and manage web apps using Docker containers. You can use the Azure CLI or the Azure Portal to create and configure your app service, and then push your Docker image to a container registry and deploy it to your app service. You can also set environment variables and scale your app using the Azure Portal. Learn more [here](https://learn.microsoft.com/en-us/azure/container-apps/get-started-existing-container-image-portal?pivots=container-apps-private-registry). +- Google Cloud Run: This is a serverless platform that allows you to run stateless web apps using Docker containers. You can use the Google Cloud Console or the gcloud command-line tool to create and deploy your Cloud Run service, and then push your Docker image to the Google Container Registry and deploy it to your service. You can also set environment variables and scale your app using the Google Cloud Console. Learn more [here](https://cloud.google.com/run/docs/quickstarts/build-and-deploy). +- AWS Elastic Container Service: This is a cloud platform that allows you to run and manage web apps using Docker containers. You can use the AWS CLI or the AWS Management Console to create and configure your ECS cluster, and then push your Docker image to the Amazon Elastic Container Registry and deploy it to your cluster. You can also set environment variables and scale your app using the AWS Management Console. Learn more [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/docker-basics.html). + +After you create your app, make sure to change the plugin url in your plugin manifest file [here](/.well-known/ai-plugin.json), and in your OpenAPI schema [here](/.well-known/openapi.yaml), and redeploy. + +## Installing a Developer Plugin + +To install a developer plugin, follow the steps below: + +- First, create your developer plugin by deploying it to your preferred hosting platform (e.g. Fly.io, Heroku, etc.) and updating the plugin URL in the manifest file and OpenAPI schema. + +- Go to [ChatGPT](https://chat.openai.com/) and select "Plugins" from the model picker. + +- From the plugins picker, scroll to the bottom and click on "Plugin store." + +- Go to "Develop your own plugin" and follow the instructions provided. You will need to enter the domain where your plugin is deployed. + +- Follow the instructions based on the authentication type you have chosen for your plugin (e.g. if your plugin uses Service Level HTTP, you will have to paste in your access token, then paste the new access token you receive from the plugin flow into your [ai-plugin.json](/.well-known/ai-plugin.json) file and redeploy your app). + +- Next, you must add your plugin. Go to the "Plugin store" again and click on "Install an unverified plugin." + +- Follow the instructions provided, which will require you to enter the domain where your plugin is deployed. + +- Follow the instructions based on the authentication type you have chosen for your plugin (e.g. if your plugin uses User Level HTTP, you will have to paste in your bearer token). 
+ +After completing these steps, your developer plugin should be installed and ready to use in ChatGPT. + +## Webhooks + +To keep the documents stored in the vector database up-to-date, consider using tools like [Zapier](https://zapier.com) or [Make](https://www.make.com) to configure incoming webhooks to your plugin's API based on events or schedules. For example, this could allow you to sync new information as you update your notes or receive emails. You can also use a [Zapier Transfer](https://zapier.com/blog/zapier-transfer-guide/) to batch process a collection of existing documents and upload them to the vector database. + +If you need to pass custom fields from these tools to your plugin, you might want to create an additional Retrieval Plugin API endpoint that calls the datastore's upsert function, such as `upsert-email`. This custom endpoint can be designed to accept specific fields from the webhook and process them accordingly. + +To set up an incoming webhook, follow these general steps: + +- Choose a webhook tool like Zapier or Make and create an account. +- Set up a new webhook or transfer in the tool, and configure it to trigger based on events or schedules. +- Specify the target URL for the webhook, which should be the API endpoint of your retrieval plugin (e.g. `https://your-plugin-url.com/upsert`). +- Configure the webhook payload to include the necessary data fields and format them according to your retrieval plugin's API requirements. +- Test the webhook to ensure it's working correctly and sending data to your retrieval plugin as expected. + +After setting up the webhook, you may want to run a backfill to ensure that any previously missed data is included in the vector database. + +Remember that if you want to use incoming webhooks to continuously sync data, you should consider running a backfill after setting these up to avoid missing any data. + +In addition to using tools like Zapier and Make, you can also build your own custom integrations to sync data with your Retrieval Plugin. This allows you to have more control over the data flow and tailor the integration to your specific needs and requirements. + +## Scripts + +The `scripts` folder contains scripts to batch upsert or process text documents from different data sources, such as a zip file, JSON file, or JSONL file. These scripts use the plugin's upsert utility functions to upload the documents and their metadata to the vector database, after converting them to plain text and splitting them into chunks. Each script folder has a README file that explains how to use it and what parameters it requires. You can also optionally screen the documents for personally identifiable information (PII) using a language model and skip them if detected, with the [`services.pii_detection`](/services/pii_detection.py) module. This can be helpful if you want to avoid uploading sensitive or private documents to the vector database unintentionally. Additionally, you can optionally extract metadata from the document text using a language model, with the [`services.extract_metadata`](/services/extract_metadata.py) module. This can be useful if you want to enrich the document metadata. **Note:** if using incoming webhooks to continuously sync data, consider running a backfill after setting these up to avoid missing any data. + +The scripts are: + +- [`process_json`](scripts/process_json/): This script processes a file dump of documents in a JSON format and stores them in the vector database with some metadata. 
The format of the JSON file should be a list of JSON objects, where each object represents a document. The JSON object should have a `text` field and optionally other fields to populate the metadata. You can provide custom metadata as a JSON string and flags to screen for PII and extract metadata. +- [`process_jsonl`](scripts/process_jsonl/): This script processes a file dump of documents in a JSONL format and stores them in the vector database with some metadata. The format of the JSONL file should be a newline-delimited JSON file, where each line is a valid JSON object representing a document. The JSON object should have a `text` field and optionally other fields to populate the metadata. You can provide custom metadata as a JSON string and flags to screen for PII and extract metadata. +- [`process_zip`](scripts/process_zip/): This script processes a file dump of documents in a zip file and stores them in the vector database with some metadata. The format of the zip file should be a flat zip file folder of docx, pdf, txt, md, pptx or csv files. You can provide custom metadata as a JSON string and flags to screen for PII and extract metadata. + +## Limitations + +While the ChatGPT Retrieval Plugin is designed to provide a flexible solution for semantic search and retrieval, it does have some limitations: + +- **Keyword search limitations**: The embeddings generated by the `text-embedding-ada-002` model may not always be effective at capturing exact keyword matches. As a result, the plugin might not return the most relevant results for queries that rely heavily on specific keywords. Some vector databases, like Weaviate, use hybrid search and might perform better for keyword searches. +- **Sensitive data handling**: The plugin does not automatically detect or filter sensitive data. It is the responsibility of the developers to ensure that they have the necessary authorization to include content in the Retrieval Plugin and that the content complies with data privacy requirements. +- **Scalability**: The performance of the plugin may vary depending on the chosen vector database provider and the size of the dataset. Some providers may offer better scalability and performance than others. +- **Language support**: The plugin currently uses OpenAI's `text-embedding-ada-002` model, which is optimized for use in English. However, it is still robust enough to generate good results for a variety of languages. +- **Metadata extraction**: The optional metadata extraction feature relies on a language model to extract information from the document text. This process may not always be accurate, and the quality of the extracted metadata may vary depending on the document content and structure. +- **PII detection**: The optional PII detection feature is not foolproof and may not catch all instances of personally identifiable information. Use this feature with caution and verify its effectiveness for your specific use case. + +## Future Directions + +The ChatGPT Retrieval Plugin provides a flexible solution for semantic search and retrieval, but there is always potential for further development. We encourage users to contribute to the project by submitting pull requests for new features or enhancements. Notable contributions may be acknowledged with OpenAI credits. + +Some ideas for future directions include: + +- **More vector database providers**: If you are interested in integrating another vector database provider with the ChatGPT Retrieval Plugin, feel free to submit an implementation. 
+- **Additional scripts**: Expanding the range of scripts available for processing and uploading documents from various data sources would make the plugin even more versatile. +- **User Interface**: Developing a user interface for managing documents and interacting with the plugin could improve the user experience. +- **Hybrid search / TF-IDF option**: Enhancing the [datastore's upsert function](/datastore/datastore.py#L18) with an option to use hybrid search or TF-IDF indexing could improve the plugin's performance for keyword-based queries. +- **Advanced chunking strategies and embeddings calculations**: Implementing more sophisticated chunking strategies and embeddings calculations, such as embedding document titles and summaries, performing weighted averaging of document chunks and summaries, or calculating the average embedding for a document, could lead to better search results. +- **Custom metadata**: Allowing users to add custom metadata to document chunks, such as titles or other relevant information, might improve the retrieved results in some use cases. +- **Additional optional services**: Integrating more optional services, such as summarizing documents or pre-processing documents before embedding them, could enhance the plugin's functionality and quality of retrieved results. These services could be implemented using language models and integrated directly into the plugin, rather than just being available in the scripts. + +We welcome contributions from the community to help improve the ChatGPT Retrieval Plugin and expand its capabilities. If you have an idea or feature you'd like to contribute, please submit a pull request to the repository. + +## Contributors + +We would like to extend our gratitude to the following contributors for their code / documentation contributions, and support in integrating various vector database providers with the ChatGPT Retrieval Plugin: + +- [Pinecone](https://www.pinecone.io/) + - [acatav](https://github.com/acatav) + - [gkogan](https://github.com/gkogan) + - [jamescalam](https://github.com/jamescalam) +- [Weaviate](https://www.semi.technology/) + - [hsm207](https://github.com/hsm207) + - [sebawita](https://github.com/sebawita) + - [byronvoorbach](https://github.com/byronvoorbach) +- [Zilliz](https://zilliz.com/) + - [filip-halt](https://github.com/filip-halt) +- [Milvus](https://milvus.io/) + - [filip-halt](https://github.com/filip-halt) +- [Qdrant](https://qdrant.tech/) + - [kacperlukawski](https://github.com/kacperlukawski) +- [Redis](https://redis.io/) + - [tylerhutcherson](https://github.com/tylerhutcherson) diff --git a/datastore/__init__.py b/datastore/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/datastore/datastore.py b/datastore/datastore.py new file mode 100644 index 000000000..ff0c79dd8 --- /dev/null +++ b/datastore/datastore.py @@ -0,0 +1,86 @@ +from abc import ABC, abstractmethod +from typing import Dict, List, Optional +import asyncio + +from models.models import ( + Document, + DocumentChunk, + DocumentMetadataFilter, + Query, + QueryResult, + QueryWithEmbedding, +) +from services.chunks import get_document_chunks +from services.openai import get_embeddings + + +class DataStore(ABC): + async def upsert( + self, documents: List[Document], chunk_token_size: Optional[int] = None + ) -> List[str]: + """ + Takes in a list of documents and inserts them into the database. + First deletes all the existing vectors with the document id (if necessary, depends on the vector db), then inserts the new ones. 
+ Return a list of document ids. + """ + # Delete any existing vectors for documents with the input document ids + await asyncio.gather( + *[ + self.delete( + filter=DocumentMetadataFilter( + document_id=document.id, + ), + delete_all=False, + ) + for document in documents + if document.id + ] + ) + + chunks = get_document_chunks(documents, chunk_token_size) + + return await self._upsert(chunks) + + @abstractmethod + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a list of list of document chunks and inserts them into the database. + Return a list of document ids. + """ + + raise NotImplementedError + + async def query(self, queries: List[Query]) -> List[QueryResult]: + """ + Takes in a list of queries and filters and returns a list of query results with matching document chunks and scores. + """ + # get a list of of just the queries from the Query list + query_texts = [query.query for query in queries] + query_embeddings = get_embeddings(query_texts) + # hydrate the queries with embeddings + queries_with_embeddings = [ + QueryWithEmbedding(**query.dict(), embedding=embedding) + for query, embedding in zip(queries, query_embeddings) + ] + return await self._query(queries_with_embeddings) + + @abstractmethod + async def _query(self, queries: List[QueryWithEmbedding]) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and returns a list of query results with matching document chunks and scores. + """ + raise NotImplementedError + + @abstractmethod + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """ + Removes vectors by ids, filter, or everything in the datastore. + Multiple parameters can be used at once. + Returns whether the operation was successful. 
+ """ + raise NotImplementedError diff --git a/datastore/factory.py b/datastore/factory.py new file mode 100644 index 000000000..732fc40cf --- /dev/null +++ b/datastore/factory.py @@ -0,0 +1,35 @@ +from datastore.datastore import DataStore +import os + + +async def get_datastore() -> DataStore: + datastore = os.environ.get("DATASTORE") + assert datastore is not None + + match datastore: + case "pinecone": + from datastore.providers.pinecone_datastore import PineconeDataStore + + return PineconeDataStore() + case "weaviate": + from datastore.providers.weaviate_datastore import WeaviateDataStore + + return WeaviateDataStore() + case "milvus": + from datastore.providers.milvus_datastore import MilvusDataStore + + return MilvusDataStore() + case "zilliz": + from datastore.providers.zilliz_datastore import ZillizDataStore + + return ZillizDataStore() + case "redis": + from datastore.providers.redis_datastore import RedisDataStore + + return await RedisDataStore.init() + case "qdrant": + from datastore.providers.qdrant_datastore import QdrantDataStore + + return QdrantDataStore() + case _: + raise ValueError(f"Unsupported vector database: {datastore}") diff --git a/datastore/providers/milvus_datastore.py b/datastore/providers/milvus_datastore.py new file mode 100644 index 000000000..68bbbcd2e --- /dev/null +++ b/datastore/providers/milvus_datastore.py @@ -0,0 +1,475 @@ +import os +import asyncio + +from typing import Dict, List, Optional +from pymilvus import ( + Collection, + connections, + utility, + FieldSchema, + DataType, + CollectionSchema, + MilvusException, +) +from uuid import uuid4 + + +from services.date import to_unix_timestamp +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + Source, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + DocumentChunkWithScore, +) + +MILVUS_COLLECTION = os.environ.get("MILVUS_COLLECTION") or "c" + uuid4().hex +MILVUS_HOST = os.environ.get("MILVUS_HOST") or "localhost" +MILVUS_PORT = os.environ.get("MILVUS_PORT") or 19530 +MILVUS_USER = os.environ.get("MILVUS_USER") +MILVUS_PASSWORD = os.environ.get("MILVUS_PASSWORD") +MILVUS_USE_SECURITY = False if MILVUS_PASSWORD is None else True + +UPSERT_BATCH_SIZE = 100 +OUTPUT_DIM = 1536 + + +class Required: + pass + + +# The fields names that we are going to be storing within Milvus, the field declaration for schema creation, and the default value +SCHEMA = [ + ( + "pk", + FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True), + Required, + ), + ( + "embedding", + FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=OUTPUT_DIM), + Required, + ), + ( + "text", + FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535), + Required, + ), + ( + "document_id", + FieldSchema(name="document_id", dtype=DataType.VARCHAR, max_length=65535), + "", + ), + ( + "source_id", + FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=65535), + "", + ), + ( + "id", + FieldSchema( + name="id", + dtype=DataType.VARCHAR, + max_length=65535, + ), + "", + ), + ( + "source", + FieldSchema(name="source", dtype=DataType.VARCHAR, max_length=65535), + "", + ), + ("url", FieldSchema(name="url", dtype=DataType.VARCHAR, max_length=65535), ""), + ("created_at", FieldSchema(name="created_at", dtype=DataType.INT64), -1), + ( + "author", + FieldSchema(name="author", dtype=DataType.VARCHAR, max_length=65535), + "", + ), +] + + +class MilvusDataStore(DataStore): + def __init__( + self, + create_new: Optional[bool] = 
False, + index_params: Optional[dict] = None, + search_params: Optional[dict] = None, + ): + """Create a Milvus DataStore. + + The Milvus Datastore allows for storing your indexes and metadata within a Milvus instance. + + Args: + create_new (Optional[bool], optional): Whether to overwrite if collection already exists. Defaults to True. + index_params (Optional[dict], optional): Custom index params to use. Defaults to None. + search_params (Optional[dict], optional): Custom search params to use. Defaults to None. + """ + + # # TODO: Auto infer the fields + # non_string_fields = [('embedding', List[float]), ('created_at', int)] + # fields_to_index = list(DocumentChunkMetadata.__fields__.keys()) + # fields_to_index = list(DocumentChunk.__fields__.keys()) + + # Set the index_params to passed in or the default + self.index_params = index_params + + # The default search params + self.default_search_params = { + "IVF_FLAT": {"metric_type": "L2", "params": {"nprobe": 10}}, + "IVF_SQ8": {"metric_type": "L2", "params": {"nprobe": 10}}, + "IVF_PQ": {"metric_type": "L2", "params": {"nprobe": 10}}, + "HNSW": {"metric_type": "L2", "params": {"ef": 10}}, + "RHNSW_FLAT": {"metric_type": "L2", "params": {"ef": 10}}, + "RHNSW_SQ": {"metric_type": "L2", "params": {"ef": 10}}, + "RHNSW_PQ": {"metric_type": "L2", "params": {"ef": 10}}, + "IVF_HNSW": {"metric_type": "L2", "params": {"nprobe": 10, "ef": 10}}, + "ANNOY": {"metric_type": "L2", "params": {"search_k": 10}}, + "AUTOINDEX": {"metric_type": "L2", "params": {}}, + } + + # Check if the connection already exists + try: + i = [ + connections.get_connection_addr(x[0]) + for x in connections.list_connections() + ].index({"host": MILVUS_HOST, "port": MILVUS_PORT}) + self.alias = connections.list_connections()[i][0] + except ValueError: + # Connect to the Milvus instance using the passed in Enviroment variables + self.alias = uuid4().hex + connections.connect( + alias=self.alias, + host=MILVUS_HOST, + port=MILVUS_PORT, + user=MILVUS_USER, # type: ignore + password=MILVUS_PASSWORD, # type: ignore + secure=MILVUS_USE_SECURITY, + ) + + self._create_collection(create_new) # type: ignore + + index_params = self.index_params or {} + + # Use in the passed in search params or the default for the specified index + self.search_params = ( + search_params or self.default_search_params[index_params["index_type"]] + ) + + def _create_collection(self, create_new: bool) -> None: + """Create a collection based on enviroment and passed in variables. + + Args: + create_new (bool): Whether to overwrite if collection already exists. 
+ """ + + # If the collection exists and create_new is True, drop the existing collection + if utility.has_collection(MILVUS_COLLECTION, using=self.alias) and create_new: + utility.drop_collection(MILVUS_COLLECTION, using=self.alias) + + # Check if the collection doesnt exist + if utility.has_collection(MILVUS_COLLECTION, using=self.alias) is False: + # If it doesnt exist use the field params from init to create a new schem + schema = [field[1] for field in SCHEMA] + schema = CollectionSchema(schema) + # Use the schema to create a new collection + self.col = Collection( + MILVUS_COLLECTION, + schema=schema, + consistency_level="Strong", + using=self.alias, + ) + else: + # If the collection exists, point to it + self.col = Collection( + MILVUS_COLLECTION, consistency_level="Strong", using=self.alias + ) # type: ignore + + # If no index on the collection, create one + if len(self.col.indexes) == 0: + if self.index_params != None: + # Create an index on the 'embedding' field with the index params found in init + self.col.create_index("embedding", index_params=self.index_params) + else: + # If no index param supplied, to first create an HNSW index for Milvus + try: + print("Attempting creation of Milvus default index") + i_p = { + "metric_type": "L2", + "index_type": "HNSW", + "params": {"M": 8, "efConstruction": 64}, + } + + self.col.create_index("embedding", index_params=i_p) + self.index_params = i_p + print("Creation of Milvus default index succesful") + # If create fails, most likely due to being Zilliz Cloud instance, try to create an AutoIndex + except MilvusException: + print("Attempting creation of Zilliz Cloud default index") + i_p = {"metric_type": "L2", "index_type": "AUTOINDEX", "params": {}} + self.col.create_index("embedding", index_params=i_p) + self.index_params = i_p + print("Creation of Zilliz Cloud default index succesful") + # If an index already exists, grab its params + else: + self.index_params = self.col.indexes[0].to_dict()['index_param'] + + + + self.col.load() + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """Upsert chunks into the datastore. + + Args: + chunks (Dict[str, List[DocumentChunk]]): A list of DocumentChunks to insert + + Raises: + e: Error in upserting data. + + Returns: + List[str]: The document_id's that were inserted. + """ + # The doc id's to return for the upsert + doc_ids: List[str] = [] + # List to collect all the insert data + insert_data = [[] for _ in range(len(SCHEMA) - 1)] + # Go through each document chunklist and grab the data + for doc_id, chunk_list in chunks.items(): + # Append the doc_id to the list we are returning + doc_ids.append(doc_id) + # Examine each chunk in the chunklist + for chunk in chunk_list: + # Extract data from the chunk + list_of_data = self._get_values(chunk) + # Check if the data is valid + if list_of_data is not None: + # Append each field to the insert_data + for x in range(len(insert_data)): + insert_data[x].append(list_of_data[x]) + # Slice up our insert data into batches + batches = [ + insert_data[i : i + UPSERT_BATCH_SIZE] + for i in range(0, len(insert_data), UPSERT_BATCH_SIZE) + ] + + # Attempt to insert each batch into our collection + for batch in batches: + if len(batch[0]) != 0: + try: + print(f"Upserting batch of size {len(batch[0])}") + self.col.insert(batch) + print(f"Upserted batch successfully") + except Exception as e: + print(f"Error upserting batch: {e}") + raise e + + # This setting perfoms flushes after insert. 
Small insert == bad to use + # self.col.flush() + + return doc_ids + + def _get_values(self, chunk: DocumentChunk) -> List[any] | None: # type: ignore + """Convert the chunk into a list of values to insert whose indexes align with fields. + + Args: + chunk (DocumentChunk): The chunk to convert. + + Returns: + List (any): The values to insert. + """ + # Convert DocumentChunk and its sub models to dict + values = chunk.dict() + # Unpack the metadata into the same dict + meta = values.pop("metadata") + values.update(meta) + + # Convert date to int timestamp form + if values["created_at"]: + values["created_at"] = to_unix_timestamp(values["created_at"]) + + # If source exists, change from Source object to the string value it holds + if values["source"]: + values["source"] = values["source"].value + # List to collect data we will return + ret = [] + # Grab data responding to each field excluding the hidden auto pk field + for key, _, default in SCHEMA[1:]: + # Grab the data at the key and default to our defaults set in init + x = values.get(key) or default + # If one of our required fields is missing, ignore the entire entry + if x is Required: + print("Chunk " + values["id"] + " missing " + key + " skipping") + return None + # Add the corresponding value if it passes the tests + ret.append(x) + return ret + + async def _query( + self, + queries: List[QueryWithEmbedding], + ) -> List[QueryResult]: + """Query the QueryWithEmbedding against the MilvusDocumentSearch + + Search the embedding and its filter in the collection. + + Args: + queries (List[QueryWithEmbedding]): The list of searches to perform. + + Returns: + List[QueryResult]: Results for each search. + """ + # Async to perform the query, adapted from pinecone implementation + async def _single_query(query: QueryWithEmbedding) -> QueryResult: + + filter = None + # Set the filter to expression that is valid for Milvus + if query.filter != None: + # Either a valid filter or None will be returned + filter = self._get_filter(query.filter) + + # Perform our search + res = self.col.search( + data=[query.embedding], + anns_field="embedding", + param=self.search_params, + limit=query.top_k, + expr=filter, + output_fields=[ + field[0] for field in SCHEMA[2:] + ], # Ignoring pk, embedding + ) + # Results that will hold our DocumentChunkWithScores + results = [] + # Parse every result for our search + for hit in res[0]: # type: ignore + # The distance score for the search result, falls under DocumentChunkWithScore + score = hit.score + # Our metadata info, falls under DocumentChunkMetadata + metadata = {} + # Grab the values that correspond to our fields, ignore pk and embedding. 
+ for x in [field[0] for field in SCHEMA[2:]]: + metadata[x] = hit.entity.get(x) + # If the source isnt valid, conver to None + if metadata["source"] not in Source.__members__: + metadata["source"] = None + # Text falls under the DocumentChunk + text = metadata.pop("text") + # Id falls under the DocumentChunk + ids = metadata.pop("id") + chunk = DocumentChunkWithScore( + id=ids, + score=score, + text=text, + metadata=DocumentChunkMetadata(**metadata), + ) + results.append(chunk) + + # TODO: decide on doing queries to grab the embedding itself, slows down performance as double query occurs + + return QueryResult(query=query.query, results=results) + + results: List[QueryResult] = await asyncio.gather( + *[_single_query(query) for query in queries] + ) + return results + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """Delete the entities based either on the chunk_id of the vector, + + Args: + ids (Optional[List[str]], optional): The document_ids to delete. Defaults to None. + filter (Optional[DocumentMetadataFilter], optional): The filter to delet by. Defaults to None. + delete_all (Optional[bool], optional): Whether to drop the collection and recreate it. Defaults to None. + """ + # If deleting all, drop and create the new collection + if delete_all: + # Release the collection from memory + self.col.release() + # Drop the collection + self.col.drop() + # Recreate the new collection + self._create_collection(True) + return True + + # Keep track of how many we have deleted for later printing + delete_count = 0 + + # Check if empty ids + if ids != None: + if len(ids) != 0: + # Add quotation marks around the string format id + ids = ['"' + str(id) + '"' for id in ids] + # Query for the pk's of entries that match id's + ids = self.col.query(f"document_id in [{','.join(ids)}]") + # Convert to list of pks + ids = [str(entry["pk"]) for entry in ids] # type: ignore + # Check to see if there are valid pk's to delete + if len(ids) != 0: + # Delete the entries for each pk + res = self.col.delete(f"pk in [{','.join(ids)}]") + # Incremet our deleted count + delete_count += int(res.delete_count) # type: ignore + + # Check if empty filter + if filter != None: + # Convert filter to milvus expression + filter = self._get_filter(filter) # type: ignore + # Check if there is anything to filter + if len(filter) != 0: # type: ignore + # Query for the pk's of entries that match filter + filter = self.col.query(filter) # type: ignore + # Convert to list of pks + filter = [str(entry["pk"]) for entry in filter] # type: ignore + # Check to see if there are valid pk's to delete + if len(filter) != 0: # type: ignore + # Delete the entries + res = self.col.delete(f"pk in [{','.join(filter)}]") # type: ignore + # Increment our delete count + delete_count += int(res.delete_count) # type: ignore + + # This setting perfoms flushes after delete. Small delete == bad to use + # self.col.flush() + + return True + + def _get_filter(self, filter: DocumentMetadataFilter) -> Optional[str]: + """Converts a DocumentMetdataFilter to the expression that Milvus takes. + + Args: + filter (DocumentMetadataFilter): The Filter to convert to Milvus expression. + + Returns: + Optional[str]: The filter if valid, otherwise None. 
+ """ + filters = [] + # Go through all the fields and thier values + for field, value in filter.dict().items(): + # Check if the Value is empty + if value is not None: + # Convert start_date to int and add greater than or equal logic + if field == "start_date": + filters.append( + "(created_at >= " + str(to_unix_timestamp(value)) + ")" + ) + # Convert end_date to int and add less than or equal logic + elif field == "end_date": + filters.append( + "(created_at <= " + str(to_unix_timestamp(value)) + ")" + ) + # Convert Source to its string value and check equivalency + elif field == "source": + filters.append("(" + field + ' == "' + str(value.value) + '")') + # Check equivalency of rest of string fields + else: + filters.append("(" + field + ' == "' + str(value) + '")') + # Join all our expressions with `and`` + return " and ".join(filters) diff --git a/datastore/providers/pinecone_datastore.py b/datastore/providers/pinecone_datastore.py new file mode 100644 index 000000000..208ed5684 --- /dev/null +++ b/datastore/providers/pinecone_datastore.py @@ -0,0 +1,261 @@ +import os +from typing import Any, Dict, List, Optional +import pinecone +from tenacity import retry, wait_random_exponential, stop_after_attempt +import asyncio + +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + DocumentChunkWithScore, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + Source, +) +from services.date import to_unix_timestamp + +# Read environment variables for Pinecone configuration +PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") +PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT") +PINECONE_INDEX = os.environ.get("PINECONE_INDEX") +assert PINECONE_API_KEY is not None +assert PINECONE_ENVIRONMENT is not None +assert PINECONE_INDEX is not None + +# Initialize Pinecone with the API key and environment +pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT) + +# Set the batch size for upserting vectors to Pinecone +UPSERT_BATCH_SIZE = 100 + + +class PineconeDataStore(DataStore): + def __init__(self): + # Check if the index name is specified and exists in Pinecone + if PINECONE_INDEX and PINECONE_INDEX not in pinecone.list_indexes(): + + # Get all fields in the metadata object in a list + fields_to_index = list(DocumentChunkMetadata.__fields__.keys()) + + # Create a new index with the specified name, dimension, and metadata configuration + try: + print( + f"Creating index {PINECONE_INDEX} with metadata config {fields_to_index}" + ) + pinecone.create_index( + PINECONE_INDEX, + dimension=1536, # dimensionality of OpenAI ada v2 embeddings + metadata_config={"indexed": fields_to_index}, + ) + self.index = pinecone.Index(PINECONE_INDEX) + print(f"Index {PINECONE_INDEX} created successfully") + except Exception as e: + print(f"Error creating index {PINECONE_INDEX}: {e}") + raise e + elif PINECONE_INDEX and PINECONE_INDEX in pinecone.list_indexes(): + # Connect to an existing index with the specified name + try: + print(f"Connecting to existing index {PINECONE_INDEX}") + self.index = pinecone.Index(PINECONE_INDEX) + print(f"Connected to index {PINECONE_INDEX} successfully") + except Exception as e: + print(f"Error connecting to index {PINECONE_INDEX}: {e}") + raise e + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a dict from document id to list of document chunks and inserts 
them into the index. + Return a list of document ids. + """ + # Initialize a list of ids to return + doc_ids: List[str] = [] + # Initialize a list of vectors to upsert + vectors = [] + # Loop through the dict items + for doc_id, chunk_list in chunks.items(): + # Append the id to the ids list + doc_ids.append(doc_id) + print(f"Upserting document_id: {doc_id}") + for chunk in chunk_list: + # Create a vector tuple of (id, embedding, metadata) + # Convert the metadata object to a dict with unix timestamps for dates + pinecone_metadata = self._get_pinecone_metadata(chunk.metadata) + # Add the text and document id to the metadata dict + pinecone_metadata["text"] = chunk.text + pinecone_metadata["document_id"] = doc_id + vector = (chunk.id, chunk.embedding, pinecone_metadata) + vectors.append(vector) + + # Split the vectors list into batches of the specified size + batches = [ + vectors[i : i + UPSERT_BATCH_SIZE] + for i in range(0, len(vectors), UPSERT_BATCH_SIZE) + ] + # Upsert each batch to Pinecone + for batch in batches: + try: + print(f"Upserting batch of size {len(batch)}") + self.index.upsert(vectors=batch) + print(f"Upserted batch successfully") + except Exception as e: + print(f"Error upserting batch: {e}") + raise e + + return doc_ids + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) + async def _query( + self, + queries: List[QueryWithEmbedding], + ) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and returns a list of query results with matching document chunks and scores. + """ + + # Define a helper coroutine that performs a single query and returns a QueryResult + async def _single_query(query: QueryWithEmbedding) -> QueryResult: + print(f"Query: {query.query}") + + # Convert the metadata filter object to a dict with pinecone filter expressions + pinecone_filter = self._get_pinecone_filter(query.filter) + + try: + # Query the index with the query embedding, filter, and top_k + query_response = self.index.query( + # namespace=namespace, + top_k=query.top_k, + vector=query.embedding, + filter=pinecone_filter, + include_metadata=True, + ) + except Exception as e: + print(f"Error querying index: {e}") + raise e + + query_results: List[DocumentChunkWithScore] = [] + for result in query_response.matches: + score = result.score + metadata = result.metadata + # Remove document id and text from metadata and store it in a new variable + metadata_without_text = ( + {key: value for key, value in metadata.items() if key != "text"} + if metadata + else None + ) + + # If the source is not a valid Source in the Source enum, set it to None + if ( + metadata_without_text + and "source" in metadata_without_text + and metadata_without_text["source"] not in Source.__members__ + ): + metadata_without_text["source"] = None + + # Create a document chunk with score object with the result data + result = DocumentChunkWithScore( + id=result.id, + score=score, + text=metadata["text"] if metadata and "text" in metadata else None, + metadata=metadata_without_text, + ) + query_results.append(result) + return QueryResult(query=query.query, results=query_results) + + # Use asyncio.gather to run multiple _single_query coroutines concurrently and collect their results + results: List[QueryResult] = await asyncio.gather( + *[_single_query(query) for query in queries] + ) + + return results + + @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) + async def delete( + self, + ids: Optional[List[str]] = None, + filter: 
Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """ + Removes vectors by ids, filter, or everything from the index. + """ + # Delete all vectors from the index if delete_all is True + if delete_all == True: + try: + print(f"Deleting all vectors from index") + self.index.delete(delete_all=True) + print(f"Deleted all vectors successfully") + return True + except Exception as e: + print(f"Error deleting all vectors: {e}") + raise e + + # Convert the metadata filter object to a dict with pinecone filter expressions + pinecone_filter = self._get_pinecone_filter(filter) + # Delete vectors that match the filter from the index if the filter is not empty + if pinecone_filter != {}: + try: + print(f"Deleting vectors with filter {pinecone_filter}") + self.index.delete(filter=pinecone_filter) + print(f"Deleted vectors with filter successfully") + except Exception as e: + print(f"Error deleting vectors with filter: {e}") + raise e + + # Delete vectors that match the document ids from the index if the ids list is not empty + if ids != None and len(ids) > 0: + try: + print(f"Deleting vectors with ids {ids}") + pinecone_filter = {"document_id": {"$in": ids}} + self.index.delete(filter=pinecone_filter) # type: ignore + print(f"Deleted vectors with ids successfully") + except Exception as e: + print(f"Error deleting vectors with ids: {e}") + raise e + + return True + + def _get_pinecone_filter( + self, filter: Optional[DocumentMetadataFilter] = None + ) -> Dict[str, Any]: + if filter is None: + return {} + + pinecone_filter = {} + + # For each field in the MetadataFilter, check if it has a value and add the corresponding pinecone filter expression + # For start_date and end_date, uses the $gte and $lte operators respectively + # For other fields, uses the $eq operator + for field, value in filter.dict().items(): + if value is not None: + if field == "start_date": + pinecone_filter["date"] = pinecone_filter.get("date", {}) + pinecone_filter["date"]["$gte"] = to_unix_timestamp(value) + elif field == "end_date": + pinecone_filter["date"] = pinecone_filter.get("date", {}) + pinecone_filter["date"]["$lte"] = to_unix_timestamp(value) + else: + pinecone_filter[field] = value + + return pinecone_filter + + def _get_pinecone_metadata( + self, metadata: Optional[DocumentChunkMetadata] = None + ) -> Dict[str, Any]: + if metadata is None: + return {} + + pinecone_metadata = {} + + # For each field in the Metadata, check if it has a value and add it to the pinecone metadata dict + # For fields that are dates, convert them to unix timestamps + for field, value in metadata.dict().items(): + if value is not None: + if field in ["created_at"]: + pinecone_metadata[field] = to_unix_timestamp(value) + else: + pinecone_metadata[field] = value + + return pinecone_metadata diff --git a/datastore/providers/qdrant_datastore.py b/datastore/providers/qdrant_datastore.py new file mode 100644 index 000000000..af413d53d --- /dev/null +++ b/datastore/providers/qdrant_datastore.py @@ -0,0 +1,296 @@ +import os +import uuid +from typing import Dict, List, Optional + +from grpc._channel import _InactiveRpcError +from qdrant_client.http.exceptions import UnexpectedResponse +from qdrant_client.http.models import PayloadSchemaType + +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + DocumentChunkWithScore, +) +from qdrant_client.http import models as rest + +import qdrant_client + +from 
services.date import to_unix_timestamp + +QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost") +QDRANT_PORT = os.environ.get("QDRANT_PORT", "6333") +QDRANT_GRPC_PORT = os.environ.get("QDRANT_GRPC_PORT", "6334") +QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY") +QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "document_chunks") + + +class QdrantDataStore(DataStore): + UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d") + + def __init__( + self, + collection_name: Optional[str] = None, + vector_size: int = 1536, + distance: str = "Cosine", + recreate_collection: bool = False, + ): + """ + Args: + collection_name: Name of the collection to be used + vector_size: Size of the embedding stored in a collection + distance: + Any of "Cosine" / "Euclid" / "Dot". Distance function to measure + similarity + """ + self.client = qdrant_client.QdrantClient( + url=QDRANT_URL, + port=int(QDRANT_PORT), + grpc_port=int(QDRANT_GRPC_PORT), + api_key=QDRANT_API_KEY, + prefer_grpc=True, + ) + self.collection_name = collection_name or QDRANT_COLLECTION + + # Set up the collection so the points might be inserted or queried + self._set_up_collection(vector_size, distance, recreate_collection) + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a list of document chunks and inserts them into the database. + Return a list of document ids. + """ + points = [ + self._convert_document_chunk_to_point(chunk) + for _, chunks in chunks.items() + for chunk in chunks + ] + self.client.upsert( + collection_name=self.collection_name, + points=points, # type: ignore + wait=True, + ) + return list(chunks.keys()) + + async def _query( + self, + queries: List[QueryWithEmbedding], + ) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and returns a list of query results with matching document chunks and scores. + """ + search_requests = [ + self._convert_query_to_search_request(query) for query in queries + ] + results = self.client.search_batch( + collection_name=self.collection_name, + requests=search_requests, + ) + return [ + QueryResult( + query=query.query, + results=[ + self._convert_scored_point_to_document_chunk_with_score(point) + for point in result + ], + ) + for query, result in zip(queries, results) + ] + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """ + Removes vectors by ids, filter, or everything in the datastore. + Returns whether the operation was successful. + """ + if ids is None and filter is None and delete_all is None: + raise ValueError( + "Please provide one of the parameters: ids, filter or delete_all." 
+ ) + + if delete_all: + points_selector = rest.Filter() + else: + points_selector = self._convert_metadata_filter_to_qdrant_filter( + filter, ids + ) + + response = self.client.delete( + collection_name=self.collection_name, + points_selector=points_selector, # type: ignore + ) + return "COMPLETED" == response.status + + def _convert_document_chunk_to_point( + self, document_chunk: DocumentChunk + ) -> rest.PointStruct: + created_at = ( + to_unix_timestamp(document_chunk.metadata.created_at) + if document_chunk.metadata.created_at is not None + else None + ) + return rest.PointStruct( + id=self._create_document_chunk_id(document_chunk.id), + vector=document_chunk.embedding, # type: ignore + payload={ + "id": document_chunk.id, + "text": document_chunk.text, + "metadata": document_chunk.metadata.dict(), + "created_at": created_at, + }, + ) + + def _create_document_chunk_id(self, external_id: Optional[str]) -> str: + if external_id is None: + return uuid.uuid4().hex + return uuid.uuid5(self.UUID_NAMESPACE, external_id).hex + + def _convert_query_to_search_request( + self, query: QueryWithEmbedding + ) -> rest.SearchRequest: + return rest.SearchRequest( + vector=query.embedding, + filter=self._convert_metadata_filter_to_qdrant_filter(query.filter), + limit=query.top_k, # type: ignore + with_payload=True, + with_vector=False, + ) + + def _convert_metadata_filter_to_qdrant_filter( + self, + metadata_filter: Optional[DocumentMetadataFilter] = None, + ids: Optional[List[str]] = None, + ) -> Optional[rest.Filter]: + if metadata_filter is None and ids is None: + return None + + must_conditions, should_conditions = [], [] + + # Filtering by document ids + if ids and len(ids) > 0: + for document_id in ids: + should_conditions.append( + rest.FieldCondition( + key="metadata.document_id", + match=rest.MatchValue(value=document_id), + ) + ) + + # Equality filters for the payload attributes + if metadata_filter: + meta_attributes_keys = { + "document_id": "metadata.document_id", + "source": "metadata.source", + "source_id": "metadata.source_id", + "author": "metadata.author", + } + + for meta_attr_name, payload_key in meta_attributes_keys.items(): + attr_value = getattr(metadata_filter, meta_attr_name) + if attr_value is None: + continue + + must_conditions.append( + rest.FieldCondition( + key=payload_key, match=rest.MatchValue(value=attr_value) + ) + ) + + # Date filters use range filtering + start_date = metadata_filter.start_date + end_date = metadata_filter.end_date + if start_date or end_date: + gte_filter = ( + to_unix_timestamp(start_date) if start_date is not None else None + ) + lte_filter = ( + to_unix_timestamp(end_date) if end_date is not None else None + ) + must_conditions.append( + rest.FieldCondition( + key="created_at", + range=rest.Range( + gte=gte_filter, + lte=lte_filter, + ), + ) + ) + + if 0 == len(must_conditions) and 0 == len(should_conditions): + return None + + return rest.Filter(must=must_conditions, should=should_conditions) + + def _convert_scored_point_to_document_chunk_with_score( + self, scored_point: rest.ScoredPoint + ) -> DocumentChunkWithScore: + payload = scored_point.payload or {} + return DocumentChunkWithScore( + id=payload.get("id"), + text=scored_point.payload.get("text"), # type: ignore + metadata=scored_point.payload.get("metadata"), # type: ignore + embedding=scored_point.vector, # type: ignore + score=scored_point.score, + ) + + def _set_up_collection( + self, vector_size: int, distance: str, recreate_collection: bool + ): + distance = 
rest.Distance[distance.upper()] + + if recreate_collection: + self._recreate_collection(distance, vector_size) + + try: + collection_info = self.client.get_collection(self.collection_name) + current_distance = collection_info.config.params.vectors.distance # type: ignore + current_vector_size = collection_info.config.params.vectors.size # type: ignore + + if current_distance != distance: + raise ValueError( + f"Collection '{self.collection_name}' already exists in Qdrant, " + f"but it is configured with a similarity '{current_distance.name}'. " + f"If you want to use that collection, but with a different " + f"similarity, please set `recreate_collection=True` argument." + ) + + if current_vector_size != vector_size: + raise ValueError( + f"Collection '{self.collection_name}' already exists in Qdrant, " + f"but it is configured with a vector size '{current_vector_size}'. " + f"If you want to use that collection, but with a different " + f"vector size, please set `recreate_collection=True` argument." + ) + except (UnexpectedResponse, _InactiveRpcError): + self._recreate_collection(distance, vector_size) + + def _recreate_collection(self, distance: rest.Distance, vector_size: int): + self.client.recreate_collection( + self.collection_name, + vectors_config=rest.VectorParams( + size=vector_size, + distance=distance, + ), + ) + + # Create the payload index for the document_id metadata attribute, as it is + # used to delete the document related entries + self.client.create_payload_index( + self.collection_name, + field_name="metadata.document_id", + field_type=PayloadSchemaType.KEYWORD, + ) + + # Create the payload index for the created_at attribute, to make the lookup + # by range filters faster + self.client.create_payload_index( + self.collection_name, + field_name="created_at", + field_schema=PayloadSchemaType.INTEGER, + ) diff --git a/datastore/providers/redis_datastore.py b/datastore/providers/redis_datastore.py new file mode 100644 index 000000000..06263f00d --- /dev/null +++ b/datastore/providers/redis_datastore.py @@ -0,0 +1,382 @@ +import asyncio +import logging +import os +import re +import json +import redis.asyncio as redis +import numpy as np + +from redis.commands.search.query import Query as RediSearchQuery +from redis.commands.search.indexDefinition import IndexDefinition, IndexType +from redis.commands.search.field import ( + TagField, + TextField, + NumericField, + VectorField, +) +from typing import Dict, List, Optional +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentMetadataFilter, + DocumentChunkWithScore, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, +) +from services.date import to_unix_timestamp + +# Read environment variables for Redis +REDIS_HOST = os.environ.get("REDIS_HOST", "localhost") +REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379)) +REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") +REDIS_INDEX_NAME = os.environ.get("REDIS_INDEX_NAME", "index") +REDIS_DOC_PREFIX = os.environ.get("REDIS_DOC_PREFIX", "doc") +REDIS_DISTANCE_METRIC = os.environ.get("REDIS_DISTANCE_METRIC", "COSINE") +REDIS_INDEX_TYPE = os.environ.get("REDIS_INDEX_TYPE", "FLAT") +assert REDIS_INDEX_TYPE in ("FLAT", "HNSW") + +# OpenAI Ada Embeddings Dimension +VECTOR_DIMENSION = 1536 + +# RediSearch constants +REDIS_DEFAULT_ESCAPED_CHARS = re.compile(r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]") +REDIS_SEARCH_SCHEMA = { + "document_id": TagField("$.document_id", as_name="document_id"), + "metadata": { + # "source_id": 
TagField("$.metadata.source_id", as_name="source_id"), + "source": TagField("$.metadata.source", as_name="source"), + # "author": TextField("$.metadata.author", as_name="author"), + # "created_at": NumericField("$.metadata.created_at", as_name="created_at"), + }, + "embedding": VectorField( + "$.embedding", + REDIS_INDEX_TYPE, + { + "TYPE": "FLOAT64", + "DIM": VECTOR_DIMENSION, + "DISTANCE_METRIC": REDIS_DISTANCE_METRIC, + "INITIAL_CAP": 500, + }, + as_name="embedding", + ), +} + + +def unpack_schema(d: dict): + for v in d.values(): + if isinstance(v, dict): + yield from unpack_schema(v) + else: + yield v + + +class RedisDataStore(DataStore): + def __init__(self, client: redis.Redis): + self.client = client + # Init default metadata with sentinal values in case the document written has no metadata + self._default_metadata = { + field: "_null_" for field in REDIS_SEARCH_SCHEMA["metadata"] + } + + ### Redis Helper Methods ### + + @classmethod + async def init(cls): + """ + Setup the index if it does not exist. + """ + try: + # Connect to the Redis Client + logging.info("Connecting to Redis") + client = redis.Redis( + host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD + ) + except Exception as e: + logging.error(f"Error setting up Redis: {e}") + raise e + + try: + # Check for existence of RediSearch Index + await client.ft(REDIS_INDEX_NAME).info() + logging.info(f"RediSearch index {REDIS_INDEX_NAME} already exists") + except: + # Create the RediSearch Index + logging.info(f"Creating new RediSearch index {REDIS_INDEX_NAME}") + definition = IndexDefinition( + prefix=[REDIS_DOC_PREFIX], index_type=IndexType.JSON + ) + fields = list(unpack_schema(REDIS_SEARCH_SCHEMA)) + await client.ft(REDIS_INDEX_NAME).create_index( + fields=fields, definition=definition + ) + return cls(client) + + @staticmethod + def _redis_key(document_id: str, chunk_id: str) -> str: + """ + Create the JSON key for document chunks in Redis. + + Args: + document_id (str): Document Identifier + chunk_id (str): Chunk Identifier + + Returns: + str: JSON key string. + """ + return f"doc:{document_id}:chunk:{chunk_id}" + + @staticmethod + def _escape(value: str) -> str: + """ + Escape filter value. + + Args: + value (str): Value to escape. + + Returns: + str: Escaped filter value for RediSearch. + """ + + def escape_symbol(match) -> str: + value = match.group(0) + return f"\\{value}" + + return REDIS_DEFAULT_ESCAPED_CHARS.sub(escape_symbol, value) + + def _get_redis_chunk(self, chunk: DocumentChunk) -> dict: + """ + Convert DocumentChunk into a JSON object for storage + in Redis. + + Args: + chunk (DocumentChunk): Chunk of a Document. + + Returns: + dict: JSON object for storage in Redis. + """ + # Convert chunk -> dict + data = chunk.__dict__ + metadata = chunk.metadata.__dict__ + data["chunk_id"] = data.pop("id") + + # Prep Redis Metadata + redis_metadata = dict(self._default_metadata) + if metadata: + for field, value in metadata.items(): + if value: + if field == "created_at": + redis_metadata[field] = to_unix_timestamp(value) # type: ignore + else: + redis_metadata[field] = value + data["metadata"] = redis_metadata + return data + + def _get_redis_query(self, query: QueryWithEmbedding) -> RediSearchQuery: + """ + Convert a QueryWithEmbedding into a RediSearchQuery. + + Args: + query (QueryWithEmbedding): Search query. + + Returns: + RediSearchQuery: Query for RediSearch. 
+ """ + query_str: str = "" + filter_str: str = "" + + # RediSearch field type to query string + def _typ_to_str(typ, field, value) -> str: # type: ignore + if isinstance(typ, TagField): + return f"@{field}:{{{self._escape(value)}}} " + elif isinstance(typ, TextField): + return f"@{field}:{self._escape(value)} " + elif isinstance(typ, NumericField): + num = to_unix_timestamp(value) + match field: + case "start_date": + return f"@{field}:[{num} +inf] " + case "end_date": + return f"@{field}:[-inf {num}] " + + # Build filter + if query.filter: + for field, value in query.filter.__dict__.items(): + if not value: + continue + if field in REDIS_SEARCH_SCHEMA: + filter_str += _typ_to_str(REDIS_SEARCH_SCHEMA[field], field, value) + elif field in REDIS_SEARCH_SCHEMA["metadata"]: + if field == "source": # handle the enum + value = value.value + filter_str += _typ_to_str( + REDIS_SEARCH_SCHEMA["metadata"][field], field, value + ) + elif field in ["start_date", "end_date"]: + filter_str += _typ_to_str( + REDIS_SEARCH_SCHEMA["metadata"]["created_at"], field, value + ) + + # Postprocess filter string + filter_str = filter_str.strip() + filter_str = filter_str if filter_str else "*" + + # Prepare query string + query_str = ( + f"({filter_str})=>[KNN {query.top_k} @embedding $embedding as score]" + ) + return ( + RediSearchQuery(query_str) + .sort_by("score") + .paging(0, query.top_k) + .dialect(2) + ) + + async def _redis_delete(self, keys: List[str]): + """ + Delete a list of keys from Redis. + + Args: + keys (List[str]): List of keys to delete. + """ + # Delete the keys + await asyncio.gather(*[self.client.delete(key) for key in keys]) + + ####### + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a list of list of document chunks and inserts them into the database. + Return a list of document ids. + """ + # Initialize a list of ids to return + doc_ids: List[str] = [] + + # Loop through the dict items + for doc_id, chunk_list in chunks.items(): + + # Append the id to the ids list + doc_ids.append(doc_id) + + # Write all chunks associated with a document + n = min(len(chunk_list), 50) + semaphore = asyncio.Semaphore(n) + + async def _write(chunk: DocumentChunk): + async with semaphore: + # Get redis key and chunk object + key = self._redis_key(doc_id, chunk.id) # type: ignore + data = self._get_redis_chunk(chunk) + await self.client.json().set(key, "$", data) + + # Concurrently gather writes + await asyncio.gather(*[_write(chunk) for i, chunk in enumerate(chunk_list)]) + return doc_ids + + async def _query( + self, + queries: List[QueryWithEmbedding], + ) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and + returns a list of query results with matching document chunks and scores. 
+ """ + # Prepare results object + results: List[QueryResult] = [] + + # Use asyncio for concurrent search + n = min(len(queries), 50) + semaphore = asyncio.Semaphore(n) + + async def _single_query(query: QueryWithEmbedding) -> QueryResult: + logging.info(f"Query: {query.query}") + # Extract Redis query + redis_query: RediSearchQuery = self._get_redis_query(query) + # Perform a single query + async with semaphore: + embedding = np.array(query.embedding, dtype=np.float64).tobytes() + # Get vector query response from Redis + query_response = await self.client.ft(REDIS_INDEX_NAME).search( + redis_query, {"embedding": embedding} # type: ignore + ) + return query_response + + # Concurrently gather query results + logging.info(f"Gathering {len(queries)} query results", flush=True) # type: ignore + query_responses = await asyncio.gather( + *[_single_query(query) for query in queries] + ) + + # Iterate through responses and construct results + for query, query_response in zip(queries, query_responses): + + # Iterate through nearest neighbor documents + query_results: List[DocumentChunkWithScore] = [] + for doc in query_response.docs: + # Create a document chunk with score object with the result data + doc_json = json.loads(doc.json) + result = DocumentChunkWithScore( + id=doc_json["metadata"]["document_id"], + score=doc.score, + text=doc_json["text"], + metadata=doc_json["metadata"], + ) + query_results.append(result) + + # Add to overall results + results.append(QueryResult(query=query.query, results=query_results)) + return results + + async def _find_keys(self, pattern: str) -> List[str]: + return await self.client.keys(pattern=pattern) + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """ + Removes vectors by ids, filter, or everything in the datastore. + Returns whether the operation was successful. + """ + # Delete all vectors from the index if delete_all is True + if delete_all: + try: + logging.info(f"Deleting all documents from index") + await self.client.ft(REDIS_INDEX_NAME).dropindex(True) + logging.info(f"Deleted all documents successfully") + return True + except Exception as e: + logging.info(f"Error deleting all documents: {e}") + raise e + + # Delete by filter + if filter: + # TODO - extend this to work with other metadata filters? 
+ if filter.document_id: + try: + keys = await self._find_keys(f"{REDIS_DOC_PREFIX}:{filter.document_id}:*") + await self._redis_delete(keys) + logging.info(f"Deleted document {filter.document_id} successfully") + except Exception as e: + logging.info(f"Error deleting document {filter.document_id}: {e}") + raise e + + # Delete by explicit ids (Redis keys) + if ids: + try: + logging.info(f"Deleting document ids {ids}") + keys = [] + # find all keys associated with the document ids + for document_id in ids: + doc_keys = await self._find_keys(pattern=f"{REDIS_DOC_PREFIX}:{document_id}:*") + keys.extend(doc_keys) + # delete all keys + logging.info(f"Deleting {len(keys)} keys from Redis") + await self._redis_delete(keys) + except Exception as e: + logging.info(f"Error deleting ids: {e}") + raise e + + return True diff --git a/datastore/providers/weaviate_datastore.py b/datastore/providers/weaviate_datastore.py new file mode 100644 index 000000000..1ee9d0ae4 --- /dev/null +++ b/datastore/providers/weaviate_datastore.py @@ -0,0 +1,372 @@ +# TODO +import asyncio +from typing import Dict, List, Optional +from loguru import logger +from weaviate import Client +import weaviate +import os +import uuid + +from weaviate.util import generate_uuid5 + +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + DocumentChunkWithScore, + Source, +) + + +WEAVIATE_HOST = os.environ.get("WEAVIATE_HOST", "http://127.0.0.1") +WEAVIATE_PORT = os.environ.get("WEAVIATE_PORT", "8080") +WEAVIATE_USERNAME = os.environ.get("WEAVIATE_USERNAME", None) +WEAVIATE_PASSWORD = os.environ.get("WEAVIATE_PASSWORD", None) +WEAVIATE_SCOPES = os.environ.get("WEAVIATE_SCOPE", None) +WEAVIATE_INDEX = os.environ.get("WEAVIATE_INDEX", "OpenAIDocument") + +WEAVIATE_BATCH_SIZE = int(os.environ.get("WEAVIATE_BATCH_SIZE", 20)) +WEAVIATE_BATCH_DYNAMIC = os.environ.get("WEAVIATE_BATCH_DYNAMIC", False) +WEAVIATE_BATCH_TIMEOUT_RETRIES = int(os.environ.get("WEAVIATE_TIMEOUT_RETRIES", 3)) +WEAVIATE_BATCH_NUM_WORKERS = int(os.environ.get("WEAVIATE_BATCH_NUM_WORKERS", 1)) + +SCHEMA = { + "class": WEAVIATE_INDEX, + "description": "The main class", + "properties": [ + { + "name": "chunk_id", + "dataType": ["string"], + "description": "The chunk id", + }, + { + "name": "document_id", + "dataType": ["string"], + "description": "The document id", + }, + { + "name": "text", + "dataType": ["text"], + "description": "The chunk's text", + }, + { + "name": "source", + "dataType": ["string"], + "description": "The source of the data", + }, + { + "name": "source_id", + "dataType": ["string"], + "description": "The source id", + }, + { + "name": "url", + "dataType": ["string"], + "description": "The source url", + }, + { + "name": "created_at", + "dataType": ["date"], + "description": "Creation date of document", + }, + { + "name": "author", + "dataType": ["string"], + "description": "Document author", + }, + ], +} + + +def extract_schema_properties(schema): + properties = schema["properties"] + + return {property["name"] for property in properties} + + +class WeaviateDataStore(DataStore): + def handle_errors(self, results: Optional[List[dict]]) -> List[str]: + if not self or not results: + return [] + + error_messages = [] + for result in results: + if ( + "result" not in result + or "errors" not in result["result"] + or "error" not in result["result"]["errors"] + ): + continue + for message in result["result"]["errors"]["error"]: + 
error_messages.append(message["message"]) + logger.exception(message["message"]) + + return error_messages + + def __init__(self): + auth_credentials = self._build_auth_credentials() + + url = f"{WEAVIATE_HOST}:{WEAVIATE_PORT}" + + logger.debug( + f"Connecting to weaviate instance at {url} with credential type {type(auth_credentials).__name__}" + ) + self.client = Client(url, auth_client_secret=auth_credentials) + self.client.batch.configure( + batch_size=WEAVIATE_BATCH_SIZE, + dynamic=WEAVIATE_BATCH_DYNAMIC, # type: ignore + callback=self.handle_errors, # type: ignore + timeout_retries=WEAVIATE_BATCH_TIMEOUT_RETRIES, + num_workers=WEAVIATE_BATCH_NUM_WORKERS, + ) + + if self.client.schema.contains(SCHEMA): + current_schema = self.client.schema.get(WEAVIATE_INDEX) + current_schema_properties = extract_schema_properties(current_schema) + + logger.debug( + f"Found index {WEAVIATE_INDEX} with properties {current_schema_properties}" + ) + logger.debug("Will reuse this schema") + else: + new_schema_properties = extract_schema_properties(SCHEMA) + logger.debug( + f"Creating index {WEAVIATE_INDEX} with properties {new_schema_properties}" + ) + self.client.schema.create_class(SCHEMA) + + @staticmethod + def _build_auth_credentials(): + if WEAVIATE_USERNAME and WEAVIATE_PASSWORD: + return weaviate.auth.AuthClientPassword( + WEAVIATE_USERNAME, WEAVIATE_PASSWORD, WEAVIATE_SCOPES + ) + else: + return None + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """ + Takes in a list of list of document chunks and inserts them into the database. + Return a list of document ids. + """ + doc_ids = [] + + with self.client.batch as batch: + for doc_id, doc_chunks in chunks.items(): + logger.debug(f"Upserting {doc_id} with {len(doc_chunks)} chunks") + for doc_chunk in doc_chunks: + # we generate a uuid regardless of the format of the document_id because + # weaviate needs a uuid to store each document chunk and + # a document chunk cannot share the same uuid + doc_uuid = generate_uuid5(doc_chunk, WEAVIATE_INDEX) + metadata = doc_chunk.metadata + doc_chunk_dict = doc_chunk.dict() + doc_chunk_dict.pop("metadata") + for key, value in metadata.dict().items(): + doc_chunk_dict[key] = value + doc_chunk_dict["chunk_id"] = doc_chunk_dict.pop("id") + doc_chunk_dict["source"] = ( + doc_chunk_dict.pop("source").value + if doc_chunk_dict["source"] + else None + ) + embedding = doc_chunk_dict.pop("embedding") + + batch.add_data_object( + uuid=doc_uuid, + data_object=doc_chunk_dict, + class_name=WEAVIATE_INDEX, + vector=embedding, + ) + + doc_ids.append(doc_id) + batch.flush() + return doc_ids + + async def _query( + self, + queries: List[QueryWithEmbedding], + ) -> List[QueryResult]: + """ + Takes in a list of queries with embeddings and filters and returns a list of query results with matching document chunks and scores. 
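+
+        Example (minimal sketch, assuming a populated index and a pre-computed
+        1536-dimensional embedding):
+
+            results = await datastore._query(
+                [QueryWithEmbedding(query="notre dame", embedding=[0.1] * 1536)]
+            )
+            best_chunk = results[0].results[0]  # highest-scoring DocumentChunkWithScore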
+ """ + + async def _single_query(query: QueryWithEmbedding) -> QueryResult: + logger.debug(f"Query: {query.query}") + if not hasattr(query, "filter") or not query.filter: + result = ( + self.client.query.get( + WEAVIATE_INDEX, + [ + "chunk_id", + "document_id", + "text", + "source", + "source_id", + "url", + "created_at", + "author", + ], + ) + .with_hybrid(query=query.query, alpha=0.5, vector=query.embedding) + .with_limit(query.top_k) # type: ignore + .with_additional(["score", "vector"]) + .do() + ) + else: + filters_ = self.build_filters(query.filter) + result = ( + self.client.query.get( + WEAVIATE_INDEX, + [ + "chunk_id", + "document_id", + "text", + "source", + "source_id", + "url", + "created_at", + "author", + ], + ) + .with_hybrid(query=query.query, alpha=0.5, vector=query.embedding) + .with_where(filters_) + .with_limit(query.top_k) # type: ignore + .with_additional(["score", "vector"]) + .do() + ) + + query_results: List[DocumentChunkWithScore] = [] + response = result["data"]["Get"][WEAVIATE_INDEX] + + for resp in response: + result = DocumentChunkWithScore( + id=resp["chunk_id"], + text=resp["text"], + embedding=resp["_additional"]["vector"], + score=resp["_additional"]["score"], + metadata=DocumentChunkMetadata( + document_id=resp["document_id"] if resp["document_id"] else "", + source=Source(resp["source"]), + source_id=resp["source_id"], + url=resp["url"], + created_at=resp["created_at"], + author=resp["author"], + ), + ) + query_results.append(result) + return QueryResult(query=query.query, results=query_results) + + return await asyncio.gather(*[_single_query(query) for query in queries]) + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + # TODO + """ + Removes vectors by ids, filter, or everything in the datastore. + Returns whether the operation was successful. 
+ """ + if delete_all: + logger.debug(f"Deleting all vectors in index {WEAVIATE_INDEX}") + self.client.schema.delete_all() + return True + + if ids: + operands = [ + {"path": ["document_id"], "operator": "Equal", "valueString": id} + for id in ids + ] + + where_clause = {"operator": "Or", "operands": operands} + + logger.debug(f"Deleting vectors from index {WEAVIATE_INDEX} with ids {ids}") + result = self.client.batch.delete_objects( + class_name=WEAVIATE_INDEX, where=where_clause, output="verbose" + ) + + if not bool(result["results"]["successful"]): + logger.debug( + f"Failed to delete the following objects: {result['results']['objects']}" + ) + + if filter: + where_clause = self.build_filters(filter) + + logger.debug( + f"Deleting vectors from index {WEAVIATE_INDEX} with filter {where_clause}" + ) + result = self.client.batch.delete_objects( + class_name=WEAVIATE_INDEX, where=where_clause + ) + + if not bool(result["results"]["successful"]): + logger.debug( + f"Failed to delete the following objects: {result['results']['objects']}" + ) + + return True + + @staticmethod + def build_filters(filter): + if filter.source: + filter.source = filter.source.value + + operands = [] + filter_conditions = { + "source": { + "operator": "Equal", + "value": "query.filter.source.value", + "value_key": "valueString", + }, + "start_date": {"operator": "GreaterThanEqual", "value_key": "valueDate"}, + "end_date": {"operator": "LessThanEqual", "value_key": "valueDate"}, + "default": {"operator": "Equal", "value_key": "valueString"}, + } + + for attr, value in filter.__dict__.items(): + if value is not None: + filter_condition = filter_conditions.get( + attr, filter_conditions["default"] + ) + value_key = filter_condition["value_key"] + + operand = { + "path": [ + attr + if not (attr == "start_date" or attr == "end_date") + else "created_at" + ], + "operator": filter_condition["operator"], + value_key: value, + } + + operands.append(operand) + + return {"operator": "And", "operands": operands} + + @staticmethod + def _is_valid_weaviate_id(candidate_id: str) -> bool: + """ + Check if candidate_id is a valid UUID for weaviate's use + + Weaviate supports UUIDs of version 3, 4 and 5. This function checks if the candidate_id is a valid UUID of one of these versions. + See https://weaviate.io/developers/weaviate/more-resources/faq#q-are-there-restrictions-on-uuid-formatting-do-i-have-to-adhere-to-any-standards + for more information. 
+ """ + acceptable_version = [3, 4, 5] + + try: + result = uuid.UUID(candidate_id) + if result.version not in acceptable_version: + return False + else: + return True + except ValueError: + return False diff --git a/datastore/providers/zilliz_datastore.py b/datastore/providers/zilliz_datastore.py new file mode 100644 index 000000000..5c5c90a53 --- /dev/null +++ b/datastore/providers/zilliz_datastore.py @@ -0,0 +1,409 @@ +import os +import asyncio + +from typing import Dict, List, Optional +from pymilvus import ( + Collection, + connections, + utility, + FieldSchema, + DataType, + CollectionSchema, +) +from uuid import uuid4 + + +from services.date import to_unix_timestamp +from datastore.datastore import DataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + Source, + DocumentMetadataFilter, + QueryResult, + QueryWithEmbedding, + DocumentChunkWithScore, +) + +ZILLIZ_COLLECTION = os.environ.get("ZILLIZ_COLLECTION") or "c" + uuid4().hex +ZILLIZ_URI = os.environ.get("ZILLIZ_URI") +ZILLIZ_USER = os.environ.get("ZILLIZ_USER") +ZILLIZ_PASSWORD = os.environ.get("ZILLIZ_PASSWORD") +ZILLIZ_USE_SECURITY = False if ZILLIZ_PASSWORD is None else True + + +UPSERT_BATCH_SIZE = 100 +OUTPUT_DIM = 1536 + + +class Required: + pass + + +# The fields names that we are going to be storing within Zilliz Cloud, the field declaration for schema creation, and the default value +SCHEMA = [ + ( + "pk", + FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True), + Required, + ), + ( + "embedding", + FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=OUTPUT_DIM), + Required, + ), + ( + "text", + FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535), + Required, + ), + ( + "document_id", + FieldSchema(name="document_id", dtype=DataType.VARCHAR, max_length=65535), + "", + ), + ( + "source_id", + FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=65535), + "", + ), + ( + "id", + FieldSchema( + name="id", + dtype=DataType.VARCHAR, + max_length=65535, + ), + "", + ), + ( + "source", + FieldSchema(name="source", dtype=DataType.VARCHAR, max_length=65535), + "", + ), + ("url", FieldSchema(name="url", dtype=DataType.VARCHAR, max_length=65535), ""), + ("created_at", FieldSchema(name="created_at", dtype=DataType.INT64), -1), + ( + "author", + FieldSchema(name="author", dtype=DataType.VARCHAR, max_length=65535), + "", + ), +] + + +class ZillizDataStore(DataStore): + def __init__(self, create_new: Optional[bool] = False): + """Create a Zilliz DataStore. + + The Zilliz Datastore allows for storing your indexes and metadata within a Zilliz Cloud instance. + + Args: + create_new (Optional[bool], optional): Whether to overwrite if collection already exists. Defaults to True. 
+ """ + + # # TODO: Auto infer the fields + # non_string_fields = [('embedding', List[float]), ('created_at', int)] + # fields_to_index = list(DocumentChunkMetadata.__fields__.keys()) + # fields_to_index = list(DocumentChunk.__fields__.keys()) + + # Check if the connection already exists + try: + i = [ + connections.get_connection_addr(x[0]) + for x in connections.list_connections() + ].index({"address": ZILLIZ_URI, "user": ZILLIZ_USER}) + self.alias = connections.list_connections()[i][0] + except ValueError: + # Connect to the Zilliz instance using the passed in Enviroment variables + self.alias = uuid4().hex + connections.connect(alias=self.alias, uri=ZILLIZ_URI, user=ZILLIZ_USER, password=ZILLIZ_PASSWORD, secure=ZILLIZ_USE_SECURITY) # type: ignore + + self._create_collection(create_new) # type: ignore + + def _create_collection(self, create_new: bool) -> None: + """Create a collection based on enviroment and passed in variables. + + Args: + create_new (bool): Whether to overwrite if collection already exists. + """ + + # If the collection exists and create_new is True, drop the existing collection + if utility.has_collection(ZILLIZ_COLLECTION, using=self.alias) and create_new: + utility.drop_collection(ZILLIZ_COLLECTION, using=self.alias) + + # Check if the collection doesnt exist + if utility.has_collection(ZILLIZ_COLLECTION, using=self.alias) is False: + # If it doesnt exist use the field params from init to create a new schem + schema = [field[1] for field in SCHEMA] + schema = CollectionSchema(schema) + # Use the schema to create a new collection + self.col = Collection( + ZILLIZ_COLLECTION, + schema=schema, + consistency_level="Strong", + using=self.alias, + ) + else: + # If the collection exists, point to it + self.col = Collection(ZILLIZ_COLLECTION, consistency_level="Strong", using=self.alias) # type: ignore + + # If no index on the collection, create one + if len(self.col.indexes) == 0: + i_p = {"metric_type": "L2", "index_type": "AUTOINDEX", "params": {}} + self.col.create_index("embedding", index_params=i_p) + + self.col.load() + + async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]: + """Upsert chunks into the datastore. + + Args: + chunks (Dict[str, List[DocumentChunk]]): A list of DocumentChunks to insert + + Raises: + e: Error in upserting data. + + Returns: + List[str]: The document_id's that were inserted. 
+ """ + # The doc id's to return for the upsert + doc_ids: List[str] = [] + # List to collect all the insert data + insert_data = [[] for _ in range(len(SCHEMA) - 1)] + # Go through each document chunklist and grab the data + for doc_id, chunk_list in chunks.items(): + # Append the doc_id to the list we are returning + doc_ids.append(doc_id) + # Examine each chunk in the chunklist + for chunk in chunk_list: + # Extract data from the chunk + list_of_data = self._get_values(chunk) + # Check if the data is valid + if list_of_data is not None: + # Append each field to the insert_data + for x in range(len(insert_data)): + insert_data[x].append(list_of_data[x]) + # Slice up our insert data into batches + batches = [ + insert_data[i : i + UPSERT_BATCH_SIZE] + for i in range(0, len(insert_data), UPSERT_BATCH_SIZE) + ] + + # Attempt to insert each batch into our collection + for batch in batches: + # Check if empty batch + if len(batch[0]) != 0: + try: + print(f"Upserting batch of size {len(batch[0])}") + self.col.insert(batch) + print(f"Upserted batch successfully") + except Exception as e: + print(f"Error upserting batch: {e}") + raise e + + # This setting perfoms flushes after insert. Small insert == bad to use + # self.col.flush() + + return doc_ids + + def _get_values(self, chunk: DocumentChunk) -> List[any] | None: # type: ignore + """Convert the chunk into a list of values to insert whose indexes align with fields. + + Args: + chunk (DocumentChunk): The chunk to convert. + + Returns: + List (any): The values to insert. + """ + # Convert DocumentChunk and its sub models to dict + values = chunk.dict() + # Unpack the metadata into the same dict + meta = values.pop("metadata") + values.update(meta) + + # Convert date to int timestamp form + if values["created_at"]: + values["created_at"] = to_unix_timestamp(values["created_at"]) + + # If source exists, change from Source object to the string value it holds + if values["source"]: + values["source"] = values["source"].value + # List to collect data we will return + ret = [] + # Grab data responding to each field excluding the hidden auto pk field + for key, _, default in SCHEMA[1:]: + # Grab the data at the key and default to our defaults set in init + x = values.get(key) or default + # If one of our required fields is missing, ignore the entire entry + if x is Required: + print("Chunk " + values["id"] + " missing " + key + " skipping") + return None + # Add the corresponding value if it passes the tests + ret.append(x) + return ret + + async def _query( + self, + queries: List[QueryWithEmbedding], + ) -> List[QueryResult]: + """Query the QueryWithEmbedding against the ZillizDocumentSearch + + Search the embedding and its filter in the collection. + + Args: + queries (List[QueryWithEmbedding]): The list of searches to perform. + + Returns: + List[QueryResult]: Results for each search. 
+ """ + # Async to perform the query, adapted from pinecone implementation + async def _single_query(query: QueryWithEmbedding) -> QueryResult: + + filter = None + # Set the filter to expression that is valid for Zilliz + if query.filter != None: + # Either a valid filter or None will be returned + filter = self._get_filter(query.filter) + + # Perform our search + res = self.col.search( + data=[query.embedding], + anns_field="embedding", + param={"metric_type": "L2", "params": {}}, + limit=query.top_k, + expr=filter, + output_fields=[ + field[0] for field in SCHEMA[2:] + ], # Ignoring pk, embedding + ) + # Results that will hold our DocumentChunkWithScores + results = [] + # Parse every result for our search + for hit in res[0]: # type: ignore + # The distance score for the search result, falls under DocumentChunkWithScore + score = hit.score + # Our metadata info, falls under DocumentChunkMetadata + metadata = {} + # Grab the values that correspond to our fields, ignore pk and embedding. + for x in [field[0] for field in SCHEMA[2:]]: + metadata[x] = hit.entity.get(x) + # If the source isnt valid, conver to None + if metadata["source"] not in Source.__members__: + metadata["source"] = None + # Text falls under the DocumentChunk + text = metadata.pop("text") + # Id falls under the DocumentChunk + ids = metadata.pop("id") + chunk = DocumentChunkWithScore( + id=ids, + score=score, + text=text, + metadata=DocumentChunkMetadata(**metadata), + ) + results.append(chunk) + + # TODO: decide on doing queries to grab the embedding itself, slows down performance as double query occurs + + return QueryResult(query=query.query, results=results) + + results: List[QueryResult] = await asyncio.gather( + *[_single_query(query) for query in queries] + ) + return results + + async def delete( + self, + ids: Optional[List[str]] = None, + filter: Optional[DocumentMetadataFilter] = None, + delete_all: Optional[bool] = None, + ) -> bool: + """Delete the entities based either on the chunk_id of the vector, + + Args: + ids (Optional[List[str]], optional): The document_ids to delete. Defaults to None. + filter (Optional[DocumentMetadataFilter], optional): The filter to delet by. Defaults to None. + delete_all (Optional[bool], optional): Whether to drop the collection and recreate it. Defaults to None. 
+ """ + # If deleting all, drop and create the new collection + if delete_all: + # Release the collection from memory + self.col.release() + # Drop the collection + self.col.drop() + # Recreate the new collection + self._create_collection(True) + return True + + # Keep track of how many we have deleted for later printing + delete_count = 0 + + # Check if empty ids + if ids != None: + if len(ids) != 0: + # Add quotation marks around the string format id + ids = ['"' + str(id) + '"' for id in ids] + # Query for the pk's of entries that match id's + ids = self.col.query(f"document_id in [{','.join(ids)}]") + # Convert to list of pks + ids = [str(entry["pk"]) for entry in ids] # type: ignore + # Check to see if there are valid pk's to delete + if len(ids) != 0: + # Delete the entries for each pk + res = self.col.delete(f"pk in [{','.join(ids)}]") + # Incremet our deleted count + delete_count += int(res.delete_count) # type: ignore + + # Check if empty filter + if filter != None: + # Convert filter to Zilliz expression + filter = self._get_filter(filter) # type: ignore + # Check if there is anything to filter + if len(filter) != 0: # type: ignore + # Query for the pk's of entries that match filter + filter = self.col.query(filter) # type: ignore + # Convert to list of pks + filter = [str(entry["pk"]) for entry in filter] # type: ignore + # Check to see if there are valid pk's to delete + if len(filter) != 0: # type: ignore + # Delete the entries + res = self.col.delete(f"pk in [{','.join(filter)}]") # type: ignore + # Increment our delete count + delete_count += int(res.delete_count) # type: ignore + + # This setting perfoms flushes after delete. Small delete == bad to use + # self.col.flush() + + return True + + def _get_filter(self, filter: DocumentMetadataFilter) -> Optional[str]: + """Converts a DocumentMetdataFilter to the expression that Zilliz takes. + + Args: + filter (DocumentMetadataFilter): The Filter to convert to Zilliz expression. + + Returns: + Optional[str]: The filter if valid, otherwise None. + """ + filters = [] + # Go through all the fields and thier values + for field, value in filter.dict().items(): + # Check if the Value is empty + if value is not None: + # Convert start_date to int and add greater than or equal logic + if field == "start_date": + filters.append( + "(created_at >= " + str(to_unix_timestamp(value)) + ")" + ) + # Convert end_date to int and add less than or equal logic + elif field == "end_date": + filters.append( + "(created_at <= " + str(to_unix_timestamp(value)) + ")" + ) + # Convert Source to its string value and check equivalency + elif field == "source": + filters.append("(" + field + ' == "' + str(value.value) + '")') + # Check equivalency of rest of string fields + else: + filters.append("(" + field + ' == "' + str(value) + '")') + # Join all our expressions with `and`` + return " and ".join(filters) diff --git a/examples/authentication-methods/no-auth/ai-plugin.json b/examples/authentication-methods/no-auth/ai-plugin.json new file mode 100644 index 000000000..bf538beaf --- /dev/null +++ b/examples/authentication-methods/no-auth/ai-plugin.json @@ -0,0 +1,18 @@ +{ + "schema_version": "v1", + "name_for_model": "retrieval", + "name_for_human": "Retrieval Plugin", + "description_for_model": "Plugin for searching through the user's documents (such as files, emails, and more) to find answers to questions and retrieve relevant information. 
Use it whenever a user asks something that might be found in their personal information.", + "description_for_human": "Plugin to search through your personal documents.", + "auth": { + "type": "none" + }, + "api": { + "type": "openapi", + "url": "https://your-app-url.com/.well-known/openapi.yaml" + }, + "logo_url": "https://your-app-url.com/.well-known/logo.png", + "contact_email": "hello@contact.com", + "legal_info_url": "hello@legal.com" +} + diff --git a/examples/authentication-methods/no-auth/main.py b/examples/authentication-methods/no-auth/main.py new file mode 100644 index 000000000..c8d5da851 --- /dev/null +++ b/examples/authentication-methods/no-auth/main.py @@ -0,0 +1,133 @@ +# This is a version of the main.py file found in ../../../server/main.py without authentication. +# Copy and paste this into the main file at ../../../server/main.py if you choose to use no authentication for your retrieval plugin. + +import os +import uvicorn +from fastapi import FastAPI, File, HTTPException, Depends, Body, UploadFile +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi.staticfiles import StaticFiles + +from models.api import ( + DeleteRequest, + DeleteResponse, + QueryRequest, + QueryResponse, + UpsertRequest, + UpsertResponse, +) +from datastore.factory import get_datastore +from services.file import get_document_from_file + + +app = FastAPI() +app.mount("/.well-known", StaticFiles(directory=".well-known"), name="static") + +# Create a sub-application, in order to access just the query endpoints in the OpenAPI schema, found at http://0.0.0.0:8000/sub/openapi.json when the app is running locally +sub_app = FastAPI( + title="Retrieval Plugin API", + description="A retrieval API for querying and filtering documents based on natural language queries and metadata", + version="1.0.0", + servers=[{"url": "https://your-app-url.com"}], +) +app.mount("/sub", sub_app) + + +@app.post( + "/upsert-file", + response_model=UpsertResponse, +) +async def upsert_file( + file: UploadFile = File(...), +): + document = await get_document_from_file(file) + + try: + ids = await datastore.upsert([document]) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail=f"str({e})") + + +@app.post( + "/upsert", + response_model=UpsertResponse, +) +async def upsert( + request: UpsertRequest = Body(...), +): + try: + ids = await datastore.upsert(request.documents) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.post( + "/query", + response_model=QueryResponse, +) +async def query_main( + request: QueryRequest = Body(...), +): + try: + results = await datastore.query( + request.queries, + ) + return QueryResponse(results=results) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@sub_app.post( + "/query", + response_model=QueryResponse, + description='Accepts an array of search query objects, each with a natural language query string ("query") and an optional metadata filter ("filter"). Filters are not necessary in most cases, but can sometimes help refine search results based on criteria such as document source or time period. Send multiple queries to compare information from different sources or break down complex questions into sub-questions. 
If you receive a ResponseTooLargeError, try splitting up the queries into multiple calls to this endpoint.', +) +async def query( + request: QueryRequest = Body(...), +): + try: + results = await datastore.query( + request.queries, + ) + return QueryResponse(results=results) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.delete( + "/delete", + response_model=DeleteResponse, +) +async def delete( + request: DeleteRequest = Body(...), +): + if not (request.ids or request.filter or request.delete_all): + raise HTTPException( + status_code=400, + detail="One of ids, filter, or delete_all is required", + ) + try: + success = await datastore.delete( + ids=request.ids, + filter=request.filter, + delete_all=request.delete_all, + ) + return DeleteResponse(success=success) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.on_event("startup") +async def startup(): + global datastore + datastore = await get_datastore() + + +def start(): + uvicorn.run("server.main:app", host="0.0.0.0", port=8000, reload=True) diff --git a/examples/authentication-methods/oauth/ai-plugin.json b/examples/authentication-methods/oauth/ai-plugin.json new file mode 100644 index 000000000..89d70c3c8 --- /dev/null +++ b/examples/authentication-methods/oauth/ai-plugin.json @@ -0,0 +1,25 @@ +{ + "schema_version": "v1", + "name_for_model": "retrieval", + "name_for_human": "Retrieval Plugin", + "description_for_model": "Plugin for searching through the user's documents (such as files, emails, and more) to find answers to questions and retrieve relevant information. Use it whenever a user asks something that might be found in their personal information.", + "description_for_human": "Plugin to search through your personal documents.", + "auth" : { + "type":"oauth", + "client_url":"e.g. https:///oauth/v2/authorize", + "authorization_url":"e.g. https:///api/oauth.v2.access", + "scope":"search:read", + "authorization_content_type":"application/x-www-form-urlencoded", + "verification_tokens":{ + "openai":"" + } + }, + "api":{ + "url": "https://your-app-url.com/.well-known/openapi.yaml", + "has_user_authentication":true, + "type":"openapi" + }, + "logo_url": "https://your-app-url.com/.well-known/logo.png", + "contact_email": "hello@contact.com", + "legal_info_url": "hello@legal.com" +} diff --git a/examples/authentication-methods/service-http/ai-plugin.json b/examples/authentication-methods/service-http/ai-plugin.json new file mode 100644 index 000000000..d7b060767 --- /dev/null +++ b/examples/authentication-methods/service-http/ai-plugin.json @@ -0,0 +1,22 @@ +{ + "schema_version": "v1", + "name_for_model": "retrieval", + "name_for_human": "Retrieval Plugin", + "description_for_model": "Plugin for searching through the user's documents (such as files, emails, and more) to find answers to questions and retrieve relevant information. 
Use it whenever a user asks something that might be found in their personal information.", + "description_for_human": "Plugin to search through your personal documents.", + "auth":{ + "type":"service_http", + "authorization_type":"bearer", + "verification_tokens":{ + "openai":"" + } + }, + "api":{ + "url": "https://your-app-url.com/.well-known/openapi.yaml", + "has_user_authentication":false, + "type":"openapi" + }, + "logo_url": "https://your-app-url.com/.well-known/logo.png", + "contact_email": "hello@contact.com", + "legal_info_url": "hello@legal.com" +} diff --git a/examples/authentication-methods/user-http/ai-plugin.json b/examples/authentication-methods/user-http/ai-plugin.json new file mode 100644 index 000000000..c4495251a --- /dev/null +++ b/examples/authentication-methods/user-http/ai-plugin.json @@ -0,0 +1,19 @@ +{ + "schema_version": "v1", + "name_for_model": "retrieval", + "name_for_human": "Retrieval Plugin", + "description_for_model": "Plugin for searching through the user's documents (such as files, emails, and more) to find answers to questions and retrieve relevant information. Use it whenever a user asks something that might be found in their personal information.", + "description_for_human": "Plugin to search through your personal documents.", + "auth": { + "type": "user_http", + "authorization_type": "bearer" + }, + "api": { + "type": "openapi", + "url": "https://your-app-url.com/.well-known/openapi.yaml", + "has_user_authentication": false + }, + "logo_url": "https://your-app-url.com/.well-known/logo.png", + "contact_email": "hello@contact.com", + "legal_info_url": "hello@legal.com" +} \ No newline at end of file diff --git a/examples/docker/milvus/self-hosted/docker-compose.yml b/examples/docker/milvus/self-hosted/docker-compose.yml new file mode 100644 index 000000000..795f3bf25 --- /dev/null +++ b/examples/docker/milvus/self-hosted/docker-compose.yml @@ -0,0 +1,52 @@ +version: '3.5' + +services: + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.5 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2022-03-17T06-34-49Z + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data + command: minio server /minio_data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + standalone: + container_name: milvus-standalone + image: milvusdb/milvus:v2.2.3 + command: ["milvus", "run", "standalone"] + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + +networks: + default: + name: milvus diff --git a/examples/docker/qdrant/README.md b/examples/docker/qdrant/README.md new file mode 100644 index 000000000..6d00b25ae --- /dev/null +++ b/examples/docker/qdrant/README.md @@ -0,0 +1,46 @@ +# Running the Retrieval Plugin with Qdrant in Docker Containers + +To set up the ChatGPT retrieval plugin with a 
single instance of a Qdrant vector database, follow these steps: + +## Set Environment Variables + +Set the following environment variables: + +```bash +# Provide your own OpenAI API key in order to start. +export OPENAI_API_KEY="" +# This is an example of a minimal token generated by https://jwt.io/ +export BEARER_TOKEN="" +``` + +## Run Qdrant and the Retrieval Plugin in Docker Containers + +Both Docker containers might be launched with docker-compose: + +```bash +docker-compose up -d +``` + +## Store the Documents + +Store an initial batch of documents by calling the `/upsert` endpoint: + +```bash +curl -X POST \ + -H "Content-type: application/json" \ + -H "Authorization: Bearer $BEARER_TOKEN" \ + --data-binary '@documents.json' \ + "http://localhost:80/upsert" +``` + +## Send a Test Query + +You can query Qdrant to find relevant document chunks by calling the `/query` endpoint: + +```bash +curl -X POST \ + -H "Content-type: application/json" \ + -H "Authorization: Bearer $BEARER_TOKEN" \ + --data-binary '@queries.json' \ + "http://localhost:80/query" +``` diff --git a/examples/docker/qdrant/docker-compose.yaml b/examples/docker/qdrant/docker-compose.yaml new file mode 100644 index 000000000..ead9fd6e3 --- /dev/null +++ b/examples/docker/qdrant/docker-compose.yaml @@ -0,0 +1,17 @@ +services: + retrieval-app: + build: + context: ../../../ + dockerfile: Dockerfile + image: openai/chatgpt-retrieval-plugin + ports: + - "80:80" + depends_on: + - qdrant + environment: + DATASTORE: "qdrant" + QDRANT_URL: "http://qdrant" + BEARER_TOKEN: "${BEARER_TOKEN}" + OPENAI_API_KEY: "${OPENAI_API_KEY}" + qdrant: + image: qdrant/qdrant:v1.0.3 \ No newline at end of file diff --git a/examples/docker/qdrant/documents.json b/examples/docker/qdrant/documents.json new file mode 100644 index 000000000..7dc6572fb --- /dev/null +++ b/examples/docker/qdrant/documents.json @@ -0,0 +1,23 @@ +{ + "documents": [ + { + "id": "openai", + "text": "OpenAI is an AI research and deployment company. Our mission is to ensure that artificial general intelligence benefits all of humanity.", + "metadata": { + "created_at": "2023-03-14" + } + }, + { + "id": "chatgpt", + "text": "ChatGPT is a sibling model to InstructGPT, which is trained to follow an instruction in a prompt and provide a detailed response. The dialogue format makes it possible for ChatGPT to answer followup questions, admit its mistakes, challenge incorrect premises, and reject inappropriate requests." + }, + { + "id": "qdrant", + "text": "Qdrant is a vector similarity engine & vector database. It deploys as an API service providing search for the nearest high-dimensional vectors. With Qdrant, embeddings or neural network encoders can be turned into full-fledged applications for matching, searching, recommending, and much more!", + "metadata": { + "created_at": "2023-03-14", + "author": "Kacper Łukawski" + } + } + ] +} \ No newline at end of file diff --git a/examples/docker/qdrant/queries.json b/examples/docker/qdrant/queries.json new file mode 100644 index 000000000..6d967e20d --- /dev/null +++ b/examples/docker/qdrant/queries.json @@ -0,0 +1,7 @@ +{ + "queries": [ + { + "query": "What vector database should I use?" 
+ } + ] +} \ No newline at end of file diff --git a/examples/docker/redis/docker-compose.yml b/examples/docker/redis/docker-compose.yml new file mode 100644 index 000000000..b3c197e08 --- /dev/null +++ b/examples/docker/redis/docker-compose.yml @@ -0,0 +1,18 @@ +version: "3.9" + +services: + redis: + image: redis/redis-stack-server:latest + ports: + - "6379:6379" + volumes: + - redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "-h", "localhost", "-p", "6379", "ping"] + interval: 2s + timeout: 1m30s + retries: 5 + start_period: 5s + +volumes: + redis_data: \ No newline at end of file diff --git a/examples/memory/README.md b/examples/memory/README.md new file mode 100644 index 000000000..ccf2e03cf --- /dev/null +++ b/examples/memory/README.md @@ -0,0 +1,15 @@ +# ChatGPT Retrieval Plugin with Memory + +This example demonstrates how to give ChatGPT the ability to remember information from conversations and store it in the retrieval plugin for later use. By allowing the model to access the `/upsert` endpoint, it can save snippets from the conversation to the vector database and retrieve them when needed. + +## Setup + +To enable ChatGPT to save information from conversations, follow these steps: + +- Copy the contents of [openapi.yaml](openapi.yaml) into the main [openapi.yaml](../../.well-known/openapi.yaml) file. + +- Copy the contents of [ai-plugin.json](ai-plugin.json) into the main [ai-plugin.json](../../.well-known/ai-plugin.json) file. + +**Optional:** If you make any changes to the plugin instructions or metadata models, you can also copy the contents of [main.py](main.py) into the main [main.py](../../server/main.py) file. This will allow you to access the openapi.json at `http://0.0.0.0:8000/sub/openapi.json` when you run the app locally. You can convert from JSON to YAML format with [Swagger Editor](https://editor.swagger.io/). Alternatively, you can replace the openapi.yaml file with an openapi.json file. + +After completing these steps, ChatGPT will be able to access your plugin's `/upsert` endpoint and save snippets from the conversation to the vector database. This enables the model to remember information from previous conversations and retrieve it when needed. diff --git a/examples/memory/ai-plugin.json b/examples/memory/ai-plugin.json new file mode 100644 index 000000000..05394b2f7 --- /dev/null +++ b/examples/memory/ai-plugin.json @@ -0,0 +1,19 @@ +{ + "schema_version": "v1", + "name_for_model": "retrieval", + "name_for_human": "Retrieval Plugin", + "description_for_model": "Plugin for searching through the user's documents (such as files, emails, and more) to find answers to questions and retrieve relevant information. 
Use it whenever a user asks something that might be found in their personal information, or asks you to save information for later.", + "description_for_human": "Search through your documents", + "auth": { + "type": "user_http", + "authorization_type": "bearer" + }, + "api": { + "type": "openapi", + "url": "https://your-app-url.com/.well-known/openapi.yaml", + "has_user_authentication": false + }, + "logo_url": "https://your-app-url.com/.well-known/logo.png", + "contact_email": "hello@contact.com", + "legal_info_url": "hello@legal.com" + } \ No newline at end of file diff --git a/examples/memory/main.py b/examples/memory/main.py new file mode 100644 index 000000000..6fad11ca9 --- /dev/null +++ b/examples/memory/main.py @@ -0,0 +1,166 @@ +# This is a version of the main.py file found in ../../server/main.py that also gives ChatGPT access to the upsert endpoint +# (allowing it to save information from the chat back to the vector) database. +# Copy and paste this into the main file at ../../server/main.py if you choose to give the model access to the upsert endpoint +# and want to access the openapi.json when you run the app locally at http://0.0.0.0:8000/sub/openapi.json. +import os +import uvicorn +from fastapi import FastAPI, File, HTTPException, Depends, Body, UploadFile +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi.staticfiles import StaticFiles + +from models.api import ( + DeleteRequest, + DeleteResponse, + QueryRequest, + QueryResponse, + UpsertRequest, + UpsertResponse, +) +from datastore.factory import get_datastore +from services.file import get_document_from_file + + +app = FastAPI() +app.mount("/.well-known", StaticFiles(directory=".well-known"), name="static") + +# Create a sub-application, in order to access just the upsert and query endpoints in the OpenAPI schema, found at http://0.0.0.0:8000/sub/openapi.json when the app is running locally +sub_app = FastAPI( + title="Retrieval Plugin API", + description="A retrieval API for querying and filtering documents based on natural language queries and metadata", + version="1.0.0", + servers=[{"url": "https://your-app-url.com"}], +) +app.mount("/sub", sub_app) + +bearer_scheme = HTTPBearer() +BEARER_TOKEN = os.environ.get("BEARER_TOKEN") +assert BEARER_TOKEN is not None + + +def validate_token(credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)): + if credentials.scheme != "Bearer" or credentials.credentials != BEARER_TOKEN: + raise HTTPException(status_code=401, detail="Invalid or missing token") + return credentials + + +@app.post( + "/upsert-file", + response_model=UpsertResponse, +) +async def upsert_file( + file: UploadFile = File(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + document = await get_document_from_file(file) + + try: + ids = await datastore.upsert([document]) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail=f"str({e})") + + +@app.post( + "/upsert", + response_model=UpsertResponse, +) +async def upsert_main( + request: UpsertRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + ids = await datastore.upsert(request.documents) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@sub_app.post( + "/upsert", + response_model=UpsertResponse, + description="Save information from chat conversations as documents, only if 
the user asks you to. Accepts an array of documents, each document has a text field with the conversation text and possible questions that could lead to the answer, and metadata including the source (chat) and created_at timestamp. Confirm with the user before saving information, and ask if they want to add details / context.", +) +async def upsert( + request: UpsertRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + ids = await datastore.upsert(request.documents) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.post( + "/query", + response_model=QueryResponse, +) +async def query_main( + request: QueryRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + results = await datastore.query( + request.queries, + ) + return QueryResponse(results=results) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@sub_app.post( + "/query", + response_model=QueryResponse, + description='Accepts an array of search query objects, each with a natural language query string ("query") and an optional metadata filter ("filter"). Filters are not necessary in most cases, but can sometimes help refine search results based on criteria such as document source or time period. Send multiple queries to compare information from different sources or break down complex questions into sub-questions. If you receive a ResponseTooLargeError, try splitting up the queries into multiple calls to this endpoint.', +) +async def query( + request: QueryRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + results = await datastore.query( + request.queries, + ) + return QueryResponse(results=results) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.delete( + "/delete", + response_model=DeleteResponse, +) +async def delete( + request: DeleteRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + if not (request.ids or request.filter or request.delete_all): + raise HTTPException( + status_code=400, + detail="One of ids, filter, or delete_all is required", + ) + try: + success = await datastore.delete( + ids=request.ids, + filter=request.filter, + delete_all=request.delete_all, + ) + return DeleteResponse(success=success) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.on_event("startup") +async def startup(): + global datastore + datastore = await get_datastore() + + +def start(): + uvicorn.run("server.main:app", host="0.0.0.0", port=8000, reload=True) diff --git a/examples/memory/openapi.yaml b/examples/memory/openapi.yaml new file mode 100644 index 000000000..47d810b21 --- /dev/null +++ b/examples/memory/openapi.yaml @@ -0,0 +1,276 @@ +openapi: 3.0.2 +info: + title: Retrieval Plugin API + description: A retrieval API for querying and filtering documents based on natural language queries and metadata + version: 1.0.0 +servers: + - url: https://your-app-url.com +paths: + /upsert: + post: + summary: Upsert + description: Save information from chat conversations as documents, only if the user asks you to. 
Accepts an array of documents, each document has a text field with the conversation text and possible questions that could lead to the answer, and metadata including the source (chat) and created_at timestamp. Confirm with the user before saving information, and ask if they want to add details / context. + operationId: upsert_upsert_post + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpsertRequest" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/UpsertResponse" + "422": + description: Validation Error + content: + application/json: + schema: + $ref: "#/components/schemas/HTTPValidationError" + security: + - HTTPBearer: [] + /query: + post: + summary: Query + description: Accepts an array of search query objects, each with a natural language query string ("query") and an optional metadata filter ("filter"). Filters are not necessary in most cases, but can sometimes help refine search results based on criteria such as document source or time period. Send multiple queries to compare information from different sources or break down complex questions into sub-questions. If you receive a ResponseTooLargeError, try splitting up the queries into multiple calls to this endpoint. + operationId: query_query_post + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/QueryRequest" + required: true + responses: + "200": + description: Successful Response + content: + application/json: + schema: + $ref: "#/components/schemas/QueryResponse" + "422": + description: Validation Error + content: + application/json: + schema: + $ref: "#/components/schemas/HTTPValidationError" + security: + - HTTPBearer: [] +components: + schemas: + Document: + title: Document + required: + - text + type: object + properties: + id: + title: Id + type: string + text: + title: Text + type: string + metadata: + $ref: "#/components/schemas/DocumentMetadata" + DocumentChunkMetadata: + title: DocumentChunkMetadata + type: object + properties: + source: + $ref: "#/components/schemas/Source" + source_id: + title: Source Id + type: string + url: + title: Url + type: string + created_at: + title: Created At + type: string + author: + title: Author + type: string + document_id: + title: Document Id + type: string + DocumentChunkWithScore: + title: DocumentChunkWithScore + required: + - text + - metadata + - score + type: object + properties: + id: + title: Id + type: string + text: + title: Text + type: string + metadata: + $ref: "#/components/schemas/DocumentChunkMetadata" + embedding: + title: Embedding + type: array + items: + type: number + score: + title: Score + type: number + DocumentMetadata: + title: DocumentMetadata + type: object + properties: + source: + $ref: "#/components/schemas/Source" + source_id: + title: Source Id + type: string + url: + title: Url + type: string + created_at: + title: Created At + type: string + author: + title: Author + type: string + DocumentMetadataFilter: + title: DocumentMetadataFilter + type: object + properties: + document_id: + title: Document Id + type: string + source: + $ref: "#/components/schemas/Source" + source_id: + title: Source Id + type: string + author: + title: Author + type: string + start_date: + title: Start Date + type: string + end_date: + title: End Date + type: string + HTTPValidationError: + title: HTTPValidationError + type: object + properties: + detail: + title: Detail + type: array + items: + $ref: 
"#/components/schemas/ValidationError" + Query: + title: Query + required: + - query + type: object + properties: + query: + title: Query + type: string + filter: + $ref: "#/components/schemas/DocumentMetadataFilter" + top_k: + title: Top K + type: integer + default: 3 + QueryRequest: + title: QueryRequest + required: + - queries + type: object + properties: + queries: + title: Queries + type: array + items: + $ref: "#/components/schemas/Query" + QueryResponse: + title: QueryResponse + required: + - results + type: object + properties: + results: + title: Results + type: array + items: + $ref: "#/components/schemas/QueryResult" + QueryResult: + title: QueryResult + required: + - query + - results + type: object + properties: + query: + title: Query + type: string + results: + title: Results + type: array + items: + $ref: "#/components/schemas/DocumentChunkWithScore" + Source: + title: Source + enum: + - email + - file + - chat + type: string + description: An enumeration. + UpsertRequest: + title: UpsertRequest + required: + - documents + type: object + properties: + documents: + title: Documents + type: array + items: + $ref: "#/components/schemas/Document" + UpsertResponse: + title: UpsertResponse + required: + - ids + type: object + properties: + ids: + title: Ids + type: array + items: + type: string + ValidationError: + title: ValidationError + required: + - loc + - msg + - type + type: object + properties: + loc: + title: Location + type: array + items: + anyOf: + - type: string + - type: integer + msg: + title: Message + type: string + type: + title: Error Type + type: string + securitySchemes: + HTTPBearer: + type: http + scheme: bearer diff --git a/examples/providers/pinecone/semantic-search.ipynb b/examples/providers/pinecone/semantic-search.ipynb new file mode 100644 index 000000000..343c13848 --- /dev/null +++ b/examples/providers/pinecone/semantic-search.ipynb @@ -0,0 +1,809 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using the Pinecone Retrieval App\n", + "\n", + "In this walkthrough we will see how to use the retrieval API with a Pinecone datastore for *semantic search / question-answering*.\n", + "\n", + "Before running this notebook you should have already initialized the retrieval API and have it running locally or elsewhere. The full instructions for doing this are found in the [project README]().\n", + "\n", + "We will summarize the instructions (specific to the Pinecone datastore) before moving on to the walkthrough." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## App Quickstart" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Install Python 3.10 if not already installed.\n", + "\n", + "2. Clone the `retrieval-app` repository:\n", + "\n", + "```\n", + "git clone git@github.com:openai/retrieval-app.git\n", + "```\n", + "\n", + "3. Navigate to the app directory:\n", + "\n", + "```\n", + "cd /path/to/retrieval-app\n", + "```\n", + "\n", + "4. Install `poetry`:\n", + "\n", + "```\n", + "pip install poetry\n", + "```\n", + "\n", + "5. Create a new virtual environment:\n", + "\n", + "```\n", + "poetry env use python3.10\n", + "```\n", + "\n", + "6. Install the `retrieval-app` dependencies:\n", + "\n", + "```\n", + "poetry install\n", + "```\n", + "\n", + "7. Set app environment variables:\n", + "\n", + "* `BEARER_TOKEN`: Secret token used by the app to authorize incoming requests. 
We will later include this in the request `headers`. The token can be generated however you prefer, such as using [jwt.io](https://jwt.io/).\n", + "\n", + "* `OPENAI_API_KEY`: The OpenAI API key used for generating embeddings with the `text-embedding-ada-002` model. [Get an API key here](https://platform.openai.com/account/api-keys)!\n", + "\n", + "8. Set Pinecone-specific environment variables:\n", + "\n", + "* `DATASTORE`: set to `pinecone`.\n", + "\n", + "* `PINECONE_API_KEY`: Set to your Pinecone API key. This requires a free Pinecone account and can be [found in the Pinecone console](https://app.pinecone.io/).\n", + "\n", + "* `PINECONE_ENVIRONMENT`: Set to your Pinecone environment, looks like `us-east1-gcp`, `us-west1-aws`, and can be found next to your API key in the [Pinecone console](https://app.pinecone.io/).\n", + "\n", + "* `PINECONE_INDEX`: Set this to your chosen index name. The name you choose is your choice, we just recommend setting it to something descriptive like `\"openai-retrieval-app\"`. *Note that index names are restricted to alphanumeric characters, `\"-\"`, and can contain a maximum of 45 characters.*\n", + "\n", + "8. Run the app with:\n", + "\n", + "```\n", + "poetry run start\n", + "```\n", + "\n", + "If running the app locally you should see something like:\n", + "\n", + "```\n", + "INFO: Uvicorn running on http://0.0.0.0:8000\n", + "INFO: Application startup complete.\n", + "```\n", + "\n", + "In that case, the app has automatically connected to our index (specified by `PINECONE_INDEX`), if no index with that name existed beforehand, the app creates one for us.\n", + "\n", + "Now we're ready to move on to populating our index with some data." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Required Libraries" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are a few Python libraries we must `pip install` for this notebook to run, those are:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU datasets pandas tqdm" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparing Data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we will use the **S**tanford **Qu**estion **A**nswering **D**ataset (SQuAD), which we download from Hugging Face Datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Found cached dataset squad (/Users/jamesbriggs/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)\n" + ] + }, + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['id', 'title', 'context', 'question', 'answers'],\n", + " num_rows: 87599\n", + "})" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "data = load_dataset(\"squad\", split=\"train\")\n", + "data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert to Pandas dataframe for easier preprocessing steps." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[HTML preview of data.head() omitted; the text/plain rendering below shows the same rows]
" + ], + "text/plain": [ + " id title \\\n", + "0 5733be284776f41900661182 University_of_Notre_Dame \n", + "1 5733be284776f4190066117f University_of_Notre_Dame \n", + "2 5733be284776f41900661180 University_of_Notre_Dame \n", + "3 5733be284776f41900661181 University_of_Notre_Dame \n", + "4 5733be284776f4190066117e University_of_Notre_Dame \n", + "\n", + " context \\\n", + "0 Architecturally, the school has a Catholic cha... \n", + "1 Architecturally, the school has a Catholic cha... \n", + "2 Architecturally, the school has a Catholic cha... \n", + "3 Architecturally, the school has a Catholic cha... \n", + "4 Architecturally, the school has a Catholic cha... \n", + "\n", + " question \\\n", + "0 To whom did the Virgin Mary allegedly appear i... \n", + "1 What is in front of the Notre Dame Main Building? \n", + "2 The Basilica of the Sacred heart at Notre Dame... \n", + "3 What is the Grotto at Notre Dame? \n", + "4 What sits on top of the Main Building at Notre... \n", + "\n", + " answers \n", + "0 {'text': ['Saint Bernadette Soubirous'], 'answ... \n", + "1 {'text': ['a copper statue of Christ'], 'answe... \n", + "2 {'text': ['the Main Building'], 'answer_start'... \n", + "3 {'text': ['a Marian place of prayer and reflec... \n", + "4 {'text': ['a golden statue of the Virgin Mary'... " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = data.to_pandas()\n", + "data.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dataset contains a lot of duplicate `context` paragraphs, this is because each `context` can have many relevant questions. We don't want these duplicates so we remove like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18891\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitlecontextquestionanswers
05733be284776f41900661182University_of_Notre_DameArchitecturally, the school has a Catholic cha...To whom did the Virgin Mary allegedly appear i...{'text': ['Saint Bernadette Soubirous'], 'answ...
55733bf84d058e614000b61beUniversity_of_Notre_DameAs at most other universities, Notre Dame's st...When did the Scholastic Magazine of Notre dame...{'text': ['September 1876'], 'answer_start': [...
105733bed24776f41900661188University_of_Notre_DameThe university is the major seat of the Congre...Where is the headquarters of the Congregation ...{'text': ['Rome'], 'answer_start': [119]}
155733a6424776f41900660f51University_of_Notre_DameThe College of Engineering was established in ...How many BS level degrees are offered in the C...{'text': ['eight'], 'answer_start': [487]}
205733a70c4776f41900660f64University_of_Notre_DameAll of Notre Dame's undergraduate students are...What entity provides help with the management ...{'text': ['Learning Resource Center'], 'answer...
\n", + "
" + ], + "text/plain": [ + " id title \\\n", + "0 5733be284776f41900661182 University_of_Notre_Dame \n", + "5 5733bf84d058e614000b61be University_of_Notre_Dame \n", + "10 5733bed24776f41900661188 University_of_Notre_Dame \n", + "15 5733a6424776f41900660f51 University_of_Notre_Dame \n", + "20 5733a70c4776f41900660f64 University_of_Notre_Dame \n", + "\n", + " context \\\n", + "0 Architecturally, the school has a Catholic cha... \n", + "5 As at most other universities, Notre Dame's st... \n", + "10 The university is the major seat of the Congre... \n", + "15 The College of Engineering was established in ... \n", + "20 All of Notre Dame's undergraduate students are... \n", + "\n", + " question \\\n", + "0 To whom did the Virgin Mary allegedly appear i... \n", + "5 When did the Scholastic Magazine of Notre dame... \n", + "10 Where is the headquarters of the Congregation ... \n", + "15 How many BS level degrees are offered in the C... \n", + "20 What entity provides help with the management ... \n", + "\n", + " answers \n", + "0 {'text': ['Saint Bernadette Soubirous'], 'answ... \n", + "5 {'text': ['September 1876'], 'answer_start': [... \n", + "10 {'text': ['Rome'], 'answer_start': [119]} \n", + "15 {'text': ['eight'], 'answer_start': [487]} \n", + "20 {'text': ['Learning Resource Center'], 'answer... " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = data.drop_duplicates(subset=[\"context\"])\n", + "print(len(data))\n", + "data.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The format required by the apps `upsert` function is a list of documents like:\n", + "\n", + "```json\n", + "[\n", + " {\n", + " \"id\": \"abc\",\n", + " \"text\": \"some important document text\",\n", + " \"metadata\": {\n", + " \"field1\": \"optional metadata goes here\",\n", + " \"field2\": 54\n", + " }\n", + " },\n", + " {\n", + " \"id\": \"123\",\n", + " \"text\": \"some other important text\",\n", + " \"metadata\": {\n", + " \"field1\": \"another metadata\",\n", + " \"field2\": 71,\n", + " \"field3\": \"not all metadatas need the same structure\"\n", + " }\n", + " }\n", + " ...\n", + "]\n", + "```\n", + "\n", + "Every document *must* have a `\"text\"` field. The `\"id\"` and `\"metadata\"` fields are optional.\n", + "\n", + "To create this format for our SQuAD data we do:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': '5733be284776f41900661182',\n", + " 'text': 'Architecturally, the school has a Catholic character. Atop the Main Building\\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',\n", + " 'metadata': {'title': 'University_of_Notre_Dame'}},\n", + " {'id': '5733bf84d058e614000b61be',\n", + " 'text': \"As at most other universities, Notre Dame's students run a number of news media outlets. 
The nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. Begun as a one-page journal in September 1876, the Scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the United States. The other magazine, The Juggler, is released twice a year and focuses on student literature and artwork. The Dome yearbook is published annually. The newspapers have varying publication interests, with The Observer published daily and mainly reporting university and other news, and staffed by students from both Notre Dame and Saint Mary's College. Unlike Scholastic and The Dome, The Observer is an independent publication and does not have a faculty advisor or any editorial oversight from the University. In 1987, when some students believed that The Observer began to show a conservative bias, a liberal newspaper, Common Sense was published. Likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper Irish Rover went into production. Neither paper is published as often as The Observer; however, all three are distributed to all students. Finally, in Spring 2008 an undergraduate journal for political science research, Beyond Politics, made its debut.\",\n", + " 'metadata': {'title': 'University_of_Notre_Dame'}},\n", + " {'id': '5733bed24776f41900661188',\n", + " 'text': 'The university is the major seat of the Congregation of Holy Cross (albeit not its official headquarters, which are in Rome). Its main seminary, Moreau Seminary, is located on the campus across St. Joseph lake from the Main Building. Old College, the oldest building on campus and located near the shore of St. Mary lake, houses undergraduate seminarians. Retired priests and brothers reside in Fatima House (a former retreat center), Holy Cross House, as well as Columba Hall near the Grotto. The university through the Moreau Seminary has ties to theologian Frederick Buechner. While not Catholic, Buechner has praised writers from Notre Dame and Moreau Seminary created a Buechner Prize for Preaching.',\n", + " 'metadata': {'title': 'University_of_Notre_Dame'}}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "documents = [\n", + " {\n", + " 'id': r['id'],\n", + " 'text': r['context'],\n", + " 'metadata': {\n", + " 'title': r['title']\n", + " }\n", + " } for r in data.to_dict(orient='records')\n", + "]\n", + "documents[:3]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexing the Docs" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're now ready to begin indexing (or *upserting*) our `documents`. To make these requests to the retrieval app API, we will need to provide authorization in the form of the `BEARER_TOKEN` we set earlier. 
We do this below:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "BEARER_TOKEN = os.environ.get(\"BEARER_TOKEN\") or \"BEARER_TOKEN_HERE\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the `BEARER_TOKEN` to create our authorization `headers`:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "headers = {\n", + " \"Authorization\": f\"Bearer {BEARER_TOKEN}\"\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll perform the upsert in batches of `batch_size`. Make sure that the `endpoint_url` variable is set to the correct location for your running *retrieval-app* API." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8694da67455d4bb78cc778e49f69a872", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/10 [00:00" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = requests.post(\n", + " \"http://0.0.0.0:8000/query\",\n", + " headers=headers,\n", + " json={\n", + " 'queries': queries[:3]\n", + " }\n", + ")\n", + "res" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can loop through the responses and see the results returned for each query:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "----------------------------------------------------------------------\n", + "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?\n", + "\n", + "0.83: Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.\n", + "0.81: Within the white inescutcheon, the five quinas (small blue shields) with their five white bezants representing the five wounds of Christ (Portuguese: Cinco Chagas) when crucified and are popularly associated with the \"Miracle of Ourique\". The story associated with this miracle tells that before the Battle of Ourique (25 July 1139), an old hermit appeared before Count Afonso Henriques (future Afonso I) as a divine messenger. He foretold Afonso's victory and assured him that God was watching over him and his peers. The messenger advised him to walk away from his camp, alone, if he heard a nearby chapel bell tolling, in the following night. In doing so, he witnessed an apparition of Jesus on the cross. 
Ecstatic, Afonso heard Jesus promising victories for the coming battles, as well as God's wish to act through Afonso, and his descendants, in order to create an empire which would carry His name to unknown lands, thus choosing the Portuguese to perform great tasks.\n", + "0.79: In 1842, the Bishop of Vincennes, Célestine Guynemer de la Hailandière, offered land to Father Edward Sorin of the Congregation of the Holy Cross, on the condition that he build a college in two years. Fr. Sorin arrived on the site with eight Holy Cross brothers from France and Ireland on November 26, 1842, and began the school using Father Stephen Badin's old log chapel. He soon erected additional buildings, including Old College, the first church, and the first main building. They immediately acquired two students and set about building additions to the campus.\n", + "0.79: Because of its Catholic identity, a number of religious buildings stand on campus. The Old College building has become one of two seminaries on campus run by the Congregation of Holy Cross. The current Basilica of the Sacred Heart is located on the spot of Fr. Sorin's original church, which became too small for the growing college. It is built in French Revival style and it is decorated by stained glass windows imported directly from France. The interior was painted by Luigi Gregori, an Italian painter invited by Fr. Sorin to be artist in residence. The Basilica also features a bell tower with a carillon. Inside the church there are also sculptures by Ivan Mestrovic. The Grotto of Our Lady of Lourdes, which was built in 1896, is a replica of the original in Lourdes, France. It is very popular among students and alumni as a place of prayer and meditation, and it is considered one of the most beloved spots on campus.\n", + "0.78: The funeral, held at the Church of the Madeleine in Paris, was delayed almost two weeks, until 30 October. Entrance was restricted to ticket holders as many people were expected to attend. Over 3,000 people arrived without invitations, from as far as London, Berlin and Vienna, and were excluded.\n", + "----------------------------------------------------------------------\n", + "\n", + "\n", + "----------------------------------------------------------------------\n", + "When did the Scholastic Magazine of Notre dame begin publishing?\n", + "\n", + "0.88: As at most other universities, Notre Dame's students run a number of news media outlets. The nine student-run outlets include three newspapers, both a radio and television station, and several magazines and journals. Begun as a one-page journal in September 1876, the Scholastic magazine is issued twice monthly and claims to be the oldest continuous collegiate publication in the United States. The other magazine, The Juggler, is released twice a year and focuses on student literature and artwork. The Dome yearbook is published annually. The newspapers have varying publication interests, with The Observer published daily and mainly reporting university and other news, and staffed by students from both Notre Dame and Saint Mary's College. Unlike Scholastic and The Dome, The Observer is an independent publication and does not have a faculty advisor or any editorial oversight from the University. In 1987, when some students believed that The Observer began to show a conservative bias, a liberal newspaper, Common Sense was published. Likewise, in 2003, when other students believed that the paper showed a liberal bias, the conservative paper Irish Rover went into production. 
Neither paper is published as often as The Observer; however, all three are distributed to all students.\n", + "0.83: In 1919 Father James Burns became president of Notre Dame, and in three years he produced an academic revolution that brought the school up to national standards by adopting the elective system and moving away from the university's traditional scholastic and classical emphasis. By contrast, the Jesuit colleges, bastions of academic conservatism, were reluctant to move to a system of electives. Their graduates were shut out of Harvard Law School for that reason. Notre Dame continued to grow over the years, adding more colleges, programs, and sports teams. By 1921, with the addition of the College of Commerce, Notre Dame had grown from a small college to a university with five colleges and a professional law school. The university continued to expand and add new residence halls and buildings with each subsequent president.\n", + "0.83: The rise of Hitler and other dictators in the 1930s forced numerous Catholic intellectuals to flee Europe; president John O'Hara brought many to Notre Dame. From Germany came Anton-Hermann Chroust (1907–1982) in classics and law, and Waldemar Gurian a German Catholic intellectual of Jewish descent. Positivism dominated American intellectual life in the 1920s onward but in marked contrast, Gurian received a German Catholic education and wrote his doctoral dissertation under Max Scheler. Ivan Meštrović (1883–1962), a renowned sculptor, brought Croatian culture to campus, 1955–62. Yves Simon (1903–61), brought to ND in the 1940s the insights of French studies in the Aristotelian-Thomistic tradition of philosophy; his own teacher Jacques Maritain (1882–73) was a frequent visitor to campus.\n", + "0.82: In the 18 years under the presidency of Edward Malloy, C.S.C., (1987–2005), there was a rapid growth in the school's reputation, faculty, and resources. He increased the faculty by more than 500 professors; the academic quality of the student body has improved dramatically, with the average SAT score rising from 1240 to 1360; the number of minority students more than doubled; the endowment grew from $350 million to more than $3 billion; the annual operating budget rose from $177 million to more than $650 million; and annual research funding improved from $15 million to more than $70 million. Notre Dame's most recent[when?] capital campaign raised $1.1 billion, far exceeding its goal of $767 million, and is the largest in the history of Catholic higher education.\n", + "0.82: The Rev. John J. Cavanaugh, C.S.C. served as president from 1946 to 1952. Cavanaugh's legacy at Notre Dame in the post-war years was devoted to raising academic standards and reshaping the university administration to suit it to an enlarged educational mission and an expanded student body and stressing advanced studies and research at a time when Notre Dame quadrupled in student census, undergraduate enrollment increased by more than half, and graduate student enrollment grew fivefold. Cavanaugh also established the Lobund Institute for Animal Studies and Notre Dame's Medieval Institute. Cavanaugh also presided over the construction of the Nieuwland Science Hall, Fisher Hall, and the Morris Inn, as well as the Hall of Liberal Arts (now O'Shaughnessy Hall), made possible by a donation from I.A. 
O'Shaughnessy, at the time the largest ever made to an American Catholic university.\n", + "----------------------------------------------------------------------\n", + "\n", + "\n", + "----------------------------------------------------------------------\n", + "Where is the headquarters of the Congregation of the Holy Cross?\n", + "\n", + "0.88: The university is the major seat of the Congregation of Holy Cross (albeit not its official headquarters, which are in Rome). Its main seminary, Moreau Seminary, is located on the campus across St. Joseph lake from the Main Building. Old College, the oldest building on campus and located near the shore of St. Mary lake, houses undergraduate seminarians. Retired priests and brothers reside in Fatima House (a former retreat center), Holy Cross House, as well as Columba Hall near the Grotto. The university through the Moreau Seminary has ties to theologian Frederick Buechner. While not Catholic, Buechner has praised writers from Notre Dame and Moreau Seminary created a Buechner Prize for Preaching.\n", + "0.84: In 1842, the Bishop of Vincennes, Célestine Guynemer de la Hailandière, offered land to Father Edward Sorin of the Congregation of the Holy Cross, on the condition that he build a college in two years. Fr. Sorin arrived on the site with eight Holy Cross brothers from France and Ireland on November 26, 1842, and began the school using Father Stephen Badin's old log chapel. He soon erected additional buildings, including Old College, the first church, and the first main building. They immediately acquired two students and set about building additions to the campus.\n", + "0.84: Because of its Catholic identity, a number of religious buildings stand on campus. The Old College building has become one of two seminaries on campus run by the Congregation of Holy Cross. The current Basilica of the Sacred Heart is located on the spot of Fr. Sorin's original church, which became too small for the growing college. It is built in French Revival style and it is decorated by stained glass windows imported directly from France. The interior was painted by Luigi Gregori, an Italian painter invited by Fr. Sorin to be artist in residence. The Basilica also features a bell tower with a carillon. Inside the church there are also sculptures by Ivan Mestrovic. The Grotto of Our Lady of Lourdes, which was built in 1896, is a replica of the original in Lourdes, France. It is very popular among students and alumni as a place of prayer and meditation, and it is considered one of the most beloved spots on campus.\n", + "0.84: The university is affiliated with the Congregation of Holy Cross (Latin: Congregatio a Sancta Cruce, abbreviated postnominals: \"CSC\"). While religious affiliation is not a criterion for admission, more than 93% of students identify as Christian, with over 80% of the total being Catholic. Collectively, Catholic Mass is celebrated over 100 times per week on campus, and a large campus ministry program provides for the faith needs of the community. There are multitudes of religious statues and artwork around campus, most prominent of which are the statue of Mary on the Main Building, the Notre Dame Grotto, and the Word of Life mural on Hesburgh Library depicting Christ as a teacher. Additionally, every classroom displays a crucifix. 
There are many religious clubs (catholic and non-Catholic) at the school, including Council #1477 of the Knights of Columbus (KOC), Baptist Collegiate Ministry (BCM), Jewish Club, Muslim Student Association, Orthodox Christian Fellowship, The Mormon Club, and many more. The Notre Dame KofC are known for being the first collegiate council of KofC, operating a charitable concession stand during every home football game and owning their own building on campus which can be used as a cigar lounge.\n", + "0.83: Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.\n", + "----------------------------------------------------------------------\n", + "\n", + "\n" + ] + } + ], + "source": [ + "for query_result in res.json()['results']:\n", + " query = query_result['query']\n", + " answers = []\n", + " scores = []\n", + " for result in query_result['results']:\n", + " answers.append(result['text'])\n", + " scores.append(round(result['score'], 2))\n", + " print(\"-\"*70+\"\\n\"+query+\"\\n\\n\"+\"\\n\".join([f\"{s}: {a}\" for a, s in zip(answers, scores)])+\"\\n\"+\"-\"*70+\"\\n\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The top results are all relevant as we would have hoped. With that we've finished. The retrieval app API can be shut down, and to save resources the Pinecone index can be deleted within the [Pinecone console](https://app.pinecone.io/)." 
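Before shutting everything down, it's worth noting that each object in the `queries` list can also carry a `top_k` value and an optional metadata `filter`, mirroring the `Query` and `DocumentMetadataFilter` models defined in `models/models.py` later in this diff. The sketch below is illustrative only, not part of the original walkthrough: it assumes the app is still running locally on port 8000, that `BEARER_TOKEN` is available as an environment variable, and that chunk metadata retains the parent `document_id` (here one of the SQuAD ids upserted above).

```python
import os
import requests

# Illustrative sketch (not part of the original notebook): a query that sets
# top_k explicitly and filters on document_id, using the field names from the
# Query / DocumentMetadataFilter models in models/models.py.
headers = {"Authorization": f"Bearer {os.environ.get('BEARER_TOKEN', 'BEARER_TOKEN_HERE')}"}

payload = {
    "queries": [
        {
            "query": "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?",
            "filter": {"document_id": "5733be284776f41900661182"},  # one of the SQuAD ids above
            "top_k": 2,
        }
    ]
}

res = requests.post("http://0.0.0.0:8000/query", headers=headers, json=payload)
for query_result in res.json()["results"]:
    for result in query_result["results"]:
        print(round(result["score"], 2), result["text"][:80])
```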
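If you would rather do the cleanup programmatically instead of through the console, a rough sketch follows. It assumes the app exposes a `/delete` route that accepts the `DeleteRequest` model from `models/api.py` (that route is not exercised anywhere in this notebook), and it uses the `pinecone-client` package to drop the index itself; treat both calls as illustrative rather than as the documented workflow.

```python
import os

import pinecone  # pip install pinecone-client
import requests

# Illustrative sketch (1): ask the running retrieval app to delete everything
# it manages, assuming a /delete route matching the DeleteRequest model.
headers = {"Authorization": f"Bearer {os.environ.get('BEARER_TOKEN', 'BEARER_TOKEN_HERE')}"}
res = requests.delete(
    "http://0.0.0.0:8000/delete",
    headers=headers,
    json={"delete_all": True},
)
print(res.status_code)

# Illustrative sketch (2): remove the Pinecone index entirely, reusing the same
# environment variables the app was configured with.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ["PINECONE_ENVIRONMENT"],
)
pinecone.delete_index(os.environ["PINECONE_INDEX"])
```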
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "chatgpt-retrieval-plugin-S7h-2AWq-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "1979a773a5778de9a5fa593a629dff0ab3c80c2563810d3e6a8dfb123dc01c7d" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/api.py b/models/api.py new file mode 100644 index 000000000..cdf7872eb --- /dev/null +++ b/models/api.py @@ -0,0 +1,35 @@ +from models.models import ( + Document, + DocumentChunkWithScore, + DocumentMetadataFilter, + Query, + QueryResult, +) +from pydantic import BaseModel +from typing import List, Optional + + +class UpsertRequest(BaseModel): + documents: List[Document] + + +class UpsertResponse(BaseModel): + ids: List[str] + + +class QueryRequest(BaseModel): + queries: List[Query] + + +class QueryResponse(BaseModel): + results: List[QueryResult] + + +class DeleteRequest(BaseModel): + ids: Optional[List[str]] = None + filter: Optional[DocumentMetadataFilter] = None + delete_all: Optional[bool] = False + + +class DeleteResponse(BaseModel): + success: bool diff --git a/models/models.py b/models/models.py new file mode 100644 index 000000000..ee3ad7794 --- /dev/null +++ b/models/models.py @@ -0,0 +1,66 @@ +from pydantic import BaseModel +from typing import List, Optional +from enum import Enum + + +class Source(str, Enum): + email = "email" + file = "file" + chat = "chat" + + +class DocumentMetadata(BaseModel): + source: Optional[Source] = None + source_id: Optional[str] = None + url: Optional[str] = None + created_at: Optional[str] = None + author: Optional[str] = None + + +class DocumentChunkMetadata(DocumentMetadata): + document_id: Optional[str] = None + + +class DocumentChunk(BaseModel): + id: Optional[str] = None + text: str + metadata: DocumentChunkMetadata + embedding: Optional[List[float]] = None + + +class DocumentChunkWithScore(DocumentChunk): + score: float + + +class Document(BaseModel): + id: Optional[str] = None + text: str + metadata: Optional[DocumentMetadata] = None + + +class DocumentWithChunks(Document): + chunks: List[DocumentChunk] + + +class DocumentMetadataFilter(BaseModel): + document_id: Optional[str] = None + source: Optional[Source] = None + source_id: Optional[str] = None + author: Optional[str] = None + start_date: Optional[str] = None # any date string format + end_date: Optional[str] = None # any date string format + + +class Query(BaseModel): + query: str + filter: Optional[DocumentMetadataFilter] = None + top_k: Optional[int] = 3 + + +class QueryWithEmbedding(Query): + embedding: List[float] + + +class QueryResult(BaseModel): + query: str + results: List[DocumentChunkWithScore] diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 000000000..1243d8969 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,2370 @@ +# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand. 
+ +[[package]] +name = "aiohttp" +version = "3.8.4" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5ce45967538fb747370308d3145aa68a074bdecb4f3a300869590f725ced69c1"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b744c33b6f14ca26b7544e8d8aadff6b765a80ad6164fb1a430bbadd593dfb1a"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a45865451439eb320784918617ba54b7a377e3501fb70402ab84d38c2cd891b"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a86d42d7cba1cec432d47ab13b6637bee393a10f664c425ea7b305d1301ca1a3"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee3c36df21b5714d49fc4580247947aa64bcbe2939d1b77b4c8dcb8f6c9faecc"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:176a64b24c0935869d5bbc4c96e82f89f643bcdf08ec947701b9dbb3c956b7dd"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c844fd628851c0bc309f3c801b3a3d58ce430b2ce5b359cd918a5a76d0b20cb5"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5393fb786a9e23e4799fec788e7e735de18052f83682ce2dfcabaf1c00c2c08e"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e4b09863aae0dc965c3ef36500d891a3ff495a2ea9ae9171e4519963c12ceefd"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:adfbc22e87365a6e564c804c58fc44ff7727deea782d175c33602737b7feadb6"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:147ae376f14b55f4f3c2b118b95be50a369b89b38a971e80a17c3fd623f280c9"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:eafb3e874816ebe2a92f5e155f17260034c8c341dad1df25672fb710627c6949"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6cc15d58053c76eacac5fa9152d7d84b8d67b3fde92709195cb984cfb3475ea"}, + {file = "aiohttp-3.8.4-cp310-cp310-win32.whl", hash = "sha256:59f029a5f6e2d679296db7bee982bb3d20c088e52a2977e3175faf31d6fb75d1"}, + {file = "aiohttp-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:fe7ba4a51f33ab275515f66b0a236bcde4fb5561498fe8f898d4e549b2e4509f"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d8ef1a630519a26d6760bc695842579cb09e373c5f227a21b67dc3eb16cfea4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b3f2e06a512e94722886c0827bee9807c86a9f698fac6b3aee841fab49bbfb4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a80464982d41b1fbfe3154e440ba4904b71c1a53e9cd584098cd41efdb188ef"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b631e26df63e52f7cce0cce6507b7a7f1bc9b0c501fcde69742130b32e8782f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f43255086fe25e36fd5ed8f2ee47477408a73ef00e804cb2b5cba4bf2ac7f5e"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4d347a172f866cd1d93126d9b239fcbe682acb39b48ee0873c73c933dd23bd0f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a3fec6a4cb5551721cdd70473eb009d90935b4063acc5f40905d40ecfea23e05"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80a37fe8f7c1e6ce8f2d9c411676e4bc633a8462844e38f46156d07a7d401654"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d1e6a862b76f34395a985b3cd39a0d949ca80a70b6ebdea37d3ab39ceea6698a"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cd468460eefef601ece4428d3cf4562459157c0f6523db89365202c31b6daebb"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:618c901dd3aad4ace71dfa0f5e82e88b46ef57e3239fc7027773cb6d4ed53531"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:652b1bff4f15f6287550b4670546a2947f2a4575b6c6dff7760eafb22eacbf0b"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80575ba9377c5171407a06d0196b2310b679dc752d02a1fcaa2bc20b235dbf24"}, + {file = "aiohttp-3.8.4-cp311-cp311-win32.whl", hash = "sha256:bbcf1a76cf6f6dacf2c7f4d2ebd411438c275faa1dc0c68e46eb84eebd05dd7d"}, + {file = "aiohttp-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:6e74dd54f7239fcffe07913ff8b964e28b712f09846e20de78676ce2a3dc0bfc"}, + {file = "aiohttp-3.8.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:880e15bb6dad90549b43f796b391cfffd7af373f4646784795e20d92606b7a51"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb96fa6b56bb536c42d6a4a87dfca570ff8e52de2d63cabebfd6fb67049c34b6"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a6cadebe132e90cefa77e45f2d2f1a4b2ce5c6b1bfc1656c1ddafcfe4ba8131"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f352b62b45dff37b55ddd7b9c0c8672c4dd2eb9c0f9c11d395075a84e2c40f75"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ab43061a0c81198d88f39aaf90dae9a7744620978f7ef3e3708339b8ed2ef01"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9cb1565a7ad52e096a6988e2ee0397f72fe056dadf75d17fa6b5aebaea05622"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:1b3ea7edd2d24538959c1c1abf97c744d879d4e541d38305f9bd7d9b10c9ec41"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:7c7837fe8037e96b6dd5cfcf47263c1620a9d332a87ec06a6ca4564e56bd0f36"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3b90467ebc3d9fa5b0f9b6489dfb2c304a1db7b9946fa92aa76a831b9d587e99"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:cab9401de3ea52b4b4c6971db5fb5c999bd4260898af972bf23de1c6b5dd9d71"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d1f9282c5f2b5e241034a009779e7b2a1aa045f667ff521e7948ea9b56e0c5ff"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win32.whl", hash = "sha256:5e14f25765a578a0a634d5f0cd1e2c3f53964553a00347998dfdf96b8137f777"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win_amd64.whl", hash = "sha256:4c745b109057e7e5f1848c689ee4fb3a016c8d4d92da52b312f8a509f83aa05e"}, + {file = "aiohttp-3.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:aede4df4eeb926c8fa70de46c340a1bc2c6079e1c40ccf7b0eae1313ffd33519"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4ddaae3f3d32fc2cb4c53fab020b69a05c8ab1f02e0e59665c6f7a0d3a5be54f"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4eb3b82ca349cf6fadcdc7abcc8b3a50ab74a62e9113ab7a8ebc268aad35bb9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bcb89336efa095ea21b30f9e686763f2be4478f1b0a616969551982c4ee4c3b"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c08e8ed6fa3d477e501ec9db169bfac8140e830aa372d77e4a43084d8dd91ab"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6cd05ea06daca6ad6a4ca3ba7fe7dc5b5de063ff4daec6170ec0f9979f6c332"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7a00a9ed8d6e725b55ef98b1b35c88013245f35f68b1b12c5cd4100dddac333"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:de04b491d0e5007ee1b63a309956eaed959a49f5bb4e84b26c8f5d49de140fa9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:40653609b3bf50611356e6b6554e3a331f6879fa7116f3959b20e3528783e699"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dbf3a08a06b3f433013c143ebd72c15cac33d2914b8ea4bea7ac2c23578815d6"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:854f422ac44af92bfe172d8e73229c270dc09b96535e8a548f99c84f82dde241"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win32.whl", hash = "sha256:aeb29c84bb53a84b1a81c6c09d24cf33bb8432cc5c39979021cc0f98c1292a1a"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:db3fc6120bce9f446d13b1b834ea5b15341ca9ff3f335e4a951a6ead31105480"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fabb87dd8850ef0f7fe2b366d44b77d7e6fa2ea87861ab3844da99291e81e60f"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91f6d540163f90bbaef9387e65f18f73ffd7c79f5225ac3d3f61df7b0d01ad15"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d265f09a75a79a788237d7f9054f929ced2e69eb0bb79de3798c468d8a90f945"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d89efa095ca7d442a6d0cbc755f9e08190ba40069b235c9886a8763b03785da"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4dac314662f4e2aa5009977b652d9b8db7121b46c38f2073bfeed9f4049732cd"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe11310ae1e4cd560035598c3f29d86cef39a83d244c7466f95c27ae04850f10"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ddb2a2026c3f6a68c3998a6c47ab6795e4127315d2e35a09997da21865757f8"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e75b89ac3bd27d2d043b234aa7b734c38ba1b0e43f07787130a0ecac1e12228a"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6e601588f2b502c93c30cd5a45bfc665faaf37bbe835b7cfd461753068232074"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a5d794d1ae64e7753e405ba58e08fcfa73e3fad93ef9b7e31112ef3c9a0efb52"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a1f4689c9a1462f3df0a1f7e797791cd6b124ddbee2b570d34e7f38ade0e2c71"}, + {file = 
"aiohttp-3.8.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3032dcb1c35bc330134a5b8a5d4f68c1a87252dfc6e1262c65a7e30e62298275"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8189c56eb0ddbb95bfadb8f60ea1b22fcfa659396ea36f6adcc521213cd7b44d"}, + {file = "aiohttp-3.8.4-cp38-cp38-win32.whl", hash = "sha256:33587f26dcee66efb2fff3c177547bd0449ab7edf1b73a7f5dea1e38609a0c54"}, + {file = "aiohttp-3.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:e595432ac259af2d4630008bf638873d69346372d38255774c0e286951e8b79f"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5a7bdf9e57126dc345b683c3632e8ba317c31d2a41acd5800c10640387d193ed"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:22f6eab15b6db242499a16de87939a342f5a950ad0abaf1532038e2ce7d31567"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7235604476a76ef249bd64cb8274ed24ccf6995c4a8b51a237005ee7a57e8643"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea9eb976ffdd79d0e893869cfe179a8f60f152d42cb64622fca418cd9b18dc2a"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92c0cea74a2a81c4c76b62ea1cac163ecb20fb3ba3a75c909b9fa71b4ad493cf"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:493f5bc2f8307286b7799c6d899d388bbaa7dfa6c4caf4f97ef7521b9cb13719"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a63f03189a6fa7c900226e3ef5ba4d3bd047e18f445e69adbd65af433add5a2"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10c8cefcff98fd9168cdd86c4da8b84baaa90bf2da2269c6161984e6737bf23e"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bca5f24726e2919de94f047739d0a4fc01372801a3672708260546aa2601bf57"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:03baa76b730e4e15a45f81dfe29a8d910314143414e528737f8589ec60cf7391"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8c29c77cc57e40f84acef9bfb904373a4e89a4e8b74e71aa8075c021ec9078c2"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:03543dcf98a6619254b409be2d22b51f21ec66272be4ebda7b04e6412e4b2e14"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17b79c2963db82086229012cff93ea55196ed31f6493bb1ccd2c62f1724324e4"}, + {file = "aiohttp-3.8.4-cp39-cp39-win32.whl", hash = "sha256:34ce9f93a4a68d1272d26030655dd1b58ff727b3ed2a33d80ec433561b03d67a"}, + {file = "aiohttp-3.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:41a86a69bb63bb2fc3dc9ad5ea9f10f1c9c8e282b471931be0268ddd09430b04"}, + {file = "aiohttp-3.8.4.tar.gz", hash = "sha256:bf2e1a9162c1e441bf805a1fd166e249d574ca04e03b34f97e2928769e91ab5c"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "cchardet"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, 
+ {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "anyio" +version = "3.6.2" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" +optional = false +python-versions = ">=3.6.2" +files = [ + {file = "anyio-3.6.2-py3-none-any.whl", hash = "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"}, + {file = "anyio-3.6.2.tar.gz", hash = "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["contextlib2", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (<0.15)", "uvloop (>=0.15)"] +trio = ["trio (>=0.16,<0.22)"] + +[[package]] +name = "arrow" +version = "1.2.3" +description = "Better dates & times for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "arrow-1.2.3-py3-none-any.whl", hash = "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2"}, + {file = "arrow-1.2.3.tar.gz", hash = "sha256:3934b30ca1b9f292376d9db15b19446088d12ec58629bc3f0da28fd55fb633a1"}, +] + +[package.dependencies] +python-dateutil = ">=2.7.0" + +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] + +[[package]] +name = "attrs" +version = "22.2.0" +description = "Classes Without Boilerplate" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, + {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] +tests = ["attrs[tests-no-zope]", "zope.interface"] +tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] + +[[package]] +name = "authlib" +version = "1.2.0" +description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." 
+category = "main" +optional = false +python-versions = "*" +files = [ + {file = "Authlib-1.2.0-py2.py3-none-any.whl", hash = "sha256:4ddf4fd6cfa75c9a460b361d4bd9dac71ffda0be879dbe4292a02e92349ad55a"}, + {file = "Authlib-1.2.0.tar.gz", hash = "sha256:4fa3e80883a5915ef9f5bc28630564bc4ed5b5af39812a3ff130ec76bd631e9d"}, +] + +[package.dependencies] +cryptography = ">=3.2" + +[[package]] +name = "blobfile" +version = "2.0.1" +description = "Read GCS, ABS and local paths with the same interface, clone of tensorflow.io.gfile" +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "blobfile-2.0.1-py3-none-any.whl", hash = "sha256:6f51b3e948f30a92e734a974b24fcd0f6a51841fd083de968899231481358da2"}, +] + +[package.dependencies] +filelock = ">=3.0,<4.0" +lxml = ">=4.9,<5.0" +pycryptodomex = ">=3.8,<4.0" +urllib3 = ">=1.25.3,<3" + +[[package]] +name = "certifi" +version = "2022.12.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, + {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, +] + +[[package]] +name = "cffi" +version = "1.15.1" +description = "Foreign Function Interface for Python calling C code." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, + {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, + {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, + {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, + {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, + {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, + {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, + {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, + {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, + {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, + {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, + {file = 
"cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, + {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, + {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, + {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, + {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, + {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, + {file = 
"cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, + {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, + {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, + {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "3.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"}, + {file = 
"charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = 
"sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, + {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, +] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.2.2" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "coverage-7.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c90e73bdecb7b0d1cea65a08cb41e9d672ac6d7995603d6465ed4914b98b9ad7"}, + {file = "coverage-7.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2926b8abedf750c2ecf5035c07515770944acf02e1c46ab08f6348d24c5f94d"}, + {file = "coverage-7.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57b77b9099f172804e695a40ebaa374f79e4fb8b92f3e167f66facbf92e8e7f5"}, + {file = "coverage-7.2.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:efe1c0adad110bf0ad7fb59f833880e489a61e39d699d37249bdf42f80590169"}, + {file = "coverage-7.2.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2199988e0bc8325d941b209f4fd1c6fa007024b1442c5576f1a32ca2e48941e6"}, + {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:81f63e0fb74effd5be736cfe07d710307cc0a3ccb8f4741f7f053c057615a137"}, + {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:186e0fc9cf497365036d51d4d2ab76113fb74f729bd25da0975daab2e107fd90"}, + {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:420f94a35e3e00a2b43ad5740f935358e24478354ce41c99407cddd283be00d2"}, + {file = "coverage-7.2.2-cp310-cp310-win32.whl", hash = "sha256:38004671848b5745bb05d4d621526fca30cee164db42a1f185615f39dc997292"}, + {file = "coverage-7.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:0ce383d5f56d0729d2dd40e53fe3afeb8f2237244b0975e1427bfb2cf0d32bab"}, + {file = "coverage-7.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3eb55b7b26389dd4f8ae911ba9bc8c027411163839dea4c8b8be54c4ee9ae10b"}, + {file = "coverage-7.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d2b96123a453a2d7f3995ddb9f28d01fd112319a7a4d5ca99796a7ff43f02af5"}, + {file = "coverage-7.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:299bc75cb2a41e6741b5e470b8c9fb78d931edbd0cd009c58e5c84de57c06731"}, + {file = "coverage-7.2.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e1df45c23d4230e3d56d04414f9057eba501f78db60d4eeecfcb940501b08fd"}, + {file = "coverage-7.2.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:006ed5582e9cbc8115d2e22d6d2144a0725db542f654d9d4fda86793832f873d"}, + {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d683d230b5774816e7d784d7ed8444f2a40e7a450e5720d58af593cb0b94a212"}, + {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8efb48fa743d1c1a65ee8787b5b552681610f06c40a40b7ef94a5b517d885c54"}, + {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c752d5264053a7cf2fe81c9e14f8a4fb261370a7bb344c2a011836a96fb3f57"}, + {file = "coverage-7.2.2-cp311-cp311-win32.whl", hash = 
"sha256:55272f33da9a5d7cccd3774aeca7a01e500a614eaea2a77091e9be000ecd401d"}, + {file = "coverage-7.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:92ebc1619650409da324d001b3a36f14f63644c7f0a588e331f3b0f67491f512"}, + {file = "coverage-7.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5afdad4cc4cc199fdf3e18088812edcf8f4c5a3c8e6cb69127513ad4cb7471a9"}, + {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0484d9dd1e6f481b24070c87561c8d7151bdd8b044c93ac99faafd01f695c78e"}, + {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d530191aa9c66ab4f190be8ac8cc7cfd8f4f3217da379606f3dd4e3d83feba69"}, + {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ac0f522c3b6109c4b764ffec71bf04ebc0523e926ca7cbe6c5ac88f84faced0"}, + {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ba279aae162b20444881fc3ed4e4f934c1cf8620f3dab3b531480cf602c76b7f"}, + {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:53d0fd4c17175aded9c633e319360d41a1f3c6e352ba94edcb0fa5167e2bad67"}, + {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c99cb7c26a3039a8a4ee3ca1efdde471e61b4837108847fb7d5be7789ed8fd9"}, + {file = "coverage-7.2.2-cp37-cp37m-win32.whl", hash = "sha256:5cc0783844c84af2522e3a99b9b761a979a3ef10fb87fc4048d1ee174e18a7d8"}, + {file = "coverage-7.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:817295f06eacdc8623dc4df7d8b49cea65925030d4e1e2a7c7218380c0072c25"}, + {file = "coverage-7.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6146910231ece63facfc5984234ad1b06a36cecc9fd0c028e59ac7c9b18c38c6"}, + {file = "coverage-7.2.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:387fb46cb8e53ba7304d80aadca5dca84a2fbf6fe3faf6951d8cf2d46485d1e5"}, + {file = "coverage-7.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:046936ab032a2810dcaafd39cc4ef6dd295df1a7cbead08fe996d4765fca9fe4"}, + {file = "coverage-7.2.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e627dee428a176ffb13697a2c4318d3f60b2ccdde3acdc9b3f304206ec130ccd"}, + {file = "coverage-7.2.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fa54fb483decc45f94011898727802309a109d89446a3c76387d016057d2c84"}, + {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3668291b50b69a0c1ef9f462c7df2c235da3c4073f49543b01e7eb1dee7dd540"}, + {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7c20b731211261dc9739bbe080c579a1835b0c2d9b274e5fcd903c3a7821cf88"}, + {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5764e1f7471cb8f64b8cda0554f3d4c4085ae4b417bfeab236799863703e5de2"}, + {file = "coverage-7.2.2-cp38-cp38-win32.whl", hash = "sha256:4f01911c010122f49a3e9bdc730eccc66f9b72bd410a3a9d3cb8448bb50d65d3"}, + {file = "coverage-7.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:c448b5c9e3df5448a362208b8d4b9ed85305528313fca1b479f14f9fe0d873b8"}, + {file = "coverage-7.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bfe7085783cda55e53510482fa7b5efc761fad1abe4d653b32710eb548ebdd2d"}, + {file = "coverage-7.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9d22e94e6dc86de981b1b684b342bec5e331401599ce652900ec59db52940005"}, + {file = 
"coverage-7.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:507e4720791977934bba016101579b8c500fb21c5fa3cd4cf256477331ddd988"}, + {file = "coverage-7.2.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc4803779f0e4b06a2361f666e76f5c2e3715e8e379889d02251ec911befd149"}, + {file = "coverage-7.2.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db8c2c5ace167fd25ab5dd732714c51d4633f58bac21fb0ff63b0349f62755a8"}, + {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f68ee32d7c4164f1e2c8797535a6d0a3733355f5861e0f667e37df2d4b07140"}, + {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d52f0a114b6a58305b11a5cdecd42b2e7f1ec77eb20e2b33969d702feafdd016"}, + {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:797aad79e7b6182cb49c08cc5d2f7aa7b2128133b0926060d0a8889ac43843be"}, + {file = "coverage-7.2.2-cp39-cp39-win32.whl", hash = "sha256:db45eec1dfccdadb179b0f9ca616872c6f700d23945ecc8f21bb105d74b1c5fc"}, + {file = "coverage-7.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:8dbe2647bf58d2c5a6c5bcc685f23b5f371909a5624e9f5cd51436d6a9f6c6ef"}, + {file = "coverage-7.2.2-pp37.pp38.pp39-none-any.whl", hash = "sha256:872d6ce1f5be73f05bea4df498c140b9e7ee5418bfa2cc8204e7f9b817caa968"}, + {file = "coverage-7.2.2.tar.gz", hash = "sha256:36dd42da34fe94ed98c39887b86db9d06777b1c8f860520e21126a75507024f2"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "cryptography" +version = "39.0.2" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cryptography-39.0.2-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06"}, + {file = "cryptography-39.0.2-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7"}, + {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612"}, + {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a"}, + {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97"}, + {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828"}, + {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011"}, + {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536"}, + {file = "cryptography-39.0.2-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5"}, + {file = "cryptography-39.0.2-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0"}, + {file = "cryptography-39.0.2-cp36-abi3-win32.whl", hash = "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480"}, + {file = "cryptography-39.0.2-cp36-abi3-win_amd64.whl", hash = "sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9"}, + {file = "cryptography-39.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac"}, + {file = "cryptography-39.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074"}, + {file = "cryptography-39.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1"}, + {file = "cryptography-39.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3"}, + {file = "cryptography-39.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354"}, + {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915"}, + {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84"}, + {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108"}, + {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3"}, + {file = "cryptography-39.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3"}, + {file = "cryptography-39.0.2.tar.gz", hash = "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +pep8test = ["black", "check-manifest", "mypy", "ruff", "types-pytz", "types-requests"] +sdist = ["setuptools-rust (>=0.11.4)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist", "pytz"] +test-randomorder = ["pytest-randomly"] +tox = ["tox"] + +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + +[[package]] +name = "dnspython" +version = "2.3.0" +description = "DNS toolkit" +category = "main" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dnspython-2.3.0-py3-none-any.whl", hash = "sha256:89141536394f909066cabd112e3e1a37e4e654db00a25308b0f130bc3152eb46"}, + {file = "dnspython-2.3.0.tar.gz", hash = "sha256:224e32b03eb46be70e12ef6d64e0be123a64e621ab4c0822ff6d450d52a540b9"}, +] + +[package.extras] +curio = ["curio (>=1.2,<2.0)", "sniffio (>=1.1,<2.0)"] +dnssec = ["cryptography (>=2.6,<40.0)"] +doh = ["h2 (>=4.1.0)", "httpx (>=0.21.1)", "requests (>=2.23.0,<3.0.0)", "requests-toolbelt (>=0.9.1,<0.11.0)"] +doq = ["aioquic (>=0.9.20)"] +idna = ["idna (>=2.1,<4.0)"] +trio = ["trio (>=0.14,<0.23)"] +wmi = ["wmi (>=1.5.1,<2.0.0)"] + +[[package]] +name = "docx2txt" +version = "0.8" +description = "A pure python-based utility to extract text and images from docx files." 
+category = "main" +optional = false +python-versions = "*" +files = [ + {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "fastapi" +version = "0.92.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "fastapi-0.92.0-py3-none-any.whl", hash = "sha256:ae7b97c778e2f2ec3fb3cb4fb14162129411d99907fb71920f6d69a524340ebf"}, + {file = "fastapi-0.92.0.tar.gz", hash = "sha256:023a0f5bd2c8b2609014d3bba1e14a1d7df96c6abea0a73070621c9862b9a4de"}, +] + +[package.dependencies] +pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.7.3,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0" +starlette = ">=0.25.0,<0.26.0" + +[package.extras] +all = ["email-validator (>=1.1.1)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>=0.12.0,<0.21.0)"] +doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer[all] (>=0.6.1,<0.8.0)"] +test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==22.10.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.6.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"] + +[[package]] +name = "filelock" +version = "3.10.2" +description = "A platform independent file lock." 
+category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "filelock-3.10.2-py3-none-any.whl", hash = "sha256:eb8f0f2d37ed68223ea63e3bddf2fac99667e4362c88b3f762e434d160190d18"}, + {file = "filelock-3.10.2.tar.gz", hash = "sha256:75997740323c5f12e18f10b494bc11c03e42843129f980f17c04352cc7b09d40"}, +] + +[package.extras] +docs = ["furo (>=2022.12.7)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.2)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "frozenlist" +version = "1.3.3" +description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"}, + {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"}, + {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"}, + {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"}, + {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash 
= "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"}, + {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"}, + {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"}, + {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"}, 
+ {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"}, + {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"}, + {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"}, 
+ {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"}, + {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"}, + {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"}, + {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, +] + +[[package]] +name = "grpcio" +version = "1.47.5" +description = "HTTP/2-based RPC framework" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "grpcio-1.47.5-cp310-cp310-linux_armv7l.whl", hash = "sha256:acc73289d0c44650aa1f21eccfa967f5623b01c3b5e2b4596fe5f9c5bf10956d"}, + {file = "grpcio-1.47.5-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f3174c798959998876d546944523a558f78a9b9feb22a2cbaaa3822f2e158653"}, + {file = "grpcio-1.47.5-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:64401ee6d54b4d5869bcba4be3cae9f2e335c44a39ba1e29991ad22cfe2abacb"}, + {file = "grpcio-1.47.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39a07eb5e7ec9277e5d124fb0e2d4f51ddbaadc2abdd27e8bbf1716dcf45e581"}, + {file = "grpcio-1.47.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:874b138ca95a6375ae6f6a12c10a348827c9aa8fbd05d025b87b5e050ab55b46"}, + {file = "grpcio-1.47.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:90539369afba42fc921cdda9d5f697a421f05a2e82ba58342ffbe88aa586019e"}, + {file = "grpcio-1.47.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b18f970514bbc76547928e26d0cec06996ce3f947a3634b3adbe79d0e48e980"}, + {file = "grpcio-1.47.5-cp310-cp310-win32.whl", hash = "sha256:44c52923be0c4a0f662de43644679c6356960c38c4edf44864c23b998693c7cc"}, + {file = "grpcio-1.47.5-cp310-cp310-win_amd64.whl", hash = "sha256:07761f427551fced386db8c78701d6a167b2a682aa8df808303dd0a0d44bf6c9"}, + {file = "grpcio-1.47.5-cp36-cp36m-linux_armv7l.whl", hash = "sha256:10eb026bf75568de06933366f0340d2b4b207425c74a5640aa1812b8b69e7d9d"}, + {file = 
"grpcio-1.47.5-cp36-cp36m-macosx_10_10_universal2.whl", hash = "sha256:4f8e7fba6b1150a63aebd04d03be779de4ea4c4a8b28869e7a3c8f0b3ec59edc"}, + {file = "grpcio-1.47.5-cp36-cp36m-manylinux_2_17_aarch64.whl", hash = "sha256:36d93b19c214bc654fc50ae65cce84b8f7698159191b9d3f21f9ad92ae7bc325"}, + {file = "grpcio-1.47.5-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4e59f916bf58528e55893743151c6bd9f0a393fddfe411a6fffd29a300e6acf2"}, + {file = "grpcio-1.47.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18f8b2d316a3be464eb2a20afa7026a235a07a0094be879876611206d8026679"}, + {file = "grpcio-1.47.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:0c3076957cd2aea34fe69384453315fd765948eb6cb73a12f332277308d04b76"}, + {file = "grpcio-1.47.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:007f5ad07d2f3a4a422c1df589a0d25e918b96d8f6069cb6f0254386a5f09262"}, + {file = "grpcio-1.47.5-cp36-cp36m-win32.whl", hash = "sha256:01ac149a5ca9512277b1d2fe85687099f3e442c6f9f924eae003a6700735e23e"}, + {file = "grpcio-1.47.5-cp36-cp36m-win_amd64.whl", hash = "sha256:a32ccc88950f2be619157201161e70a5e5ed9e2427662bb2e60f1a8cea7d0db6"}, + {file = "grpcio-1.47.5-cp37-cp37m-linux_armv7l.whl", hash = "sha256:ec71f15258e086acadb13ec06e4e4c54eb0f5455cd4c618997f847874d5ff9ea"}, + {file = "grpcio-1.47.5-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:4bbf5a63497dbd5e44c4335cab153796a4274be17ca40ec971a7749c3f4fef6a"}, + {file = "grpcio-1.47.5-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:11e1bc97e88232201256b718c63a8a1fd86ec6fca3a501293be5c5e423de9d56"}, + {file = "grpcio-1.47.5-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e568d84fed80713d2fa3221552beee27ed8034f7eff52bb7871bf5ffe4d4ca78"}, + {file = "grpcio-1.47.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb4c838de8e1e7194d3f9a679fd76cc44a1dbe81f18bd39ee233c72347d772bf"}, + {file = "grpcio-1.47.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a74c19baf2f8127b44b3f58e2a5801a17992dae9a20197b4a8fa26e2ea79742b"}, + {file = "grpcio-1.47.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e369ed5ecff11ef85666cabbb5736495604e052c8dc2c03a2104f99dfd0a59e3"}, + {file = "grpcio-1.47.5-cp37-cp37m-win32.whl", hash = "sha256:ccb741fab5117aea981d4ac341d2ce1e588f515f83091807d4e2bb388ed59edd"}, + {file = "grpcio-1.47.5-cp37-cp37m-win_amd64.whl", hash = "sha256:af9d3b075dfcbc343d44b0e98725ba6d56dc0669e61905a4e71e8f4409cfefbd"}, + {file = "grpcio-1.47.5-cp38-cp38-linux_armv7l.whl", hash = "sha256:cac6847a4b9a7e7a1f270a71fef1c17c2e8a6b411c0ca48080ce1e08d284aded"}, + {file = "grpcio-1.47.5-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:54a3e17d155b6fb141e1fbb7c47d30556bec4c940b66ff4d9513536e2e214d4a"}, + {file = "grpcio-1.47.5-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:d1873c0b84a0ffb129f75e7c8be45d2cae427baf0b090d15b9ff46c1841c3f53"}, + {file = "grpcio-1.47.5-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e209df91cf8dfb335c2e26784702b0e12c20dc4de7b9b6d2cccd968146155f06"}, + {file = "grpcio-1.47.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:350e2627684f93f8b59af9c76a03eeb4aa145ecc589569137d4518486f4f1727"}, + {file = "grpcio-1.47.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:23754807314c5aa4c26eb1c50aaf506801a2f7825951100280d2c013b127436f"}, + {file = "grpcio-1.47.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:503c3fa0045f3ef80aa1ad082eac6a888081da2e1cd793f281ed499831e4c498"}, + 
{file = "grpcio-1.47.5-cp38-cp38-win32.whl", hash = "sha256:a4eecfbe994c88996461bd1459e43ea460952d4147f53e8c18e089764e6808f5"}, + {file = "grpcio-1.47.5-cp38-cp38-win_amd64.whl", hash = "sha256:941927ae4d589a2fef5c22b9c47df9e5e613c737bd750bafc3a9547cc506017c"}, + {file = "grpcio-1.47.5-cp39-cp39-linux_armv7l.whl", hash = "sha256:9891c77e69bd4109c25c1bea51d78fbc5ba2fcd9445bf99225bb8fb03d849913"}, + {file = "grpcio-1.47.5-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:61e83778d85dbbbd7446451ec28b7261e9ebba489cc8c262dfe8fedc119f769b"}, + {file = "grpcio-1.47.5-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:21ccfc0e989531cbdc93c54a7581ea5f7c46bf585016d9320b4be042f1e02374"}, + {file = "grpcio-1.47.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bea35a0114a39827ffe59f73950d242f95d59a9ac2009ae8da7b065c06f0a57f"}, + {file = "grpcio-1.47.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e75b9e52eeb9d1335aaeecf581cb3cea7fc4bafd7bd675c83f208a386a42a8"}, + {file = "grpcio-1.47.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1fb86f95228827b55e860278d142326af4489c0f4220975780daff325fc87172"}, + {file = "grpcio-1.47.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c9b83183525afe58dd9e7bb249f9e55df326e3c3834d09ea476c7a6bb12f73ee"}, + {file = "grpcio-1.47.5-cp39-cp39-win32.whl", hash = "sha256:00bff7492875ab04ec5ed3d92550d8f8aa423151e187b79684c8a22c7a6f1670"}, + {file = "grpcio-1.47.5-cp39-cp39-win_amd64.whl", hash = "sha256:2b32adae820cc0347e5e44efe91b661b436dbca73f25c5763cadb1cafd1dca10"}, + {file = "grpcio-1.47.5.tar.gz", hash = "sha256:b62b8bea0c94b4603bb4c8332d8a814375120bea3c2dbeb71397213bde5ea832"}, +] + +[package.dependencies] +six = ">=1.5.2" + +[package.extras] +protobuf = ["grpcio-tools (>=1.47.5)"] + +[[package]] +name = "grpcio-tools" +version = "1.47.5" +description = "Protobuf code generator for gRPC" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "grpcio-tools-1.47.5.tar.gz", hash = "sha256:62ced60566a4cbcf35c57e887e2e68b4f108b3474ef3ec0022d38cd579345f92"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-linux_armv7l.whl", hash = "sha256:9f92c561b245a562110bd84d3b64b016c8af5afde39febf1f71553ae56f6e8e4"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:a0a991844a024705ad177cb858d36e3e6b329ea4a78b7f4c597b2817fc2692e7"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:935976d5436d4306de052d1e00848fa25abc667e185aaaffcd367915f33a67c7"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2481dba6a30d415a4756cd88cc380780e3f00bb41d56b8f6547bc3c09c6f4e7f"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e62176978faa96b21e4e821e7070b0feed919726ff730c0b3b7e8d106ddb45bf"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:728eb1f4ef6d380366a2de9940d1f910ece8bf4e44de5ca935cd16d4394e82ff"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d58982c747e107f65c7307ec1646cce105b0785088287bf209f545377aeedaf4"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-win32.whl", hash = "sha256:ea6d8f07b087bc2d579b7727daee2abf38fe5dc475c9e7c4f16b4a2c31895319"}, + {file = "grpcio_tools-1.47.5-cp310-cp310-win_amd64.whl", hash = "sha256:5e7a4e68072639fa767bde1011f5d83f4461a8e60651ea202af597777ee1ffd7"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-linux_armv7l.whl", 
hash = "sha256:bb1e066fc50ef7503b024924858658692d3e98582a9727b156f2f845da70e11e"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-macosx_10_10_universal2.whl", hash = "sha256:7d3e397a27e652ae6579f1f7dc3fc0c771db977ccaaded1fe113e882df425c15"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-manylinux_2_17_aarch64.whl", hash = "sha256:b19d8f1e8422826d49fc428acc66b69aa450c70f7090681df32d535188edf524"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0e017bd1022bc981fa1629e757e0d3d4a1991f999fb90ec714c2683fe05b8fa"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb56ea33c4a33ee3b707f62339fd579e1a8dbbfeb7665d7ff85ee837cf64794"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:02882ff2f703b75d343991608b39104f1621508cf407e427a75c1794ed0fac95"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:84395aacae4f8a3358ad648a8bacf6b15bbb8946d8cf73f47dc77cfe1a154d48"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-win32.whl", hash = "sha256:de8901c64a1091cc474318e7a013af8c30feba34c7954c29ca8f477baf07db28"}, + {file = "grpcio_tools-1.47.5-cp36-cp36m-win_amd64.whl", hash = "sha256:37cb5c3d94ba1efef0d17a66e5e69b177fc934389eda8b76b161a6623e45e714"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-linux_armv7l.whl", hash = "sha256:5c2d3a35e9341ea9c68afe289054bd8604eda4214e6d916f97b19a316537a296"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:89733edb89ec28e52dd9cc25e90b78248b6edd265f564726be2a9c4b4ee78479"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:489f41535d779287759942c6cced93c4219ea53dad46ebdc4faca6220e1dba88"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:072c84f561912400363b81af6bf5424c38fab80f0c9436c0fe19b2e7c2bcf15c"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c650233420279f943bd1dcf286742aaeb4db7cc5f6554a5e8c16c2e4fa19a28f"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:dab220aba6b5777b16df5c5b3a30f831cdbc4f493eabdaf9f6585691bad5496a"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:309ca8508f361895ef2d4f533611272228d2412c8cae754b695673c7c65a2f8b"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-win32.whl", hash = "sha256:f8ce5fb65e97866257943cbf6d504195ab55e01ef467988d86322a36041b6de8"}, + {file = "grpcio_tools-1.47.5-cp37-cp37m-win_amd64.whl", hash = "sha256:b9154a18b0ad2bc4b9ceadedd7b67bb65b500b3427495b4d224a1a835aa55ce6"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-linux_armv7l.whl", hash = "sha256:aaa4063bc05a18f32ae98e414e2472477468b966b9a1425c41eec160250beff2"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:093da28f8ce3a0eedd5370b9f09f815fb6c01fd663d60734eab5b300b9a305ec"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:0771f57585b9070086dec509b02fa2804a9d4c395e95cd7a6cb42d8f4b5683f7"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68d4cdc674c8596da8e25cf37741aab3f07bdf38731510a92019e5ec57f5fcea"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08fdce5549acca9fd7a45084c62e8ab0a1ca1c530bcbfa089625e9523f224023"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-musllinux_1_1_i686.whl", 
hash = "sha256:8431b9ee083bec444ca6d48705b89774f97ba0a75e8c33ef3b9a2dc6ed2aa584"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf37376da0062155d728fb9a1d522ea8f5039ebf774885d269f7772cbc3a2e6"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-win32.whl", hash = "sha256:b65a59698f938fa59fd756799cd641c3755fb09cb95de008e4d67a9e5b1af6d5"}, + {file = "grpcio_tools-1.47.5-cp38-cp38-win_amd64.whl", hash = "sha256:17c2b5ce8b3100c8da4ae5070d8d2c2466f174e66d8127fb85ef8a7937a03853"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-linux_armv7l.whl", hash = "sha256:9070301f079fef76fb0d51b84f393c6738587f3a16a2f0ced303362b0cc0ecf6"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:5bcf01116a4d3bed2faf832f8c5618d1c69473576f3925240e3c5042dfbc115e"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:b555b954aa213eac8efe7df507a178c3ab7323df9f501846a1bbccdf81354831"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7604e08530b3edc688e41aa8af46051478d417b08afdf6fc2eafb5eb90528a26"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d3f80818a560abee8189c4f0b074f45c16309b4596e013cb6ce105a022c5965"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c801ebd7fa2304ff85aa15147f134aefe33132d85308c43e46f6a5be78b5a8a8"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:235adfc22e9c703533573344de1d2394ddd92b27c82eb259bb5fb46f885159b8"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-win32.whl", hash = "sha256:d659c257cbb48c843931b584d3c3da5473fa17275e0d04af79c9e9fdd6077179"}, + {file = "grpcio_tools-1.47.5-cp39-cp39-win_amd64.whl", hash = "sha256:9d121c63ff2fddeae2c65f6675eb944f47808a242b647d80b4661b2c5e1e6732"}, +] + +[package.dependencies] +grpcio = ">=1.47.5" +protobuf = ">=3.12.0,<4.0dev" +setuptools = "*" + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +category = "main" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +category = "main" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + +[[package]] +name = "httpcore" +version = "0.16.3" +description = "A minimal low-level HTTP client." 
+category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "httpcore-0.16.3-py3-none-any.whl", hash = "sha256:da1fb708784a938aa084bde4feb8317056c55037247c787bd7e19eb2c2949dc0"}, + {file = "httpcore-0.16.3.tar.gz", hash = "sha256:c5d6f04e2fc530f39e0c077e6a30caa53f1451096120f1f38b954afd0b17c0cb"}, +] + +[package.dependencies] +anyio = ">=3.0,<5.0" +certifi = "*" +h11 = ">=0.13,<0.15" +sniffio = ">=1.0.0,<2.0.0" + +[package.extras] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] + +[[package]] +name = "httpx" +version = "0.23.3" +description = "The next generation HTTP client." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "httpx-0.23.3-py3-none-any.whl", hash = "sha256:a211fcce9b1254ea24f0cd6af9869b3d29aba40154e947d2a07bb499b3e310d6"}, + {file = "httpx-0.23.3.tar.gz", hash = "sha256:9818458eb565bb54898ccb9b8b251a28785dd4a55afbc23d0eb410754fe7d0f9"}, +] + +[package.dependencies] +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = ">=0.15.0,<0.17.0" +rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]} +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<13)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] + +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +category = "main" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "loguru" +version = "0.6.0" +description = "Python logging made (stupidly) simple" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.6.0-py3-none-any.whl", hash = "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"}, + {file = "loguru-0.6.0.tar.gz", hash = "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils (==0.16)", "flake8 (>=3.7.7)", "isort (>=5.1.1)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "tox (>=3.9.0)"] + +[[package]] 
+name = "lxml" +version = "4.9.2" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +files = [ + {file = "lxml-4.9.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:76cf573e5a365e790396a5cc2b909812633409306c6531a6877c59061e42c4f2"}, + {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b1f42b6921d0e81b1bcb5e395bc091a70f41c4d4e55ba99c6da2b31626c44892"}, + {file = "lxml-4.9.2-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f102706d0ca011de571de32c3247c6476b55bb6bc65a20f682f000b07a4852a"}, + {file = "lxml-4.9.2-cp27-cp27m-win32.whl", hash = "sha256:8d0b4612b66ff5d62d03bcaa043bb018f74dfea51184e53f067e6fdcba4bd8de"}, + {file = "lxml-4.9.2-cp27-cp27m-win_amd64.whl", hash = "sha256:4c8f293f14abc8fd3e8e01c5bd86e6ed0b6ef71936ded5bf10fe7a5efefbaca3"}, + {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2899456259589aa38bfb018c364d6ae7b53c5c22d8e27d0ec7609c2a1ff78b50"}, + {file = "lxml-4.9.2-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6749649eecd6a9871cae297bffa4ee76f90b4504a2a2ab528d9ebe912b101975"}, + {file = "lxml-4.9.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a08cff61517ee26cb56f1e949cca38caabe9ea9fbb4b1e10a805dc39844b7d5c"}, + {file = "lxml-4.9.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:85cabf64adec449132e55616e7ca3e1000ab449d1d0f9d7f83146ed5bdcb6d8a"}, + {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8340225bd5e7a701c0fa98284c849c9b9fc9238abf53a0ebd90900f25d39a4e4"}, + {file = "lxml-4.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1ab8f1f932e8f82355e75dda5413a57612c6ea448069d4fb2e217e9a4bed13d4"}, + {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:699a9af7dffaf67deeae27b2112aa06b41c370d5e7633e0ee0aea2e0b6c211f7"}, + {file = "lxml-4.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9cc34af337a97d470040f99ba4282f6e6bac88407d021688a5d585e44a23184"}, + {file = "lxml-4.9.2-cp310-cp310-win32.whl", hash = "sha256:d02a5399126a53492415d4906ab0ad0375a5456cc05c3fc0fc4ca11771745cda"}, + {file = "lxml-4.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:a38486985ca49cfa574a507e7a2215c0c780fd1778bb6290c21193b7211702ab"}, + {file = "lxml-4.9.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c83203addf554215463b59f6399835201999b5e48019dc17f182ed5ad87205c9"}, + {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:2a87fa548561d2f4643c99cd13131acb607ddabb70682dcf1dff5f71f781a4bf"}, + {file = "lxml-4.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:d6b430a9938a5a5d85fc107d852262ddcd48602c120e3dbb02137c83d212b380"}, + {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3efea981d956a6f7173b4659849f55081867cf897e719f57383698af6f618a92"}, + {file = "lxml-4.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:df0623dcf9668ad0445e0558a21211d4e9a149ea8f5666917c8eeec515f0a6d1"}, + {file = "lxml-4.9.2-cp311-cp311-win32.whl", hash = "sha256:da248f93f0418a9e9d94b0080d7ebc407a9a5e6d0b57bb30db9b5cc28de1ad33"}, + {file = 
"lxml-4.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:3818b8e2c4b5148567e1b09ce739006acfaa44ce3156f8cbbc11062994b8e8dd"}, + {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0"}, + {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e"}, + {file = "lxml-4.9.2-cp35-cp35m-win32.whl", hash = "sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df"}, + {file = "lxml-4.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5"}, + {file = "lxml-4.9.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f49e52d174375a7def9915c9f06ec4e569d235ad428f70751765f48d5926678c"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36c3c175d34652a35475a73762b545f4527aec044910a651d2bf50de9c3352b1"}, + {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a35f8b7fa99f90dd2f5dc5a9fa12332642f087a7641289ca6c40d6e1a2637d8e"}, + {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74"}, + {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38"}, + {file = "lxml-4.9.2-cp36-cp36m-win32.whl", hash = "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5"}, + {file = "lxml-4.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3"}, + {file = "lxml-4.9.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c9ec3eaf616d67db0764b3bb983962b4f385a1f08304fd30c7283954e6a7869b"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2a29ba94d065945944016b6b74e538bdb1751a1db6ffb80c9d3c2e40d6fa9894"}, + {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a82d05da00a58b8e4c0008edbc8a4b6ec5a4bc1e2ee0fb6ed157cf634ed7fa45"}, + {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:223f4232855ade399bd409331e6ca70fb5578efef22cf4069a6090acc0f53c0e"}, + {file = "lxml-4.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d17bc7c2ccf49c478c5bdd447594e82692c74222698cfc9b5daae7ae7e90743b"}, + 
{file = "lxml-4.9.2-cp37-cp37m-win32.whl", hash = "sha256:b64d891da92e232c36976c80ed7ebb383e3f148489796d8d31a5b6a677825efe"}, + {file = "lxml-4.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a0a336d6d3e8b234a3aae3c674873d8f0e720b76bc1d9416866c41cd9500ffb9"}, + {file = "lxml-4.9.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:da4dd7c9c50c059aba52b3524f84d7de956f7fef88f0bafcf4ad7dde94a064e8"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:821b7f59b99551c69c85a6039c65b75f5683bdc63270fec660f75da67469ca24"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e5168986b90a8d1f2f9dc1b841467c74221bd752537b99761a93d2d981e04889"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8e20cb5a47247e383cf4ff523205060991021233ebd6f924bca927fcf25cf86f"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13598ecfbd2e86ea7ae45ec28a2a54fb87ee9b9fdb0f6d343297d8e548392c03"}, + {file = "lxml-4.9.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:880bbbcbe2fca64e2f4d8e04db47bcdf504936fa2b33933efd945e1b429bea8c"}, + {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d2278d59425777cfcb19735018d897ca8303abe67cc735f9f97177ceff8027f"}, + {file = "lxml-4.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5344a43228767f53a9df6e5b253f8cdca7dfc7b7aeae52551958192f56d98457"}, + {file = "lxml-4.9.2-cp38-cp38-win32.whl", hash = "sha256:925073b2fe14ab9b87e73f9a5fde6ce6392da430f3004d8b72cc86f746f5163b"}, + {file = "lxml-4.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:9b22c5c66f67ae00c0199f6055705bc3eb3fcb08d03d2ec4059a2b1b25ed48d7"}, + {file = "lxml-4.9.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:5f50a1c177e2fa3ee0667a5ab79fdc6b23086bc8b589d90b93b4bd17eb0e64d1"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:090c6543d3696cbe15b4ac6e175e576bcc3f1ccfbba970061b7300b0c15a2140"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:63da2ccc0857c311d764e7d3d90f429c252e83b52d1f8f1d1fe55be26827d1f4"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:5b4545b8a40478183ac06c073e81a5ce4cf01bf1734962577cf2bb569a5b3bbf"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2e430cd2824f05f2d4f687701144556646bae8f249fd60aa1e4c768ba7018947"}, + {file = "lxml-4.9.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6804daeb7ef69e7b36f76caddb85cccd63d0c56dedb47555d2fc969e2af6a1a5"}, + {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a6e441a86553c310258aca15d1c05903aaf4965b23f3bc2d55f200804e005ee5"}, + {file = "lxml-4.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ca34efc80a29351897e18888c71c6aca4a359247c87e0b1c7ada14f0ab0c0fb2"}, + {file = "lxml-4.9.2-cp39-cp39-win32.whl", hash = "sha256:6b418afe5df18233fc6b6093deb82a32895b6bb0b1155c2cdb05203f583053f1"}, + {file = "lxml-4.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f1496ea22ca2c830cbcbd473de8f114a320da308438ae65abad6bab7867fe38f"}, + {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b264171e3143d842ded311b7dccd46ff9ef34247129ff5bf5066123c55c2431c"}, 
+ {file = "lxml-4.9.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0dc313ef231edf866912e9d8f5a042ddab56c752619e92dfd3a2c277e6a7299a"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:16efd54337136e8cd72fb9485c368d91d77a47ee2d42b057564aae201257d419"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0f2b1e0d79180f344ff9f321327b005ca043a50ece8713de61d1cb383fb8ac05"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:7b770ed79542ed52c519119473898198761d78beb24b107acf3ad65deae61f1f"}, + {file = "lxml-4.9.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efa29c2fe6b4fdd32e8ef81c1528506895eca86e1d8c4657fda04c9b3786ddf9"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7e91ee82f4199af8c43d8158024cbdff3d931df350252288f0d4ce656df7f3b5"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:b23e19989c355ca854276178a0463951a653309fb8e57ce674497f2d9f208746"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:01d36c05f4afb8f7c20fd9ed5badca32a2029b93b1750f571ccc0b142531caf7"}, + {file = "lxml-4.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7b515674acfdcadb0eb5d00d8a709868173acece5cb0be3dd165950cbfdf5409"}, + {file = "lxml-4.9.2.tar.gz", hash = "sha256:2455cfaeb7ac70338b3257f41e21f0724f4b5b0c0e7702da67ee6c3640835b67"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=0.29.7)"] + +[[package]] +name = "mmh3" +version = "3.0.0" +description = "Python wrapper for MurmurHash (MurmurHash3), a set of fast and robust hash functions." 
+category = "main" +optional = false +python-versions = "*" +files = [ + {file = "mmh3-3.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:23912dde2ad4f701926948dd8e79a0e42b000f73962806f153931f52985e1e07"}, + {file = "mmh3-3.0.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:07f1308a410dc406d6a3c282a685728d00a87f3ed684f012671b96d6cc6a41c3"}, + {file = "mmh3-3.0.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:167cbc2b5ae27f3bccd797a2e8a9e7561791bee4cc2885f2c140eedc5df000ef"}, + {file = "mmh3-3.0.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:8fb833c2942917eff54f984b067d93e5a3c54dbb00720323460cdfed9292835f"}, + {file = "mmh3-3.0.0-cp36-cp36m-win32.whl", hash = "sha256:b7d26d0243ed9a5b8bf7aa8c53697cb79dff1e1d207f42396b7a7cb2a62298b7"}, + {file = "mmh3-3.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2b6c79fc314b34b911245b460a79b601fff39bb807521fb7ed7c15cacf0394ac"}, + {file = "mmh3-3.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6d0b3e9def1fdfe4eadd35ee26bf72bd715ba97711f7101302d54c9d2e70ba27"}, + {file = "mmh3-3.0.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:8803d28c17cf898f5f00c0433e8b13d51fa3bb4ebecf59872ba1eaa20d94128a"}, + {file = "mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:01e456edf9cc381298a590923aadd1c0bf9934d93433099a5001d656112437c2"}, + {file = "mmh3-3.0.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:ff69ddc2d46e3e42720840b6b4f7bfb032fd1e677fac347fdfff6e4d9fd01212"}, + {file = "mmh3-3.0.0-cp37-cp37m-win32.whl", hash = "sha256:e08a5d81a2ff53625953290187bed4ae96a6972e2b5cd5984a6ebc5a9aab256c"}, + {file = "mmh3-3.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:12484ac80373db77d8a6beb7615e7dac8b6c3fb118905311a51450b4fc4a24d1"}, + {file = "mmh3-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:93c96e657e9bf9e9ef12ddaeae9f109c0b3134146e2eff2cbddde5a34190920e"}, + {file = "mmh3-3.0.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9097be65aa95460bc68b6108601da8894757532450daf74034e4eaecd536acca"}, + {file = "mmh3-3.0.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:19874e12acb4119ef1ef83062ef4ac953c3343dd07a67ede8fa096d0393f34be"}, + {file = "mmh3-3.0.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:4589adcb609d1547aac7c1ac1064eb27cdd44b65b7e8a114e2971cd3b7110306"}, + {file = "mmh3-3.0.0-cp38-cp38-win32.whl", hash = "sha256:7a311efd4ecf122f21392ec6bf447c620cc783d20bdb9aec60bb469a54318419"}, + {file = "mmh3-3.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:3566d1455fa4a09f8fb1aa5b37f68914949674f9aa2bd630e9fdf344207f55b5"}, + {file = "mmh3-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:92fdffd63edb67c30dbaba18a7448d762209c0e678b0c9d577d17b30362b59a3"}, + {file = "mmh3-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e52b869572c09db0c1a483f6e9cedbccfae8a282d95e552d3d4bd0712ab3196"}, + {file = "mmh3-3.0.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f1cce018cc82a8a6287e6aeb139e441129837b810f2ddf372e3ff7f0fefb0947"}, + {file = "mmh3-3.0.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:0fd09c4b61fcddbcf0a87d5463b4e6d2919896736a67efc5248d5c74c1c9c742"}, + {file = "mmh3-3.0.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c17fe2e276edd37ad8a6aff3b1663d3479c2c5c5993539c1050422a1dae33033"}, + {file = "mmh3-3.0.0-cp39-cp39-win32.whl", hash = "sha256:150439b906b4deaf6d796b2c2d11fb6159f08d02330d97723071ab3bf43b51df"}, + {file = "mmh3-3.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:bd870aedd9189eff1cf4e1687869b56c7e9461ee869789139c3e704009e5c227"}, + {file = "mmh3-3.0.0.tar.gz", hash = 
"sha256:d1ec578c09a07d3518ec9be540b87546397fa3455de73c166fcce51eaa5c41c5"}, +] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = 
"multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + +[[package]] +name = "numpy" +version = "1.24.2" +description = "Fundamental package for array computing in Python" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d"}, + {file = "numpy-1.24.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5"}, + {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253"}, + {file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978"}, + {file = "numpy-1.24.2-cp310-cp310-win32.whl", hash = "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9"}, + {file = "numpy-1.24.2-cp310-cp310-win_amd64.whl", hash = "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0"}, + {file = "numpy-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a"}, + {file = "numpy-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0"}, + {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281"}, + {file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910"}, + {file = "numpy-1.24.2-cp311-cp311-win32.whl", hash = "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95"}, + {file = "numpy-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04"}, + {file = "numpy-1.24.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2"}, + {file = "numpy-1.24.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5"}, + {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a"}, + {file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96"}, + {file = "numpy-1.24.2-cp38-cp38-win32.whl", hash = "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d"}, + {file = "numpy-1.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756"}, + {file = "numpy-1.24.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a"}, + {file = "numpy-1.24.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f"}, + {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb"}, + {file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780"}, + {file = "numpy-1.24.2-cp39-cp39-win32.whl", hash = "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468"}, + {file = "numpy-1.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"}, + {file = "numpy-1.24.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f"}, + {file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"}, +] + +[[package]] +name = "openai" +version = "0.27.2" +description = "Python client library for the OpenAI API" +category = "main" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-0.27.2-py3-none-any.whl", hash = "sha256:6df674cf257e9e0504f1fd191c333d3f6a2442b13218d0eccf06230eb24d320e"}, + {file = "openai-0.27.2.tar.gz", hash = "sha256:5869fdfa34b0ec66c39afa22f4a0fb83a135dff81f6505f52834c6ab3113f762"}, +] + +[package.dependencies] +aiohttp = "*" +requests = ">=2.20" +tqdm = "*" + +[package.extras] +datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] +embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] +wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] + +[[package]] +name = "packaging" +version = "23.0" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, +] + +[[package]] +name = "pandas" +version = "1.5.3" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = 
false +python-versions = ">=3.8" +files = [ + {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, + {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, + {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, + {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, + {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, + {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, + {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, + {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, + {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, + {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, + {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, + {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, + {file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, + {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, + {file = 
"pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, + {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, + {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, + {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.1" +pytz = ">=2020.1" + +[package.extras] +test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] + +[[package]] +name = "pillow" +version = "9.4.0" +description = "Python Imaging Library (Fork)" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pillow-9.4.0-1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b4b4e9dda4f4e4c4e6896f93e84a8f0bcca3b059de9ddf67dac3c334b1195e1"}, + {file = "Pillow-9.4.0-1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:fb5c1ad6bad98c57482236a21bf985ab0ef42bd51f7ad4e4538e89a997624e12"}, + {file = "Pillow-9.4.0-1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:f0caf4a5dcf610d96c3bd32932bfac8aee61c96e60481c2a0ea58da435e25acd"}, + {file = "Pillow-9.4.0-1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:3f4cc516e0b264c8d4ccd6b6cbc69a07c6d582d8337df79be1e15a5056b258c9"}, + {file = "Pillow-9.4.0-1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b8c2f6eb0df979ee99433d8b3f6d193d9590f735cf12274c108bd954e30ca858"}, + {file = "Pillow-9.4.0-1-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b70756ec9417c34e097f987b4d8c510975216ad26ba6e57ccb53bc758f490dab"}, + {file = "Pillow-9.4.0-1-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:43521ce2c4b865d385e78579a082b6ad1166ebed2b1a2293c3be1d68dd7ca3b9"}, + {file = "Pillow-9.4.0-2-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:9d9a62576b68cd90f7075876f4e8444487db5eeea0e4df3ba298ee38a8d067b0"}, + {file = "Pillow-9.4.0-2-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:87708d78a14d56a990fbf4f9cb350b7d89ee8988705e58e39bdf4d82c149210f"}, + {file = "Pillow-9.4.0-2-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:8a2b5874d17e72dfb80d917213abd55d7e1ed2479f38f001f264f7ce7bae757c"}, + {file = "Pillow-9.4.0-2-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:83125753a60cfc8c412de5896d10a0a405e0bd88d0470ad82e0869ddf0cb3848"}, + {file = "Pillow-9.4.0-2-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9e5f94742033898bfe84c93c831a6f552bb629448d4072dd312306bab3bd96f1"}, + {file = "Pillow-9.4.0-2-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:013016af6b3a12a2f40b704677f8b51f72cb007dac785a9933d5c86a72a7fe33"}, + {file = "Pillow-9.4.0-2-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:99d92d148dd03fd19d16175b6d355cc1b01faf80dae93c6c3eb4163709edc0a9"}, + {file = "Pillow-9.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:2968c58feca624bb6c8502f9564dd187d0e1389964898f5e9e1fbc8533169157"}, + {file = "Pillow-9.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c5c1362c14aee73f50143d74389b2c158707b4abce2cb055b7ad37ce60738d47"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd752c5ff1b4a870b7661234694f24b1d2b9076b8bf337321a814c612665f343"}, + 
{file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a3049a10261d7f2b6514d35bbb7a4dfc3ece4c4de14ef5876c4b7a23a0e566d"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16a8df99701f9095bea8a6c4b3197da105df6f74e6176c5b410bc2df2fd29a57"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:94cdff45173b1919350601f82d61365e792895e3c3a3443cf99819e6fbf717a5"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ed3e4b4e1e6de75fdc16d3259098de7c6571b1a6cc863b1a49e7d3d53e036070"}, + {file = "Pillow-9.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5b2f8a31bd43e0f18172d8ac82347c8f37ef3e0b414431157718aa234991b28"}, + {file = "Pillow-9.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:09b89ddc95c248ee788328528e6a2996e09eaccddeeb82a5356e92645733be35"}, + {file = "Pillow-9.4.0-cp310-cp310-win32.whl", hash = "sha256:f09598b416ba39a8f489c124447b007fe865f786a89dbfa48bb5cf395693132a"}, + {file = "Pillow-9.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6e78171be3fb7941f9910ea15b4b14ec27725865a73c15277bc39f5ca4f8391"}, + {file = "Pillow-9.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:3fa1284762aacca6dc97474ee9c16f83990b8eeb6697f2ba17140d54b453e133"}, + {file = "Pillow-9.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eaef5d2de3c7e9b21f1e762f289d17b726c2239a42b11e25446abf82b26ac132"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4dfdae195335abb4e89cc9762b2edc524f3c6e80d647a9a81bf81e17e3fb6f0"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6abfb51a82e919e3933eb137e17c4ae9c0475a25508ea88993bb59faf82f3b35"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451f10ef963918e65b8869e17d67db5e2f4ab40e716ee6ce7129b0cde2876eab"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6663977496d616b618b6cfa43ec86e479ee62b942e1da76a2c3daa1c75933ef4"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:60e7da3a3ad1812c128750fc1bc14a7ceeb8d29f77e0a2356a8fb2aa8925287d"}, + {file = "Pillow-9.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:19005a8e58b7c1796bc0167862b1f54a64d3b44ee5d48152b06bb861458bc0f8"}, + {file = "Pillow-9.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f715c32e774a60a337b2bb8ad9839b4abf75b267a0f18806f6f4f5f1688c4b5a"}, + {file = "Pillow-9.4.0-cp311-cp311-win32.whl", hash = "sha256:b222090c455d6d1a64e6b7bb5f4035c4dff479e22455c9eaa1bdd4c75b52c80c"}, + {file = "Pillow-9.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba6612b6548220ff5e9df85261bddc811a057b0b465a1226b39bfb8550616aee"}, + {file = "Pillow-9.4.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:5f532a2ad4d174eb73494e7397988e22bf427f91acc8e6ebf5bb10597b49c493"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dd5a9c3091a0f414a963d427f920368e2b6a4c2f7527fdd82cde8ef0bc7a327"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef21af928e807f10bf4141cad4746eee692a0dd3ff56cfb25fce076ec3cc8abe"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:847b114580c5cc9ebaf216dd8c8dbc6b00a3b7ab0131e173d7120e6deade1f57"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = 
"sha256:653d7fb2df65efefbcbf81ef5fe5e5be931f1ee4332c2893ca638c9b11a409c4"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:46f39cab8bbf4a384ba7cb0bc8bae7b7062b6a11cfac1ca4bc144dea90d4a9f5"}, + {file = "Pillow-9.4.0-cp37-cp37m-win32.whl", hash = "sha256:7ac7594397698f77bce84382929747130765f66406dc2cd8b4ab4da68ade4c6e"}, + {file = "Pillow-9.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:46c259e87199041583658457372a183636ae8cd56dbf3f0755e0f376a7f9d0e6"}, + {file = "Pillow-9.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:0e51f608da093e5d9038c592b5b575cadc12fd748af1479b5e858045fff955a9"}, + {file = "Pillow-9.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:765cb54c0b8724a7c12c55146ae4647e0274a839fb6de7bcba841e04298e1011"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:519e14e2c49fcf7616d6d2cfc5c70adae95682ae20f0395e9280db85e8d6c4df"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d197df5489004db87d90b918033edbeee0bd6df3848a204bca3ff0a903bef837"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0845adc64fe9886db00f5ab68c4a8cd933ab749a87747555cec1c95acea64b0b"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e1339790c083c5a4de48f688b4841f18df839eb3c9584a770cbd818b33e26d5d"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:a96e6e23f2b79433390273eaf8cc94fec9c6370842e577ab10dabdcc7ea0a66b"}, + {file = "Pillow-9.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7cfc287da09f9d2a7ec146ee4d72d6ea1342e770d975e49a8621bf54eaa8f30f"}, + {file = "Pillow-9.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d7081c084ceb58278dd3cf81f836bc818978c0ccc770cbbb202125ddabec6628"}, + {file = "Pillow-9.4.0-cp38-cp38-win32.whl", hash = "sha256:df41112ccce5d47770a0c13651479fbcd8793f34232a2dd9faeccb75eb5d0d0d"}, + {file = "Pillow-9.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:7a21222644ab69ddd9967cfe6f2bb420b460dae4289c9d40ff9a4896e7c35c9a"}, + {file = "Pillow-9.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0f3269304c1a7ce82f1759c12ce731ef9b6e95b6df829dccd9fe42912cc48569"}, + {file = "Pillow-9.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb362e3b0976dc994857391b776ddaa8c13c28a16f80ac6522c23d5257156bed"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2e0f87144fcbbe54297cae708c5e7f9da21a4646523456b00cc956bd4c65815"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28676836c7796805914b76b1837a40f76827ee0d5398f72f7dcc634bae7c6264"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0884ba7b515163a1a05440a138adeb722b8a6ae2c2b33aea93ea3118dd3a899e"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:53dcb50fbdc3fb2c55431a9b30caeb2f7027fcd2aeb501459464f0214200a503"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:e8c5cf126889a4de385c02a2c3d3aba4b00f70234bfddae82a5eaa3ee6d5e3e6"}, + {file = "Pillow-9.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6c6b1389ed66cdd174d040105123a5a1bc91d0aa7059c7261d20e583b6d8cbd2"}, + {file = "Pillow-9.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0dd4c681b82214b36273c18ca7ee87065a50e013112eea7d78c7a1b89a739153"}, + {file = "Pillow-9.4.0-cp39-cp39-win32.whl", hash = 
"sha256:6d9dfb9959a3b0039ee06c1a1a90dc23bac3b430842dcb97908ddde05870601c"}, + {file = "Pillow-9.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:54614444887e0d3043557d9dbc697dbb16cfb5a35d672b7a0fcc1ed0cf1c600b"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b9b752ab91e78234941e44abdecc07f1f0d8f51fb62941d32995b8161f68cfe5"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3b56206244dc8711f7e8b7d6cad4663917cd5b2d950799425076681e8766286"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aabdab8ec1e7ca7f1434d042bf8b1e92056245fb179790dc97ed040361f16bfd"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db74f5562c09953b2c5f8ec4b7dfd3f5421f31811e97d1dbc0a7c93d6e3a24df"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e9d7747847c53a16a729b6ee5e737cf170f7a16611c143d95aa60a109a59c336"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b52ff4f4e002f828ea6483faf4c4e8deea8d743cf801b74910243c58acc6eda3"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:575d8912dca808edd9acd6f7795199332696d3469665ef26163cd090fa1f8bfa"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3c4ed2ff6760e98d262e0cc9c9a7f7b8a9f61aa4d47c58835cdaf7b0b8811bb"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e621b0246192d3b9cb1dc62c78cfa4c6f6d2ddc0ec207d43c0dedecb914f152a"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8f127e7b028900421cad64f51f75c051b628db17fb00e099eb148761eed598c9"}, + {file = "Pillow-9.4.0.tar.gz", hash = "sha256:a1c2d7780448eb93fbcc3789bf3916aa5720d942e37945f4056680317f1cd23e"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "pinecone-client" +version = "2.2.1" +description = "Pinecone client and SDK" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pinecone-client-2.2.1.tar.gz", hash = "sha256:0878dcaee447c46c8d1b3d71c854689daa7e548e5009a171780907c7d4e74789"}, + {file = "pinecone_client-2.2.1-py3-none-any.whl", hash = "sha256:6976a22aee57a9813378607506c8c36b0317dfa36a08a5397aaaeab2eef66c1b"}, +] + +[package.dependencies] +dnspython = ">=2.0.0" +loguru = ">=0.5.0" +numpy = "*" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4" +requests = ">=2.19.0" +tqdm = ">=4.64.1" +typing-extensions = ">=3.7.4" +urllib3 = ">=1.21.1" + +[package.extras] +grpc = ["googleapis-common-protos (>=1.53.0)", "grpc-gateway-protoc-gen-openapiv2 (==0.1.0)", "grpcio (>=1.44.0)", "lz4 (>=3.1.3)", "protobuf (==3.19.3)"] + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing 
= ["pytest", "pytest-benchmark"] + +[[package]] +name = "protobuf" +version = "3.20.3" +description = "Protocol Buffers" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "protobuf-3.20.3-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f4bd856d702e5b0d96a00ec6b307b0f51c1982c2bf9c0052cf9019e9a544ba99"}, + {file = "protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9aae4406ea63d825636cc11ffb34ad3379335803216ee3a856787bcf5ccc751e"}, + {file = "protobuf-3.20.3-cp310-cp310-win32.whl", hash = "sha256:28545383d61f55b57cf4df63eebd9827754fd2dc25f80c5253f9184235db242c"}, + {file = "protobuf-3.20.3-cp310-cp310-win_amd64.whl", hash = "sha256:67a3598f0a2dcbc58d02dd1928544e7d88f764b47d4a286202913f0b2801c2e7"}, + {file = "protobuf-3.20.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:899dc660cd599d7352d6f10d83c95df430a38b410c1b66b407a6b29265d66469"}, + {file = "protobuf-3.20.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e64857f395505ebf3d2569935506ae0dfc4a15cb80dc25261176c784662cdcc4"}, + {file = "protobuf-3.20.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:d9e4432ff660d67d775c66ac42a67cf2453c27cb4d738fc22cb53b5d84c135d4"}, + {file = "protobuf-3.20.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:74480f79a023f90dc6e18febbf7b8bac7508420f2006fabd512013c0c238f454"}, + {file = "protobuf-3.20.3-cp37-cp37m-win32.whl", hash = "sha256:b6cc7ba72a8850621bfec987cb72623e703b7fe2b9127a161ce61e61558ad905"}, + {file = "protobuf-3.20.3-cp37-cp37m-win_amd64.whl", hash = "sha256:8c0c984a1b8fef4086329ff8dd19ac77576b384079247c770f29cc8ce3afa06c"}, + {file = "protobuf-3.20.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de78575669dddf6099a8a0f46a27e82a1783c557ccc38ee620ed8cc96d3be7d7"}, + {file = "protobuf-3.20.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:f4c42102bc82a51108e449cbb32b19b180022941c727bac0cfd50170341f16ee"}, + {file = "protobuf-3.20.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:44246bab5dd4b7fbd3c0c80b6f16686808fab0e4aca819ade6e8d294a29c7050"}, + {file = "protobuf-3.20.3-cp38-cp38-win32.whl", hash = "sha256:c02ce36ec760252242a33967d51c289fd0e1c0e6e5cc9397e2279177716add86"}, + {file = "protobuf-3.20.3-cp38-cp38-win_amd64.whl", hash = "sha256:447d43819997825d4e71bf5769d869b968ce96848b6479397e29fc24c4a5dfe9"}, + {file = "protobuf-3.20.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:398a9e0c3eaceb34ec1aee71894ca3299605fa8e761544934378bbc6c97de23b"}, + {file = "protobuf-3.20.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bf01b5720be110540be4286e791db73f84a2b721072a3711efff6c324cdf074b"}, + {file = "protobuf-3.20.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:daa564862dd0d39c00f8086f88700fdbe8bc717e993a21e90711acfed02f2402"}, + {file = "protobuf-3.20.3-cp39-cp39-win32.whl", hash = "sha256:819559cafa1a373b7096a482b504ae8a857c89593cf3a25af743ac9ecbd23480"}, + {file = "protobuf-3.20.3-cp39-cp39-win_amd64.whl", hash = "sha256:03038ac1cfbc41aa21f6afcbcd357281d7521b4157926f30ebecc8d4ea59dcb7"}, + {file = "protobuf-3.20.3-py2.py3-none-any.whl", hash = "sha256:a7ca6d488aa8ff7f329d4c545b2dbad8ac31464f1d8b1c87ad1346717731e4db"}, + {file = "protobuf-3.20.3.tar.gz", hash = "sha256:2e3427429c9cffebf259491be0af70189607f365c2f41c7c3764af6f337105f2"}, +] + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, 
!=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + +[[package]] +name = "pycryptodomex" +version = "3.17" +description = "Cryptographic library for Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "pycryptodomex-3.17-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:12056c38e49d972f9c553a3d598425f8a1c1d35b2e4330f89d5ff1ffb70de041"}, + {file = "pycryptodomex-3.17-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab33c2d9f275e05e235dbca1063753b5346af4a5cac34a51fa0da0d4edfb21d7"}, + {file = "pycryptodomex-3.17-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:caa937ff29d07a665dfcfd7a84f0d4207b2ebf483362fa9054041d67fdfacc20"}, + {file = "pycryptodomex-3.17-cp27-cp27m-manylinux2014_aarch64.whl", hash = "sha256:db23d7341e21b273d2440ec6faf6c8b1ca95c8894da612e165be0b89a8688340"}, + {file = "pycryptodomex-3.17-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:f854c8476512cebe6a8681cc4789e4fcff6019c17baa0fd72b459155dc605ab4"}, + {file = "pycryptodomex-3.17-cp27-cp27m-win32.whl", hash = "sha256:a57e3257bacd719769110f1f70dd901c5b6955e9596ad403af11a3e6e7e3311c"}, + {file = "pycryptodomex-3.17-cp27-cp27m-win_amd64.whl", hash = "sha256:d38ab9e53b1c09608ba2d9b8b888f1e75d6f66e2787e437adb1fecbffec6b112"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:3c2516b42437ae6c7a29ef3ddc73c8d4714e7b6df995b76be4695bbe4b3b5cd2"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:5c23482860302d0d9883404eaaa54b0615eefa5274f70529703e2c43cc571827"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-manylinux2014_aarch64.whl", hash = "sha256:7a8dc3ee7a99aae202a4db52de5a08aa4d01831eb403c4d21da04ec2f79810db"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:7cc28dd33f1f3662d6da28ead4f9891035f63f49d30267d3b41194c8778997c8"}, + {file = "pycryptodomex-3.17-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:2d4d395f109faba34067a08de36304e846c791808524614c731431ee048fe70a"}, + {file = "pycryptodomex-3.17-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:55eed98b4150a744920597c81b3965b632038781bab8a08a12ea1d004213c600"}, + {file = "pycryptodomex-3.17-cp35-abi3-manylinux2014_aarch64.whl", hash = "sha256:7fa0b52df90343fafe319257b31d909be1d2e8852277fb0376ba89d26d2921db"}, + {file = "pycryptodomex-3.17-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78f0ddd4adc64baa39b416f3637aaf99f45acb0bcdc16706f0cc7ebfc6f10109"}, + {file = "pycryptodomex-3.17-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4fa037078e92c7cc49f6789a8bac3de06856740bb2038d05f2d9a2e4b165d59"}, + {file = "pycryptodomex-3.17-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:88b0d5bb87eaf2a31e8a759302b89cf30c97f2f8ca7d83b8c9208abe8acb447a"}, + {file = "pycryptodomex-3.17-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:6feedf4b0e36b395329b4186a805f60f900129cdf0170e120ecabbfcb763995d"}, + {file = "pycryptodomex-3.17-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a6651a07f67c28b6e978d63aa3a3fccea0feefed9a8453af3f7421a758461b7"}, + {file = "pycryptodomex-3.17-cp35-abi3-win32.whl", hash = 
"sha256:32e764322e902bbfac49ca1446604d2839381bbbdd5a57920c9daaf2e0b778df"}, + {file = "pycryptodomex-3.17-cp35-abi3-win_amd64.whl", hash = "sha256:4b51e826f0a04d832eda0790bbd0665d9bfe73e5a4d8ea93b6a9b38beeebe935"}, + {file = "pycryptodomex-3.17-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:d4cf0128da167562c49b0e034f09e9cedd733997354f2314837c2fa461c87bb1"}, + {file = "pycryptodomex-3.17-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:c92537b596bd5bffb82f8964cabb9fef1bca8a28a9e0a69ffd3ec92a4a7ad41b"}, + {file = "pycryptodomex-3.17-pp27-pypy_73-win32.whl", hash = "sha256:599bb4ae4bbd614ca05f49bd4e672b7a250b80b13ae1238f05fd0f09d87ed80a"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4c4674f4b040321055c596aac926d12f7f6859dfe98cd12f4d9453b43ab6adc8"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67a3648025e4ddb72d43addab764336ba2e670c8377dba5dd752e42285440d31"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40e8a11f578bd0851b02719c862d55d3ee18d906c8b68a9c09f8c564d6bb5b92"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:23d83b610bd97704f0cd3acc48d99b76a15c8c1540d8665c94d514a49905bad7"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd29d35ac80755e5c0a99d96b44fb9abbd7e871849581ea6a4cb826d24267537"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64b876d57cb894b31056ad8dd6a6ae1099b117ae07a3d39707221133490e5715"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee8bf4fdcad7d66beb744957db8717afc12d176e3fd9c5d106835133881a049b"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c84689c73358dfc23f9fdcff2cb9e7856e65e2ce3b5ed8ff630d4c9bdeb1867b"}, + {file = "pycryptodomex-3.17.tar.gz", hash = "sha256:0af93aad8d62e810247beedef0261c148790c52f3cd33643791cc6396dd217c1"}, +] + +[[package]] +name = "pydantic" +version = "1.10.7" +description = "Data validation and settings management using python type hints" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-1.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e79e999e539872e903767c417c897e729e015872040e56b96e67968c3b918b2d"}, + {file = "pydantic-1.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:01aea3a42c13f2602b7ecbbea484a98169fb568ebd9e247593ea05f01b884b2e"}, + {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:516f1ed9bc2406a0467dd777afc636c7091d71f214d5e413d64fef45174cfc7a"}, + {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae150a63564929c675d7f2303008d88426a0add46efd76c3fc797cd71cb1b46f"}, + {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209"}, + {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af"}, + {file = "pydantic-1.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:a7cd2251439988b413cb0a985c4ed82b6c6aac382dbaff53ae03c4b23a70e80a"}, + {file = "pydantic-1.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:68792151e174a4aa9e9fc1b4e653e65a354a2fa0fed169f7b3d09902ad2cb6f1"}, + {file = "pydantic-1.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe2507b8ef209da71b6fb5f4e597b50c5a34b78d7e857c4f8f3115effaef5fe"}, + {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10a86d8c8db68086f1e30a530f7d5f83eb0685e632e411dbbcf2d5c0150e8dcd"}, + {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75ae19d2a3dbb146b6f324031c24f8a3f52ff5d6a9f22f0683694b3afcb16fb"}, + {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:464855a7ff7f2cc2cf537ecc421291b9132aa9c79aef44e917ad711b4a93163b"}, + {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:193924c563fae6ddcb71d3f06fa153866423ac1b793a47936656e806b64e24ca"}, + {file = "pydantic-1.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:b4a849d10f211389502059c33332e91327bc154acc1845f375a99eca3afa802d"}, + {file = "pydantic-1.10.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cc1dde4e50a5fc1336ee0581c1612215bc64ed6d28d2c7c6f25d2fe3e7c3e918"}, + {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0cfe895a504c060e5d36b287ee696e2fdad02d89e0d895f83037245218a87fe"}, + {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:670bb4683ad1e48b0ecb06f0cfe2178dcf74ff27921cdf1606e527d2617a81ee"}, + {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:950ce33857841f9a337ce07ddf46bc84e1c4946d2a3bba18f8280297157a3fd1"}, + {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c15582f9055fbc1bfe50266a19771bbbef33dd28c45e78afbe1996fd70966c2a"}, + {file = "pydantic-1.10.7-cp37-cp37m-win_amd64.whl", hash = "sha256:82dffb306dd20bd5268fd6379bc4bfe75242a9c2b79fec58e1041fbbdb1f7914"}, + {file = "pydantic-1.10.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c7f51861d73e8b9ddcb9916ae7ac39fb52761d9ea0df41128e81e2ba42886cd"}, + {file = "pydantic-1.10.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6434b49c0b03a51021ade5c4daa7d70c98f7a79e95b551201fff682fc1661245"}, + {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64d34ab766fa056df49013bb6e79921a0265204c071984e75a09cbceacbbdd5d"}, + {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:701daea9ffe9d26f97b52f1d157e0d4121644f0fcf80b443248434958fd03dc3"}, + {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf135c46099ff3f919d2150a948ce94b9ce545598ef2c6c7bf55dca98a304b52"}, + {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0f85904f73161817b80781cc150f8b906d521fa11e3cdabae19a581c3606209"}, + {file = "pydantic-1.10.7-cp38-cp38-win_amd64.whl", hash = "sha256:9f6f0fd68d73257ad6685419478c5aece46432f4bdd8d32c7345f1986496171e"}, + {file = "pydantic-1.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c230c0d8a322276d6e7b88c3f7ce885f9ed16e0910354510e0bae84d54991143"}, + {file = "pydantic-1.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:976cae77ba6a49d80f461fd8bba183ff7ba79f44aa5cfa82f1346b5626542f8e"}, + {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d45fc99d64af9aaf7e308054a0067fdcd87ffe974f2442312372dfa66e1001d"}, + {file = 
"pydantic-1.10.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2a5ebb48958754d386195fe9e9c5106f11275867051bf017a8059410e9abf1f"}, + {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:abfb7d4a7cd5cc4e1d1887c43503a7c5dd608eadf8bc615413fc498d3e4645cd"}, + {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:80b1fab4deb08a8292d15e43a6edccdffa5377a36a4597bb545b93e79c5ff0a5"}, + {file = "pydantic-1.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:d71e69699498b020ea198468e2480a2f1e7433e32a3a99760058c6520e2bea7e"}, + {file = "pydantic-1.10.7-py3-none-any.whl", hash = "sha256:0cd181f1d0b1d00e2b705f1bf1ac7799a2d938cce3376b8007df62b29be3c2c6"}, + {file = "pydantic-1.10.7.tar.gz", hash = "sha256:cfc83c0678b6ba51b0532bea66860617c4cd4251ecf76e9846fa5a9f3454e97e"}, +] + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "pymilvus" +version = "2.2.3" +description = "Python Sdk for Milvus" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pymilvus-2.2.3-py3-none-any.whl", hash = "sha256:99eab5a249bc6ca03a8e2b5169708acab22e657ab4672c89e0ad7bb00c1cd283"}, + {file = "pymilvus-2.2.3.tar.gz", hash = "sha256:88b75ac073e2b30264f89e5897450825112610230791a91a55653d5191649000"}, +] + +[package.dependencies] +grpcio = ">=1.47.0,<=1.48.0" +grpcio-tools = ">=1.47.0,<=1.48.0" +mmh3 = ">=2.0,<=3.0.0" +pandas = {version = ">=1.2.4", markers = "python_version > \"3.6\""} +ujson = ">=2.0.0,<=5.4.0" + +[[package]] +name = "pypdf2" +version = "3.0.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, + {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, +] + +[package.extras] +crypto = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow", "PyCryptodome"] +image = ["Pillow"] + +[[package]] +name = "pytest" +version = "7.2.2" +description = "pytest: simple powerful testing with Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, + {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, +] + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.20.3" +description = "Pytest support for asyncio" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-asyncio-0.20.3.tar.gz", hash = 
"sha256:83cbf01169ce3e8eb71c6c278ccb0574d1a7a3bb8eaaf5e50e0ad342afb33b36"}, + {file = "pytest_asyncio-0.20.3-py3-none-any.whl", hash = "sha256:f129998b209d04fcc65c96fc85c11e5316738358909a8399e93be553d7656442"}, +] + +[package.dependencies] +pytest = ">=6.1.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + +[[package]] +name = "pytest-cov" +version = "4.0.0" +description = "Pytest plugin for measuring coverage." +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pytest-cov-4.0.0.tar.gz", hash = "sha256:996b79efde6433cdbd0088872dbc5fb3ed7fe1578b68cdbba634f14bb8dd0470"}, + {file = "pytest_cov-4.0.0-py3-none-any.whl", hash = "sha256:2feb1b751d66a8bd934e5edfa2e961d11309dc37b73b0eabe73b5945fee20f6b"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "0.21.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python-dotenv-0.21.1.tar.gz", hash = "sha256:1c93de8f636cde3ce377292818d0e440b6e45a82f215c3744979151fa8151c49"}, + {file = "python_dotenv-0.21.1-py3-none-any.whl", hash = "sha256:41e12e0318bebc859fcc4d97d4db8d20ad21721a6aa5047dd59f090391cb549a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "python-multipart" +version = "0.0.6" +description = "A streaming multipart parser for Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python_multipart-0.0.6-py3-none-any.whl", hash = "sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18"}, + {file = "python_multipart-0.0.6.tar.gz", hash = "sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132"}, +] + +[package.extras] +dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==1.7.3)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"] + +[[package]] +name = "python-pptx" +version = "0.6.21" +description = "Generate and manipulate Open XML PowerPoint (.pptx) files" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "python-pptx-0.6.21.tar.gz", hash = "sha256:7798a2aaf89563565b3c7120c0acfe9aff775db0db3580544e3bf4840c2e378f"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +Pillow = ">=3.3.2" +XlsxWriter = ">=0.5.7" + +[[package]] +name = "pytz" +version = "2022.7.1" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = 
"pytz-2022.7.1-py2.py3-none-any.whl", hash = "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"}, + {file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"}, +] + +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] + +[[package]] +name = "qdrant-client" +version = "1.1.0" +description = "Client library for the Qdrant vector search engine" +category = "main" +optional = false +python-versions = ">=3.7,<3.12" +files = [ + {file = "qdrant_client-1.1.0-py3-none-any.whl", hash = "sha256:60aa8f76a78b07980b5d8a602632c576a3ed8f446f900ab47446886e3b35a1af"}, + {file = "qdrant_client-1.1.0.tar.gz", hash = "sha256:b6258f4178d891433beeb80a61b406e23762f5cfc8d964ccab9cbef732dac7fd"}, +] + +[package.dependencies] +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = 
">=0.14.0", extras = ["http2"]} +numpy = {version = ">=1.21", markers = "python_version >= \"3.8\""} +pydantic = ">=1.8,<2.0" +typing-extensions = ">=4.0.0,<5.0.0" +urllib3 = ">=1.26.14,<2.0.0" + +[[package]] +name = "redis" +version = "4.5.1" +description = "Python client for Redis database and key-value store" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "redis-4.5.1-py3-none-any.whl", hash = "sha256:5deb072d26e67d2be1712603bfb7947ec3431fb0eec9c578994052e33035af6d"}, + {file = "redis-4.5.1.tar.gz", hash = "sha256:1eec3741cda408d3a5f84b78d089c8b8d895f21b3b050988351e925faf202864"}, +] + +[package.dependencies] +async-timeout = ">=4.0.2" + +[package.extras] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] + +[[package]] +name = "regex" +version = "2023.3.22" +description = "Alternative regular expression module, to replace re." +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2023.3.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:68e9add923bda8357e6fe65a568766feae369063cb7210297067675cce65272f"}, + {file = "regex-2023.3.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b280cb303fed94199f0b976595af71ebdcd388fb5e377a8198790f1016a23476"}, + {file = "regex-2023.3.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:328a70e578f37f59eb54e8450b5042190bbadf2ef7f5c0b60829574b62955ed7"}, + {file = "regex-2023.3.22-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c00c357a4914f58398503c7f716cf1646b1e36b8176efa35255f5ebfacedfa46"}, + {file = "regex-2023.3.22-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d40cecf4bcb2cb37c59e3c79e5bbc45d47e3f3e07edf24e35fc5775db2570058"}, + {file = "regex-2023.3.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43469c22fcf705a7cb59c7e01d6d96975bdbc54c1138900f04d11496489a0054"}, + {file = "regex-2023.3.22-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4d3571c8eb21f0fbe9f0b21b49092c24d442f9a295f079949df3551b2886f29"}, + {file = "regex-2023.3.22-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:148ad520f41021b97870e9c80420e6cdaadcc5e4306e613aed84cd5d53f8a7ca"}, + {file = "regex-2023.3.22-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:24242e5f26823e95edd64969bd206d4752c1a56a744d8cbcf58461f9788bc0c7"}, + {file = "regex-2023.3.22-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fcef5c3144d861b623456d87ca7fff7af59a4a918e1364cdd0687b48285285"}, + {file = "regex-2023.3.22-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:533ba64d67d882286557106a1c5f12b4c2825f11b47a7c209a8c22922ca882be"}, + {file = "regex-2023.3.22-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:80a288b21b17e39fb3630cf1d14fd704499bb11d9c8fc110662a0c57758d3d3e"}, + {file = "regex-2023.3.22-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fa41a427d4f03ec6d6da2fd8a230f4f388f336cd7ca46b46c4d2a1bca3ead85a"}, + {file = "regex-2023.3.22-cp310-cp310-win32.whl", hash = "sha256:3c4fa90fd91cc2957e66195ce374331bebbc816964864f64b42bd14bda773b53"}, + {file = "regex-2023.3.22-cp310-cp310-win_amd64.whl", hash = "sha256:a4c7b8c5a3a186b49415af3be18e4b8f93b33d6853216c0a1d7401736b703bce"}, + {file = "regex-2023.3.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:0a2a851d0548a4e298d88e3ceeb4bad4aab751cf1883edf6150f25718ce0207a"}, + {file = "regex-2023.3.22-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f2bc8a9076ea7add860d57dbee0554a212962ecf2a900344f2fc7c56a02463b0"}, + {file = "regex-2023.3.22-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e30d9a6fd7a7a6a4da6f80d167ce8eda4a993ff24282cbc73f34186c46a498db"}, + {file = "regex-2023.3.22-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3371975b165c1e859e1990e5069e8606f00b25aed961cfd25b7bac626b1eb5a9"}, + {file = "regex-2023.3.22-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33c887b658afb144cdc8ce9156a0e1098453060c18b8bd5177f831ad58e0d60d"}, + {file = "regex-2023.3.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd47362e03acc780aad5a5bc4624d495594261b55a1f79a5b775b6be865a5911"}, + {file = "regex-2023.3.22-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7798b3d662f70cea425637c54da30ef1894d426cab24ee7ffaaccb24a8b17bb8"}, + {file = "regex-2023.3.22-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bdab2c90665b88faf5cc5e11bf835d548f4b8d8060c89fc70782b6020850aa1c"}, + {file = "regex-2023.3.22-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:55f907c4d18a5a40da0ceb339a0beda77c9df47c934adad987793632fb4318c3"}, + {file = "regex-2023.3.22-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e00b046000b313ffaa2f6e8d7290b33b08d2005150eff4c8cf3ad74d011888d1"}, + {file = "regex-2023.3.22-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:20ce96da2093e72e151d6af8217a629aeb5f48f1ac543c2fffd1d87c57699d7e"}, + {file = "regex-2023.3.22-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8527ea0978ed6dc58ccb3935bd2883537b455c97ec44b5d8084677dfa817f96b"}, + {file = "regex-2023.3.22-cp311-cp311-win32.whl", hash = "sha256:4c9c3db90acd17e4231344a23616f33fd79837809584ce30e2450ca312fa47aa"}, + {file = "regex-2023.3.22-cp311-cp311-win_amd64.whl", hash = "sha256:e1b56dac5e86ab52e0443d63b02796357202a8f8c5966b69f8d4c03a94778e98"}, + {file = "regex-2023.3.22-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:33bab9c9af936123b70b9874ce83f2bcd54be76b97637b33d31560fba8ad5d78"}, + {file = "regex-2023.3.22-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b59233cb8df6b60fff5f3056f6f342a8f5f04107a11936bf49ebff87dd4ace34"}, + {file = "regex-2023.3.22-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6f29cb134d782685f8eda01d72073c483c7f87b318b5101c7001faef7850f5"}, + {file = "regex-2023.3.22-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d15a0cc48f7a3055e89df1bd6623a907c407d1f58f67ff47064e598d4a550de4"}, + {file = "regex-2023.3.22-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:159c7b83488a056365119ada0bceddc06a455d3db7a7aa3cf07f13b2878b885f"}, + {file = "regex-2023.3.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aff7c778d9229d66f716ad98a701fa91cf97935ae4a32a145ae9e61619906aaa"}, + {file = "regex-2023.3.22-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e66cfc915f5f7e2c8a0af8a27f87aa857f440de7521fd7f2682e23f082142a1"}, + {file = "regex-2023.3.22-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3b4da28d89527572f0d4a24814e353e1228a7aeda965e5d9265c1435a154b17a"}, + {file = 
"regex-2023.3.22-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5da83c964aecb6c3f2a6c9a03f3d0fa579e1ad208e2c264ba826cecd19da11fa"}, + {file = "regex-2023.3.22-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:81291006a934052161eae8340e7731ea6b8595b0c27dd4927c4e8a489e1760e2"}, + {file = "regex-2023.3.22-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:c95a977cfdccb8ddef95ddd77cf586fe9dc327c7c93cf712983cece70cdaa1be"}, + {file = "regex-2023.3.22-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:cdd3d2df486c9a8c6d08f78bdfa8ea7cf6191e037fde38c2cf6f5f0559e9d353"}, + {file = "regex-2023.3.22-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f311ca33fcb9f8fb060c1fa76238d8d029f33b71a2021bafa5d423cc25965b54"}, + {file = "regex-2023.3.22-cp38-cp38-win32.whl", hash = "sha256:2e2e6baf4a1108f84966f44870b26766d8f6d104c9959aae329078327c677122"}, + {file = "regex-2023.3.22-cp38-cp38-win_amd64.whl", hash = "sha256:60b545806a433cc752b9fa936f1c0a63bf96a3872965b958b35bd0d5d788d411"}, + {file = "regex-2023.3.22-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5826e7fb443acb49f64f9648a2852efc8d9af2f4c67f6c3dca69dccd9e8e1d15"}, + {file = "regex-2023.3.22-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:59b3aab231c27cd754d6452c43b12498d34e7ab87d69a502bd0220f4b1c090c4"}, + {file = "regex-2023.3.22-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97326d62255203c6026896d4b1ad6b5a0141ba097cae00ed3a508fe454e96baf"}, + {file = "regex-2023.3.22-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59a15c2803c20702d7f2077807d9a2b7d9a168034b87fd3f0d8361de60019a1e"}, + {file = "regex-2023.3.22-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ad467524cb6879ce42107cf02a49cdb4a06f07fe0e5f1160d7db865a8d25d4b"}, + {file = "regex-2023.3.22-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:617d101b95151d827d5366e9c4225a68c64d56065e41ab9c7ef51bb87f347a8a"}, + {file = "regex-2023.3.22-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:548257463696daf919d2fdfc53ee4b98e29e3ffc5afddd713d83aa849d1fa178"}, + {file = "regex-2023.3.22-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1937946dd03818845bd9c1713dfd3173a7b9a324e6593a235fc8c51c9cd460eb"}, + {file = "regex-2023.3.22-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d94a0d25e517c76c9ce9e2e2635d9d1a644b894f466a66a10061f4e599cdc019"}, + {file = "regex-2023.3.22-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:87016850c13082747bd120558e6750746177bd492b103b2fca761c8a1c43fba9"}, + {file = "regex-2023.3.22-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:3582db55372eaee9e998d378109c4b9b15beb2c84624c767efe351363fada9c4"}, + {file = "regex-2023.3.22-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:88552925fd22320600c59ee80342d6eb06bfa9503c3a402d7327983f5fa999d9"}, + {file = "regex-2023.3.22-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d7477ebaf5d3621c763702e1ec0daeede8863fb22459c5e26ddfd17e9b1999c"}, + {file = "regex-2023.3.22-cp39-cp39-win32.whl", hash = "sha256:dcc5b0d6a94637c071a427dc4469efd0ae4fda8ff384790bc8b5baaf9308dc3e"}, + {file = "regex-2023.3.22-cp39-cp39-win_amd64.whl", hash = "sha256:f1977c1fe28173f2349d42c59f80f10a97ce34f2bedb7b7f55e2e8a8de9b7dfb"}, + {file = "regex-2023.3.22.tar.gz", hash = "sha256:f579a202b90c1110d0894a86b32a89bf550fdb34bdd3f9f550115706be462e19"}, +] + +[[package]] +name = "requests" +version = "2.28.2" 
+description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" +files = [ + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "rfc3986" +version = "1.5.0" +description = "Validating URI References per RFC 3986" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, + {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, +] + +[package.dependencies] +idna = {version = "*", optional = true, markers = "extra == \"idna2008\""} + +[package.extras] +idna2008 = ["idna"] + +[[package]] +name = "setuptools" +version = "67.6.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setuptools-67.6.0-py3-none-any.whl", hash = "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2"}, + {file = "setuptools-67.6.0.tar.gz", hash = "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] 
+name = "starlette" +version = "0.25.0" +description = "The little ASGI library that shines." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "starlette-0.25.0-py3-none-any.whl", hash = "sha256:774f1df1983fd594b9b6fb3ded39c2aa1979d10ac45caac0f4255cbe2acb8628"}, + {file = "starlette-0.25.0.tar.gz", hash = "sha256:854c71e73736c429c2bdb07801f2c76c9cba497e7c3cf4988fde5e95fe4cdb3c"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] + +[[package]] +name = "tenacity" +version = "8.2.2" +description = "Retry code until it succeeds" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "tenacity-8.2.2-py3-none-any.whl", hash = "sha256:2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0"}, + {file = "tenacity-8.2.2.tar.gz", hash = "sha256:43af037822bd0029025877f3b2d97cc4d7bb0c2991000a3d59d71517c5c969e0"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "tiktoken" +version = "0.2.0" +description = "" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d06705b55bb5f6c194285b6d15ad31bd7586d44fe433be31bc3694cf8c70169c"}, + {file = "tiktoken-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29f2969945fc430f817c907f59a2da9e7b797fe65527ba5b9442618643a0dc86"}, + {file = "tiktoken-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:546455f27b6f7981d17de265b8b99e2fef980fbc3fde1d94b551f8354902000e"}, + {file = "tiktoken-0.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:54b5dc05f934ac68e8da4d2cc3acd77bc6968114b09669056f1bff12acc57049"}, + {file = "tiktoken-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5d3c48cb5649ce6bb2b207377dfdaa855e1e771b2e7f59fb251182c227573619"}, + {file = "tiktoken-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a55f983735745df9a87161d9e0ce9ef7d216039d389246be98c6d416bbb2452f"}, + {file = "tiktoken-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175de868393039a85fdf4c7cfb9b8883d1b248b9a3d9d0129d30414f5a59c333"}, + {file = "tiktoken-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6cd97b8cd14e3fe6647baa71c67f7f6b21a401fa996ccc3d93bf0ae02162af2"}, + {file = "tiktoken-0.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:806e2b8c0b9786c0e3212e8b3a6ac8f5840066c00a31b89e6c8d9ba0421e77d7"}, + {file = "tiktoken-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:57b753aa9813f06fa5a26da2622114bf9769a8d1dca1b276d3613ee15da5b09d"}, + {file = "tiktoken-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aa3c15b87bb2cea56ecc8fe4c7bf105c5c2dc4090c2df97c141100488297173a"}, + {file = "tiktoken-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bd98fc4a9ec967a089c62497f21277b53aa3e15a6fec731ac707eea4d5527938"}, + {file = "tiktoken-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab10ad3280f348a0d3bfea6d503c6aa84676b159692701bc7604e67129bd2135"}, + {file = "tiktoken-0.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:59296d495aa6aec375a75f07da44fabb9720632c9404b41b9cbfe95e17966345"}, + {file = "tiktoken-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:3b078e6109d522c5ffc52859520eef6c17a3b120ed52b79f48cae0badff08fe0"}, + {file = "tiktoken-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:aef47e8037652b18d2665b77e1f9416d3a86ccd383b039d0dfcb7d92085cef6d"}, + {file = "tiktoken-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0f62f8349a5412962326dbc41c3823a1f381d8ab62afbee94480d8296499d8e"}, + {file = "tiktoken-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d0dbf7e1940427c11f0c8ab9046ad98d774850b21559b37ca60ff30d3a14620"}, + {file = "tiktoken-0.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8f1a7c6bec42a2fb5309a161d1b891fe5e181d4b620a962923a925f45fe25697"}, + {file = "tiktoken-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3349fd809d17b722814a6a700e4bc0125527f39057b57a02ed42f53bb4e6e2f5"}, + {file = "tiktoken-0.2.0.tar.gz", hash = "sha256:df41a3d478499757b5b32eae5e97657cf159d8d9e6764049dd7c3abb49e1b40f"}, +] + +[package.dependencies] +blobfile = ">=2" +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, + {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] + +[[package]] +name = "ujson" +version = "5.4.0" +description = "Ultra fast JSON encoder and decoder for Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ujson-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:511aa641a5b91d19280183b134fb6c473039d4dd82e987ac810cffba783521ac"}, + {file = "ujson-5.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b045ca5497a950cc3492840adb3bcb3b9e305ed6599ed14c6aeaa08011aa463f"}, + {file = "ujson-5.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa00b746138835271653b0c3da171d2a8b510c579381f71e8b8e03484d50d825"}, + {file = "ujson-5.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91edcf9978ee401119e9c8589376ae37fd3e6e75ee365c49385cb005eaff1535"}, + {file = "ujson-5.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05e411627e5d6ee773232960ca7307e66017f78e3fa74f7e95c3a8cc5cb05415"}, + {file = "ujson-5.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:7d4c9ccd30e621e714ec24ca911ad8873567dc1ac1e5e914405ea9dd16b9d40c"}, + {file = "ujson-5.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:dd0d4ec694cab8a0a4d85f45f81ae0065465c4670f0db72ba48d6c4e7ae42834"}, + {file = "ujson-5.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:aaa77af91df3f71858a1f792c74d3f2d3abf3875f93ab1a2b9a24b3797743b02"}, + {file = "ujson-5.4.0-cp310-cp310-win32.whl", hash = "sha256:fbea46c0fbc1c3bc8f957afd8dbb25b4ea3a356e18ee6dd79ace6cf32bd4cff7"}, + {file = "ujson-5.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:2d98248f1df1e1aab67e0374ab98945dd36bc1764753d71fd8aea5f296360b76"}, + {file = "ujson-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f5c547d49a7e9d3f231e9323171bbbbcef63173fb007a2787cd4f05ac6269315"}, + {file = "ujson-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39bb702ca1612253b5e4b6004e0f20208c98a446606aa351f9a7ba5ceaff0eb8"}, + {file = "ujson-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381c97d326d1ec569d318cc0ae83940ea2df125ede1000871680fefd5b7fdea9"}, + {file = "ujson-5.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a0707f381f97e1287c0dbf94d95bd6c0bbf6e4eeeaa656f0076b7883010c818"}, + {file = "ujson-5.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6a20f2f6e8818c1ab89dd4be6bbad3fc2ddb15287f89e7ea35f3eb849afebbd9"}, + {file = "ujson-5.4.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8cd6117e33233f2de6bc896eea6a5a59b58a37db08f371157264e0ec5e51c76a"}, + {file = "ujson-5.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:31bdb6d771d5ef6d37134b42211500bfe176c55d399f3317e569783dc42ed38e"}, + {file = "ujson-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:baa76a6f707a6d22437fe9c7ec9719672fb04d4d9435a3e80ee9b1aaeb2089d9"}, + {file = "ujson-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ee29cf5cfc1e841708297633e1ce749aa851fb96830bbe51f2e5940741ff2441"}, + {file = "ujson-5.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7e12272361e9722777c83b3f5b0bb91d402531f36e80c6e5fafb6acb89e897e3"}, + {file = "ujson-5.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3212847d3885bfd4f5fd56cdc37645a8f8e8a80d6cb569505da22fd9eb0e1a02"}, + {file = "ujson-5.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bcde3135265ecdd5714a7de4fdc167925390d7b17ca325e59980f4114c962b8"}, + {file = "ujson-5.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0551c1ba0bc9e05b69d9c18266dbc93252b5fa3cd9940051bc88a0dd33607b19"}, + {file = "ujson-5.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:13297a7d501f9c8c53e409d4fa57cc574e4fbfbe8807ef2c4c7ce2e3ec933a85"}, + {file = "ujson-5.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2974b17bc522ef86d98b498959d82f03c02e07d9eb08746026415298f4a4bca3"}, + {file = "ujson-5.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5df8b6369ee5ee2685fcc917f6c46b34e599c6e9a512fada6dfd752b909fa06a"}, + {file = "ujson-5.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:754f422aba8db8201a1073f25e2f732effc6471f8755708b16e6ebf19dd23634"}, + {file = "ujson-5.4.0-cp38-cp38-win32.whl", hash = "sha256:ea7fbc540bc04d5b05e5cd54e60ee8745ac665eedf2bad2ba9d12d5c7a7b7d2e"}, + {file = "ujson-5.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:8d472efa9c92e1b2933a22d2f1dbd5237087997136b24ac2b913bf4e8be03135"}, + {file = "ujson-5.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash 
= "sha256:e2a9ddb5c6d1427056b8d62a1a172a18ae522b14d9ba5996b8281b09cba87edd"}, + {file = "ujson-5.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1120c8263f7d85e89533a2b46d80cc6def15114772010ede4d197739e111dba6"}, + {file = "ujson-5.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:400e4ca8a59f71398e8fa56c4d2d6f535e2a121ddb57284ec15752ffce2dd63a"}, + {file = "ujson-5.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e844be0831042aa91e847e5ab03bddd1089ab1a8dd0a1bf90411abf864f058b2"}, + {file = "ujson-5.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b46aee21e5d75426c4058dfdb42f7e7b1d130c664ee5027a8dbbc50872dc32b"}, + {file = "ujson-5.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:326a96324ed9215b0bc9f1a5af324fb33900b6b0901516bcc421475d6596de0d"}, + {file = "ujson-5.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fd82932aaa224abd7d01e823b77aef9970f5ac1695027331d99e7f5fda9d37f5"}, + {file = "ujson-5.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cce79ce47c37132373fbdf55b683883c262a3a60763130e080b8394c1201d32"}, + {file = "ujson-5.4.0-cp39-cp39-win32.whl", hash = "sha256:191f88d5865740497b9827ef9b7c12f37a79872ac984e09f0901a10024019380"}, + {file = "ujson-5.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:68c7f753aec490c6566fd3cd301887c413ac3a588316e446f30a4134ac665668"}, + {file = "ujson-5.4.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a2e645325f844f9c890c9d956fc2d35ca91f38c857278238ef6516c2f99cf7c"}, + {file = "ujson-5.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cec010d318a0238b1333ea9f40d5603d374cc026c29c4471e2661712c6682da1"}, + {file = "ujson-5.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b40a3757a563ef77c3f2f9ea1732c2924e8b3b2bda3fa89513f949472ad40b6e"}, + {file = "ujson-5.4.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67f4e2fa81e1d99c01e7b1978ab0cbf3c9a8b663f683a709f87baad110d5b940"}, + {file = "ujson-5.4.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:9ae1d0094ce730e39e09656bc14074d9573cdd80adec1a55b06d8bf1f9613a01"}, + {file = "ujson-5.4.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:784dbd12925845a3f0757a956447e2fd31418abb5aeaebf3aca1203195f16fd1"}, + {file = "ujson-5.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:422653083c6df6cec17fdb5d6106c209aad9b0c94131c53b073980403db22167"}, + {file = "ujson-5.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e91947fda8354ea7faf698b084ebcdbabd239e7b15d8436fb74394f59a207ac9"}, + {file = "ujson-5.4.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef985eb2770900a485431910bd3f333b56d1a34b65f8c26a6ed8e8adf55f98d9"}, + {file = "ujson-5.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:025758cf6561af6986d77cd4af9367ab56dde5c7c50f13f59e6964b4b25df73e"}, + {file = "ujson-5.4.0.tar.gz", hash = "sha256:6b953e09441e307504130755e5bd6b15850178d591f66292bba4608c4f7f9b00"}, +] + +[[package]] +name = "urllib3" +version = "1.26.15" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.15-py2.py3-none-any.whl", hash = "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"}, + {file = "urllib3-1.26.15.tar.gz", hash = "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "uvicorn" +version = "0.20.0" +description = "The lightning-fast ASGI server." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "uvicorn-0.20.0-py3-none-any.whl", hash = "sha256:c3ed1598a5668208723f2bb49336f4509424ad198d6ab2615b7783db58d919fd"}, + {file = "uvicorn-0.20.0.tar.gz", hash = "sha256:a4e12017b940247f836bc90b72e725d7dfd0c8ed1c51eb365f5ba30d9f5127d8"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[[package]] +name = "validators" +version = "0.19.0" +description = "Python Data Validation for Humans™." +category = "main" +optional = false +python-versions = ">=3.4" +files = [ + {file = "validators-0.19.0.tar.gz", hash = "sha256:dec45f4381f042f1e705cfa74949505b77f1e27e8b05409096fee8152c839cbe"}, +] + +[package.dependencies] +decorator = ">=3.4.0" + +[package.extras] +test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] + +[[package]] +name = "weaviate-client" +version = "3.15.2" +description = "A python native weaviate client" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "weaviate-client-3.15.2.tar.gz", hash = "sha256:fe3e48d87411e6dc8ba9814e4c5e6341b1d418d5a289098e386a6d1d6edd2ed6"}, + {file = "weaviate_client-3.15.2-py3-none-any.whl", hash = "sha256:ed8c5a72fa20b3793449a56312976de42c43633f2121b1224e6c044e86b7263f"}, +] + +[package.dependencies] +authlib = ">=1.1.0" +requests = ">=2.28.0,<2.29.0" +tqdm = ">=4.59.0,<5.0.0" +validators = ">=0.18.2,<0.20.0" + +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + +[[package]] +name = "xlsxwriter" +version = "3.0.9" +description = "A Python module for creating Excel XLSX files." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "XlsxWriter-3.0.9-py3-none-any.whl", hash = "sha256:5eaaf3c6f791cba1dd1c3065147c35982180f693436093aabe5b7d6c16148e95"}, + {file = "XlsxWriter-3.0.9.tar.gz", hash = "sha256:7216d39a2075afac7a28cad81f6ac31b0b16d8976bf1b775577d157346f891dd"}, +] + +[[package]] +name = "yarl" +version = "1.8.2" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"}, + {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"}, + {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"}, + {file = 
"yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"}, + {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"}, + {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"}, + {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"}, + {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"}, + {file = "yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"}, + {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"}, + {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"}, + {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = "sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"}, + {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"}, + {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[metadata] +lock-version = "2.0" +python-versions = "^3.10" +content-hash = "9ce6f944faf7bda2d1964aff9cc36c2220b6b152e36d29895cf27ec2c0de9938" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..b3bf7ad2e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,41 @@ +[tool.poetry] +name = "chatgpt-retrieval-plugin" +version = "0.1.0" +description = "" +authors = ["isafulf "] +readme = "README.md" +packages = [{include = "server"}] + +[tool.poetry.dependencies] +python = "^3.10" +fastapi = "^0.92.0" +uvicorn = "^0.20.0" +openai = "^0.27.2" +python-dotenv = "^0.21.1" +pytest = "^7.2.1" +pydantic = "^1.10.5" +tenacity = "^8.2.1" +tiktoken = "^0.2.0" +numpy="^1.24.2" +docx2txt = "^0.8" +PyPDF2 = "^3.0.1" +python-pptx = "^0.6.21" +python-multipart = "^0.0.6" +arrow = "^1.2.3" +pinecone-client = "^2.1.0" +weaviate-client = "^3.12.0" +pymilvus = "^2.2.2" +qdrant-client = {version = "^1.0.4", python = "<3.12"} +redis = "4.5.1" + +[tool.poetry.scripts] +start = "server.main:start" + +[tool.poetry.group.dev.dependencies] +httpx = "^0.23.3" +pytest-cov = "^4.0.0" +pytest-asyncio = "^0.20.3" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/scripts/process_json/README.md b/scripts/process_json/README.md new file mode 100644 index 000000000..fe738c8a3 --- /dev/null +++ b/scripts/process_json/README.md @@ -0,0 +1,24 @@ +## Process a JSON File + +This script is a utility to process a file dump of documents in a JSON format and store them in the vector database with some metadata. It can also optionally screen the documents for personally identifiable information (PII) using a language model, and skip them if detected. Additionally, the script can extract metadata from the document using a language model. You can customize the PII detection function in [`services/pii_detection`](../../services/pii_detection.py) and the metadata extraction function in [`services/extract_metadata`](../../services/extract_metadata.py) for your use case. + +## Usage + +To run this script from the terminal, navigate to this folder and use the following command: + +``` +python process_json.py --filepath path/to/file_dump.json --custom_metadata '{"source": "file"}' --screen_for_pii True --extract_metadata True +``` + +where: + +- `path/to/file_dump.json` is the name or path to the file dump to be processed. 
The format of this JSON file should be a list of JSON objects, where each object represents a document. The JSON object should have a subset of the following fields: `id`, `text`, `source`, `source_id`, `url`, `created_at`, and `author`. The `text` field is required, while the rest are optional and will be used to populate the metadata of the document. If the `id` field is not specified, a random UUID will be generated for the document. +- `--custom_metadata` is an optional JSON string of key-value pairs to update the metadata of the documents. For example, `{"source": "file"}` will add a `source` field with the value `file` to the metadata of each document. The default value is an empty JSON object (`{}`). +- `--screen_for_pii` is an optional boolean flag to indicate whether to use the PII detection function or not. If set to `True`, the script will use the `screen_text_for_pii` function from the [`services/pii_detection`](../../services/pii_detection.py) module to check if the document text contains any PII using a language model. If PII is detected, the script will print a warning and skip the document. The default value is `False`. +- `--extract_metadata` is an optional boolean flag to indicate whether to try to extract metadata from the document using a language model. If set to `True`, the script will use the `extract_metadata_from_document` function from the [`services/extract_metadata`](../../services/extract_metadata.py) module to extract metadata from the document text and update the metadata object accordingly. The default value is `False`. + +The script will load the JSON file as a list of dictionaries, iterate over the data, create document objects, and batch upsert them into the database. It will also print some progress messages and error messages, if any, as well as the number and content of the skipped items due to errors or PII detection. + +You can use `python process_json.py -h` to get a summary of the options and their descriptions. + +Test the script with the example file, [example.json](example.json).
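A note on the boolean flags: the script parses `--screen_for_pii` and `--extract_metadata` with argparse's `type=bool`, and `bool()` treats any non-empty string (including `"False"`) as true, so passing `--screen_for_pii False` still enables screening. The safest approach is to omit the flag entirely when you want the default `False`. If you want explicit `True`/`False` values to be honored, a small parser helper along these lines could be swapped in; this is only a sketch, `str2bool` is not part of the repository, and the usage lines are hypothetical:

```
import argparse

def str2bool(value: str) -> bool:
    # Map common true/false spellings to booleans for argparse
    if value.lower() in ("true", "t", "yes", "1"):
        return True
    if value.lower() in ("false", "f", "no", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

# Hypothetical usage in process_json.py:
# parser.add_argument("--screen_for_pii", default=False, type=str2bool)
# parser.add_argument("--extract_metadata", default=False, type=str2bool)
```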
diff --git a/scripts/process_json/example.json b/scripts/process_json/example.json new file mode 100644 index 000000000..dc8f12bc1 --- /dev/null +++ b/scripts/process_json/example.json @@ -0,0 +1,25 @@ +[ + { + "id": "123", + "text": "This is a document about something", + "source": "file", + "source_id": "https://example.com/doc1", + "url": "https://example.com/doc1", + "created_at": "2021-01-01T12:00:00Z", + "author": "Alice" + }, + { + "text": "This is another document about something else", + "source": "file", + "source_id": "doc2.txt", + "author": "Bob" + }, + { + "id": "456", + "text": "This is Alice's phone number: 123-456-7890", + "source": "email", + "source_id": "567", + "created_at": "2021-01-02T13:00:00Z", + "author": "Alice" + } +] \ No newline at end of file diff --git a/scripts/process_json/process_json.py b/scripts/process_json/process_json.py new file mode 100644 index 000000000..a4ef5096d --- /dev/null +++ b/scripts/process_json/process_json.py @@ -0,0 +1,146 @@ +import uuid +import json +import argparse +import asyncio + +from models.models import Document, DocumentMetadata, Source +from datastore.datastore import DataStore +from datastore.factory import get_datastore +from services.extract_metadata import extract_metadata_from_document +from services.pii_detection import screen_text_for_pii + +DOCUMENT_UPSERT_BATCH_SIZE = 50 + + +async def process_json_dump( + filepath: str, + datastore: DataStore, + custom_metadata: dict, + screen_for_pii: bool, + extract_metadata: bool, +): + # load the json file as a list of dictionaries + with open(filepath) as json_file: + data = json.load(json_file) + + documents = [] + skipped_items = [] + # iterate over the data and create document objects + for item in data: + if len(documents) % 20 == 0: + print(f"Processed {len(documents)} documents") + + try: + # get the id, text, source, source_id, url, created_at and author from the item + # use default values if not specified + id = item.get("id", None) + text = item.get("text", None) + source = item.get("source", None) + source_id = item.get("source_id", None) + url = item.get("url", None) + created_at = item.get("created_at", None) + author = item.get("author", None) + + if not text: + print("No document text, skipping...") + continue + + # create a metadata object with the source, source_id, url, created_at and author + metadata = DocumentMetadata( + source=source, + source_id=source_id, + url=url, + created_at=created_at, + author=author, + ) + print("metadata: ", str(metadata)) + + # update metadata with custom values + for key, value in custom_metadata.items(): + if hasattr(metadata, key): + setattr(metadata, key, value) + + # screen for pii if requested + if screen_for_pii: + pii_detected = screen_text_for_pii(text) + # if pii detected, print a warning and skip the document + if pii_detected: + print("PII detected in document, skipping") + skipped_items.append(item) # add the skipped item to the list + continue + + # extract metadata if requested + if extract_metadata: + # extract metadata from the document text + extracted_metadata = extract_metadata_from_document( + f"Text: {text}; Metadata: {str(metadata)}" + ) + # get a Metadata object from the extracted metadata + metadata = DocumentMetadata(**extracted_metadata) + + # create a document object with the id or a random id, text and metadata + document = Document( + id=id or str(uuid.uuid4()), + text=text, + metadata=metadata, + ) + documents.append(document) + except Exception as e: + # log the error and continue with the next 
item + print(f"Error processing {item}: {e}") + skipped_items.append(item) # add the skipped item to the list + + # do this in batches, the upsert method already batches documents but this allows + # us to add more descriptive logging + for i in range(0, len(documents), DOCUMENT_UPSERT_BATCH_SIZE): + # Get the text of the chunks in the current batch + batch_documents = documents[i : i + DOCUMENT_UPSERT_BATCH_SIZE] + print(f"Upserting batch of {len(batch_documents)} documents, batch {i}") + print("documents: ", documents) + await datastore.upsert(batch_documents) + + # print the skipped items + print(f"Skipped {len(skipped_items)} items due to errors or PII detection") + for item in skipped_items: + print(item) + + +async def main(): + # parse the command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("--filepath", required=True, help="The path to the json dump") + parser.add_argument( + "--custom_metadata", + default="{}", + help="A JSON string of key-value pairs to update the metadata of the documents", + ) + parser.add_argument( + "--screen_for_pii", + default=False, + type=bool, + help="A boolean flag to indicate whether to try to the PII detection function (using a language model)", + ) + parser.add_argument( + "--extract_metadata", + default=False, + type=bool, + help="A boolean flag to indicate whether to try to extract metadata from the document (using a language model)", + ) + args = parser.parse_args() + + # get the arguments + filepath = args.filepath + custom_metadata = json.loads(args.custom_metadata) + screen_for_pii = args.screen_for_pii + extract_metadata = args.extract_metadata + + # initialize the db instance once as a global variable + datastore = await get_datastore() + # process the json dump + await process_json_dump( + filepath, datastore, custom_metadata, screen_for_pii, extract_metadata + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/scripts/process_jsonl/README.md b/scripts/process_jsonl/README.md new file mode 100644 index 000000000..df4073978 --- /dev/null +++ b/scripts/process_jsonl/README.md @@ -0,0 +1,24 @@ +## Process a JSONL File + +This script is a utility to process a file dump of documents in a JSONL format and store them in the vector database with some metadata. It can also optionally screen the documents for personally identifiable information (PII) using a language model, and skip them if detected. Additionally, the script can extract metadata from the document using a language model. You can customize the PII detection function in [`services/pii_detection`](../../services/pii_detection.py) and the metadata extraction function in [`services/extract_metadata`](../../services/extract_metadata.py) for your use case. + +## Usage + +To run this script from the terminal, navigate to this folder and use the following command: + +``` +python process_jsonl.py --filepath path/to/file_dump.jsonl --custom_metadata '{"source": "email"}' --screen_for_pii True --extract_metadata True +``` + +where: + +- `path/to/file_dump.jsonl` is the name or path to the file dump to be processed. The format of this JSONL file should be a newline-delimited JSON file, where each line is a valid JSON object representing a document. The JSON object should have a subset of the following fields: `id`, `text`, `source`, `source_id`, `url`, `created_at`, and `author`. The `text` field is required, while the rest are optional and will be used to populate the metadata of the document. 
If the `id` field is not specified, a random UUID will be generated for the document. +- `--custom_metadata` is an optional JSON string of key-value pairs to update the metadata of the documents. For example, `{"source": "file"}` will add a `source` field with the value `file` to the metadata of each document. The default value is an empty JSON object (`{}`). +- `--screen_for_pii` is an optional boolean flag to indicate whether to use the PII detection function or not. If set to `True`, the script will use the `screen_text_for_pii` function from the [`services/pii_detection`](../../services/pii_detection.py) module to check if the document text contains any PII using a language model. If PII is detected, the script will print a warning and skip the document. The default value is `False`. +- `--extract_metadata` is an optional boolean flag to indicate whether to try to extract metadata from the document using a language model. If set to `True`, the script will use the `extract_metadata_from_document` function from the [`services/extract_metadata`](../../services/extract_metadata.py) module to extract metadata from the document text and update the metadata object accordingly. The default value is `False`. + +The script will load the JSONL file as a list of dictionaries (one JSON object per line), iterate over the data, create document objects, and batch upsert them into the database. It will also print some progress messages and error messages, if any, as well as the number and content of the skipped items due to errors, PII detection, or metadata extraction issues. + +You can use `python process_jsonl.py -h` to get a summary of the options and their descriptions. + +Test the script with the example file, [example.jsonl](example.jsonl). diff --git a/scripts/process_jsonl/example.jsonl b/scripts/process_jsonl/example.jsonl new file mode 100644 index 000000000..6335bc4cd --- /dev/null +++ b/scripts/process_jsonl/example.jsonl @@ -0,0 +1,6 @@ +{"id": "4", "text": "This document only has an ID and text. The other fields are missing."} +{"text": "This document has no ID, but it has text and a source.", "source": "email"} +{"id": "6", "text": "This document has an ID, text, and author, but no source information.", "author": "John Doe"} +{"text": "This document has text, a source, and a URL, but no ID or author.", "source": "file", "url": "https://example.com/file/2"} +{"id": "8", "text": "This document has an ID, text, source, and created_at timestamp, but no author or URL.", "source": "chat", "created_at": "2022-01-04T00:00:00"} +{"id": "9", "text": "This document contains PII. 
John Smith's email address is john.smith@example.com and his phone number is +1 (555) 123-4567.", "source": "email", "source_id": "email_2", "url": "https://example.com/email/2", "created_at": "2022-01-05T00:00:00", "author": "John Smith"} \ No newline at end of file diff --git a/scripts/process_jsonl/process_jsonl.py b/scripts/process_jsonl/process_jsonl.py new file mode 100644 index 000000000..5efa9164c --- /dev/null +++ b/scripts/process_jsonl/process_jsonl.py @@ -0,0 +1,144 @@ +import uuid +import json +import argparse +import asyncio + +from models.models import Document, DocumentMetadata, Source +from datastore.datastore import DataStore +from datastore.factory import get_datastore +from services.extract_metadata import extract_metadata_from_document +from services.pii_detection import screen_text_for_pii + +DOCUMENT_UPSERT_BATCH_SIZE = 50 + + +async def process_jsonl_dump( + filepath: str, + datastore: DataStore, + custom_metadata: dict, + screen_for_pii: bool, + extract_metadata: bool, +): + # open the jsonl file as a generator of dictionaries + with open(filepath) as jsonl_file: + data = [json.loads(line) for line in jsonl_file] + + documents = [] + skipped_items = [] + # iterate over the data and create document objects + for item in data: + if len(documents) % 20 == 0: + print(f"Processed {len(documents)} documents") + + try: + # get the id, text, source, source_id, url, created_at and author from the item + # use default values if not specified + id = item.get("id", None) + text = item.get("text", None) + source = item.get("source", None) + source_id = item.get("source_id", None) + url = item.get("url", None) + created_at = item.get("created_at", None) + author = item.get("author", None) + + if not text: + print("No document text, skipping...") + continue + + # create a metadata object with the source, source_id, url, created_at and author + metadata = DocumentMetadata( + source=source, + source_id=source_id, + url=url, + created_at=created_at, + author=author, + ) + + # update metadata with custom values + for key, value in custom_metadata.items(): + if hasattr(metadata, key): + setattr(metadata, key, value) + + # screen for pii if requested + if screen_for_pii: + pii_detected = screen_text_for_pii(text) + # if pii detected, print a warning and skip the document + if pii_detected: + print("PII detected in document, skipping") + skipped_items.append(item) # add the skipped item to the list + continue + + # extract metadata if requested + if extract_metadata: + # extract metadata from the document text + extracted_metadata = extract_metadata_from_document( + f"Text: {text}; Metadata: {str(metadata)}" + ) + # get a Metadata object from the extracted metadata + metadata = DocumentMetadata(**extracted_metadata) + + # create a document object with the id, text and metadata + document = Document( + id=id, + text=text, + metadata=metadata, + ) + documents.append(document) + except Exception as e: + # log the error and continue with the next item + print(f"Error processing {item}: {e}") + skipped_items.append(item) # add the skipped item to the list + + # do this in batches, the upsert method already batches documents but this allows + # us to add more descriptive logging + for i in range(0, len(documents), DOCUMENT_UPSERT_BATCH_SIZE): + # Get the text of the chunks in the current batch + batch_documents = documents[i : i + DOCUMENT_UPSERT_BATCH_SIZE] + print(f"Upserting batch of {len(batch_documents)} documents, batch {i}") + await datastore.upsert(batch_documents) + + # print the 
skipped items + print(f"Skipped {len(skipped_items)} items due to errors or PII detection") + for item in skipped_items: + print(item) + + +async def main(): + # parse the command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("--filepath", required=True, help="The path to the jsonl dump") + parser.add_argument( + "--custom_metadata", + default="{}", + help="A JSON string of key-value pairs to update the metadata of the documents", + ) + parser.add_argument( + "--screen_for_pii", + default=False, + type=bool, + help="A boolean flag to indicate whether to try to the PII detection function (using a language model)", + ) + parser.add_argument( + "--extract_metadata", + default=False, + type=bool, + help="A boolean flag to indicate whether to try to extract metadata from the document (using a language model)", + ) + args = parser.parse_args() + + # get the arguments + filepath = args.filepath + custom_metadata = json.loads(args.custom_metadata) + screen_for_pii = args.screen_for_pii + extract_metadata = args.extract_metadata + + # initialize the db instance once as a global variable + datastore = await get_datastore() + # process the jsonl dump + await process_jsonl_dump( + filepath, datastore, custom_metadata, screen_for_pii, extract_metadata + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/scripts/process_zip/README.md b/scripts/process_zip/README.md new file mode 100644 index 000000000..181a733aa --- /dev/null +++ b/scripts/process_zip/README.md @@ -0,0 +1,24 @@ +## Process a ZIP File + +This script is a utility to process a file dump of documents in a zip file and store them in the vector database with some metadata. It can also optionally screen the documents for personally identifiable information (PII) using a language model, and skip them if detected. Additionally, the script can extract metadata from the document using a language model. You can customize the PII detection function in [`services/pii_detection`](../../services/pii_detection.py) and the metadata extraction function in [`services/extract_metadata`](../../services/extract_metadata.py) for your use case. + +## Usage + +To run this script from the terminal, navigate to this folder and use the following command: + +``` +python process_zip.py --filepath path/to/file_dump.zip --custom_metadata '{"source": "email"}' --screen_for_pii True --extract_metadata True +``` + +where: + +- `path/to/file_dump.zip` is the name or path to the file dump to be processed. The format of this zip file should be a zip file containing of docx, pdf, txt, md and pptx files (any internal folder structure is acceptable). +- `--custom_metadata` is an optional JSON string of key-value pairs to update the metadata of the documents. For example, `{"source": "file"}` will add a `source` field with the value `file` to the metadata of each document. The default value is an empty JSON object (`{}`). +- `--screen_for_pii` is an optional boolean flag to indicate whether to use the PII detection function or not. If set to `True`, the script will use the `screen_text_for_pii` function from the [`services/pii_detection`](../../services/pii_detection.py) module to check if the document text contains any PII using a language model. If PII is detected, the script will print a warning and skip the document. The default value is `False`. +- `--extract_metadata` is an optional boolean flag to indicate whether to try to extract metadata from the document using a language model. 
If set to `True`, the script will use the `extract_metadata_from_document` function from the [`services/extract_metadata`](../../services/extract_metadata.py) module to extract metadata from the document text and update the metadata object accordingly. The default value is`False`. + +The script will extract the files from the zip file into a temporary directory named `dump`, process each file and store the document text and metadata in the database, and then delete the temporary directory and its contents. It will also print some progress messages and error messages if any. + +You can use `python process_zip.py -h` to get a summary of the options and their descriptions. + +Test the script with the example file, [example.zip](example.zip). diff --git a/scripts/process_zip/example.zip b/scripts/process_zip/example.zip new file mode 100644 index 000000000..d35d27639 Binary files /dev/null and b/scripts/process_zip/example.zip differ diff --git a/scripts/process_zip/process_zip.py b/scripts/process_zip/process_zip.py new file mode 100644 index 000000000..1e50ca6e1 --- /dev/null +++ b/scripts/process_zip/process_zip.py @@ -0,0 +1,151 @@ +import uuid +import zipfile +import os +import json +import argparse +import asyncio + +from models.models import Document, DocumentMetadata, Source +from datastore.datastore import DataStore +from datastore.factory import get_datastore +from services.extract_metadata import extract_metadata_from_document +from services.file import extract_text_from_filepath +from services.pii_detection import screen_text_for_pii + +DOCUMENT_UPSERT_BATCH_SIZE = 50 + + +async def process_file_dump( + filepath: str, + datastore: DataStore, + custom_metadata: dict, + screen_for_pii: bool, + extract_metadata: bool, +): + # create a ZipFile object and extract all the files into a directory named 'dump' + with zipfile.ZipFile(filepath) as zip_file: + zip_file.extractall("dump") + + documents = [] + skipped_files = [] + # use os.walk to traverse the dump directory and its subdirectories + for root, dirs, files in os.walk("dump"): + for filename in files: + if len(documents) % 20 == 0: + print(f"Processed {len(documents)} documents") + + filepath = os.path.join(root, filename) + + try: + extracted_text = extract_text_from_filepath(filepath) + print(f"extracted_text from {filepath}") + + # create a metadata object with the source and source_id fields + metadata = DocumentMetadata( + source=Source.file, + source_id=filename, + ) + + # update metadata with custom values + for key, value in custom_metadata.items(): + if hasattr(metadata, key): + setattr(metadata, key, value) + + # screen for pii if requested + if screen_for_pii: + pii_detected = screen_text_for_pii(extracted_text) + # if pii detected, print a warning and skip the document + if pii_detected: + print("PII detected in document, skipping") + skipped_files.append( + filepath + ) # add the skipped file to the list + continue + + # extract metadata if requested + if extract_metadata: + # extract metadata from the document text + extracted_metadata = extract_metadata_from_document( + f"Text: {extracted_text}; Metadata: {str(metadata)}" + ) + # get a Metadata object from the extracted metadata + metadata = DocumentMetadata(**extracted_metadata) + + # create a document object with a random id, text and metadata + document = Document( + id=str(uuid.uuid4()), + text=extracted_text, + metadata=metadata, + ) + documents.append(document) + except Exception as e: + # log the error and continue with the next file + print(f"Error processing 
{filepath}: {e}") + skipped_files.append(filepath) # add the skipped file to the list + + # do this in batches, the upsert method already batches documents but this allows + # us to add more descriptive logging + for i in range(0, len(documents), DOCUMENT_UPSERT_BATCH_SIZE): + # Get the text of the chunks in the current batch + batch_documents = [doc for doc in documents[i : i + DOCUMENT_UPSERT_BATCH_SIZE]] + print(f"Upserting batch of {len(batch_documents)} documents, batch {i}") + print("documents: ", documents) + await datastore.upsert(batch_documents) + + # delete all files in the dump directory + for root, dirs, files in os.walk("dump", topdown=False): + for filename in files: + filepath = os.path.join(root, filename) + os.remove(filepath) + for dirname in dirs: + dirpath = os.path.join(root, dirname) + os.rmdir(dirpath) + + # delete the dump directory + os.rmdir("dump") + + # print the skipped files + print(f"Skipped {len(skipped_files)} files due to errors or PII detection") + for file in skipped_files: + print(file) + + +async def main(): + # parse the command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("--filepath", required=True, help="The path to the file dump") + parser.add_argument( + "--custom_metadata", + default="{}", + help="A JSON string of key-value pairs to update the metadata of the documents", + ) + parser.add_argument( + "--screen_for_pii", + default=False, + type=bool, + help="A boolean flag to indicate whether to try to the PII detection function (using a language model)", + ) + parser.add_argument( + "--extract_metadata", + default=False, + type=bool, + help="A boolean flag to indicate whether to try to extract metadata from the document (using a language model)", + ) + args = parser.parse_args() + + # get the arguments + filepath = args.filepath + custom_metadata = json.loads(args.custom_metadata) + screen_for_pii = args.screen_for_pii + extract_metadata = args.extract_metadata + + # initialize the db instance once as a global variable + datastore = await get_datastore() + # process the file dump + await process_file_dump( + filepath, datastore, custom_metadata, screen_for_pii, extract_metadata + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/server/main.py b/server/main.py new file mode 100644 index 000000000..df909c874 --- /dev/null +++ b/server/main.py @@ -0,0 +1,145 @@ +import os +import uvicorn +from fastapi import FastAPI, File, HTTPException, Depends, Body, UploadFile +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi.staticfiles import StaticFiles + +from models.api import ( + DeleteRequest, + DeleteResponse, + QueryRequest, + QueryResponse, + UpsertRequest, + UpsertResponse, +) +from datastore.factory import get_datastore +from services.file import get_document_from_file + + +app = FastAPI() +app.mount("/.well-known", StaticFiles(directory=".well-known"), name="static") + +# Create a sub-application, in order to access just the query endpoint in an OpenAPI schema, found at http://0.0.0.0:8000/sub/openapi.json when the app is running locally +sub_app = FastAPI( + title="Retrieval Plugin API", + description="A retrieval API for querying and filtering documents based on natural language queries and metadata", + version="1.0.0", + servers=[{"url": "https://your-app-url.com"}], +) +app.mount("/sub", sub_app) + +bearer_scheme = HTTPBearer() +BEARER_TOKEN = os.environ.get("BEARER_TOKEN") +assert BEARER_TOKEN is not None + + +def validate_token(credentials: 
HTTPAuthorizationCredentials = Depends(bearer_scheme)): + if credentials.scheme != "Bearer" or credentials.credentials != BEARER_TOKEN: + raise HTTPException(status_code=401, detail="Invalid or missing token") + return credentials + + +@app.post( + "/upsert-file", + response_model=UpsertResponse, +) +async def upsert_file( + file: UploadFile = File(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + document = await get_document_from_file(file) + + try: + ids = await datastore.upsert([document]) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post( + "/upsert", + response_model=UpsertResponse, +) +async def upsert( + request: UpsertRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + ids = await datastore.upsert(request.documents) + return UpsertResponse(ids=ids) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.post( + "/query", + response_model=QueryResponse, +) +async def query_main( + request: QueryRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + results = await datastore.query( + request.queries, + ) + return QueryResponse(results=results) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@sub_app.post( + "/query", + response_model=QueryResponse, + description='Accepts an array of search query objects, each with a natural language query string ("query") and an optional metadata filter ("filter"). Filters are not necessary in most cases, but can sometimes help refine search results based on criteria such as document source or time period. Send multiple queries to compare information from different sources or break down complex questions into sub-questions. 
If you receive a ResponseTooLargeError, try splitting up the queries into multiple calls to this endpoint.', +) +async def query( + request: QueryRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + try: + results = await datastore.query( + request.queries, + ) + return QueryResponse(results=results) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.delete( + "/delete", + response_model=DeleteResponse, +) +async def delete( + request: DeleteRequest = Body(...), + token: HTTPAuthorizationCredentials = Depends(validate_token), +): + if not (request.ids or request.filter or request.delete_all): + raise HTTPException( + status_code=400, + detail="One of ids, filter, or delete_all is required", + ) + try: + success = await datastore.delete( + ids=request.ids, + filter=request.filter, + delete_all=request.delete_all, + ) + return DeleteResponse(success=success) + except Exception as e: + print("Error:", e) + raise HTTPException(status_code=500, detail="Internal Service Error") + + +@app.on_event("startup") +async def startup(): + global datastore + datastore = await get_datastore() + + +def start(): + uvicorn.run("server.main:app", host="0.0.0.0", port=8000, reload=True) diff --git a/services/chunks.py b/services/chunks.py new file mode 100644 index 000000000..73cefd77d --- /dev/null +++ b/services/chunks.py @@ -0,0 +1,206 @@ +from typing import Dict, List, Optional, Tuple +import uuid +from models.models import Document, DocumentChunk, DocumentChunkMetadata + +import tiktoken + +from services.openai import get_embeddings + +# Global variables +tokenizer = tiktoken.get_encoding( + "cl100k_base" +) # The encoding scheme to use for tokenization + +# Constants +CHUNK_SIZE = 200 # The target size of each text chunk in tokens +MIN_CHUNK_SIZE_CHARS = 350 # The minimum size of each text chunk in characters +MIN_CHUNK_LENGTH_TO_EMBED = 30 # Discard chunks shorter than this +EMBEDDINGS_BATCH_SIZE = 128 # The number of embeddings to request at a time +MAX_NUM_CHUNKS = 10000 # The maximum number of chunks to generate from a text + + +def get_text_chunks(text: str, chunk_token_size: Optional[int]) -> List[str]: + """ + Split a text into chunks of ~CHUNK_SIZE tokens, based on punctuation and newline boundaries. + + Args: + text: The text to split into chunks. + chunk_token_size: The target size of each chunk in tokens, or None to use the default CHUNK_SIZE. + + Returns: + A list of text chunks, each of which is a string of ~CHUNK_SIZE tokens. 
+ """ + # Return an empty list if the text is empty or whitespace + if not text or text.isspace(): + return [] + + # Tokenize the text + tokens = tokenizer.encode(text, disallowed_special=()) + + # Initialize an empty list of chunks + chunks = [] + + # Use the provided chunk token size or the default one + chunk_size = chunk_token_size or CHUNK_SIZE + + # Initialize a counter for the number of chunks + num_chunks = 0 + + # Loop until all tokens are consumed + while tokens and num_chunks < MAX_NUM_CHUNKS: + # Take the first chunk_size tokens as a chunk + chunk = tokens[:chunk_size] + + # Decode the chunk into text + chunk_text = tokenizer.decode(chunk) + + # Skip the chunk if it is empty or whitespace + if not chunk_text or chunk_text.isspace(): + # Remove the tokens corresponding to the chunk text from the remaining tokens + tokens = tokens[len(chunk) :] + # Continue to the next iteration of the loop + continue + + # Find the last period or punctuation mark in the chunk + last_punctuation = max( + chunk_text.rfind("."), + chunk_text.rfind("?"), + chunk_text.rfind("!"), + chunk_text.rfind("\n"), + ) + + # If there is a punctuation mark, and the last punctuation index is before MIN_CHUNK_SIZE_CHARS + if ( + last_punctuation != -1 + and last_punctuation > MIN_CHUNK_SIZE_CHARS + and last_punctuation > MIN_CHUNK_SIZE_CHARS + ): + # Truncate the chunk text at the punctuation mark + chunk_text = chunk_text[: last_punctuation + 1] + + # Remove any newline characters and strip any leading or trailing whitespace + chunk_text = chunk_text.replace("\n", " ").strip() + + if len(chunk_text) > MIN_CHUNK_LENGTH_TO_EMBED: + # Append the chunk text to the list of chunks + chunks.append(chunk_text) + + # Remove the tokens corresponding to the chunk text from the remaining tokens + tokens = tokens[len(tokenizer.encode(chunk_text, disallowed_special=())) :] + + # Increment the number of chunks + num_chunks += 1 + + # Handle the remaining tokens + if tokens: + remaining_text = tokenizer.decode(tokens).replace("\n", " ").strip() + if len(remaining_text) > MIN_CHUNK_LENGTH_TO_EMBED: + chunks.append(remaining_text) + + return chunks + + +def create_document_chunks( + doc: Document, chunk_token_size: Optional[int] +) -> Tuple[List[DocumentChunk], str]: + """ + Create a list of document chunks from a document object and return the document id. + + Args: + doc: The document object to create chunks from. It should have a text attribute and optionally an id and a metadata attribute. + chunk_token_size: The target size of each chunk in tokens, or None to use the default CHUNK_SIZE. + + Returns: + A tuple of (doc_chunks, doc_id), where doc_chunks is a list of document chunks, each of which is a DocumentChunk object with an id, a document_id, a text, and a metadata attribute, + and doc_id is the id of the document object, generated if not provided. The id of each chunk is generated from the document id and a sequential number, and the metadata is copied from the document object. 
+ """ + # Check if the document text is empty or whitespace + if not doc.text or doc.text.isspace(): + return [], doc.id or str(uuid.uuid4()) + + # Generate a document id if not provided + doc_id = doc.id or str(uuid.uuid4()) + + # Split the document text into chunks + text_chunks = get_text_chunks(doc.text, chunk_token_size) + + metadata = ( + DocumentChunkMetadata(**doc.metadata.__dict__) + if doc.metadata is not None + else DocumentChunkMetadata() + ) + + metadata.document_id = doc_id + + # Initialize an empty list of chunks for this document + doc_chunks = [] + + # Assign each chunk a sequential number and create a DocumentChunk object + for i, text_chunk in enumerate(text_chunks): + chunk_id = f"{doc_id}_{i}" + doc_chunk = DocumentChunk( + id=chunk_id, + text=text_chunk, + metadata=metadata, + ) + # Append the chunk object to the list of chunks for this document + doc_chunks.append(doc_chunk) + + # Return the list of chunks and the document id + return doc_chunks, doc_id + + +def get_document_chunks( + documents: List[Document], chunk_token_size: Optional[int] +) -> Dict[str, List[DocumentChunk]]: + """ + Convert a list of documents into a dictionary from document id to list of document chunks. + + Args: + documents: The list of documents to convert. + chunk_token_size: The target size of each chunk in tokens, or None to use the default CHUNK_SIZE. + + Returns: + A dictionary mapping each document id to a list of document chunks, each of which is a DocumentChunk object + with text, metadata, and embedding attributes. + """ + # Initialize an empty dictionary of lists of chunks + chunks: Dict[str, List[DocumentChunk]] = {} + + # Initialize an empty list of all chunks + all_chunks: List[DocumentChunk] = [] + + # Loop over each document and create chunks + for doc in documents: + doc_chunks, doc_id = create_document_chunks(doc, chunk_token_size) + + # Append the chunks for this document to the list of all chunks + all_chunks.extend(doc_chunks) + + # Add the list of chunks for this document to the dictionary with the document id as the key + chunks[doc_id] = doc_chunks + + # Check if there are no chunks + if not all_chunks: + return {} + + # Get all the embeddings for the document chunks in batches, using get_embeddings + embeddings: List[List[float]] = [] + for i in range(0, len(all_chunks), EMBEDDINGS_BATCH_SIZE): + # Get the text of the chunks in the current batch + batch_texts = [ + chunk.text for chunk in all_chunks[i : i + EMBEDDINGS_BATCH_SIZE] + ] + + # Get the embeddings for the batch texts + batch_embeddings = get_embeddings(batch_texts) + + # Append the batch embeddings to the embeddings list + embeddings.extend(batch_embeddings) + + # Update the document chunk objects with the embeddings + for i, chunk in enumerate(all_chunks): + # Assign the embedding from the embeddings list to the chunk object + chunk.embedding = embeddings[i] + + return chunks diff --git a/services/date.py b/services/date.py new file mode 100644 index 000000000..afb8bf1ce --- /dev/null +++ b/services/date.py @@ -0,0 +1,24 @@ +import arrow + + +def to_unix_timestamp(date_str: str) -> int: + """ + Convert a date string to a unix timestamp (seconds since epoch). + + Args: + date_str: The date string to convert. + + Returns: + The unix timestamp corresponding to the date string. + + If the date string cannot be parsed as a valid date format, returns the current unix timestamp and prints a warning. 
+ """ + # Try to parse the date string using arrow, which supports many common date formats + try: + date_obj = arrow.get(date_str) + print() + return int(date_obj.timestamp()) + except arrow.parser.ParserError: + # If the parsing fails, return the current unix timestamp and print a warning + print(f"Invalid date format: {date_str}") + return int(arrow.now().timestamp()) diff --git a/services/extract_metadata.py b/services/extract_metadata.py new file mode 100644 index 000000000..2117b9df3 --- /dev/null +++ b/services/extract_metadata.py @@ -0,0 +1,38 @@ +from models.models import Source +from services.openai import get_chat_completion +import json +from typing import Dict + + +def extract_metadata_from_document(text: str) -> Dict[str, str]: + sources = Source.__members__.keys() + sources_string = ", ".join(sources) + # This prompt is just an example, change it to fit your use case + messages = [ + { + "role": "system", + "content": f""" + Given a document from a user, try to extract the following metadata: + - source: string, one of {sources_string} + - url: string or don't specify + - created_at: string or don't specify + - author: string or don't specify + + Respond with a JSON containing the extracted metadata in key value pairs. If you don't find a metadata field, don't specify it. + """, + }, + {"role": "user", "content": text}, + ] + + completion = get_chat_completion( + messages, "gpt-4" + ) # TODO: change to your preferred model name + + print(f"completion: {completion}") + + try: + metadata = json.loads(completion) + except: + metadata = {} + + return metadata diff --git a/services/file.py b/services/file.py new file mode 100644 index 000000000..ffe27b9a1 --- /dev/null +++ b/services/file.py @@ -0,0 +1,117 @@ +import os +from io import BufferedReader +from typing import Optional +from fastapi import UploadFile +import mimetypes +from PyPDF2 import PdfReader +import docx2txt +import csv +import pptx + +from models.models import Document, DocumentMetadata + + +async def get_document_from_file(file: UploadFile) -> Document: + extracted_text = await extract_text_from_form_file(file) + print(f"extracted_text:") + # get metadata + metadata = DocumentMetadata() + doc = Document(text=extracted_text, metadata=metadata) + + return doc + + +def extract_text_from_filepath(filepath: str, mimetype: Optional[str] = None) -> str: + """Return the text content of a file given its filepath.""" + + if mimetype is None: + # Get the mimetype of the file based on its extension + mimetype, _ = mimetypes.guess_type(filepath) + + if not mimetype: + if filepath.endswith(".md"): + mimetype = "text/markdown" + else: + raise Exception("Unsupported file type") + + # Open the file in binary mode + file = open(filepath, "rb") + extracted_text = extract_text_from_file(file, mimetype) + + return extracted_text + + +def extract_text_from_file(file: BufferedReader, mimetype: str) -> str: + if mimetype == "application/pdf": + # Extract text from pdf using PyPDF2 + reader = PdfReader(file) + extracted_text = "" + for page in reader.pages: + extracted_text += page.extract_text() + elif mimetype == "text/plain" or mimetype == "text/markdown": + # Read text from plain text file + extracted_text = file.read().decode("utf-8") + elif ( + mimetype + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ): + # Extract text from docx using docx2txt + extracted_text = docx2txt.process(file) + elif mimetype == "text/csv": + # Extract text from csv using csv module + extracted_text = "" + decoded_buffer = 
(line.decode("utf-8") for line in file) + reader = csv.reader(decoded_buffer) + for row in reader: + extracted_text += " ".join(row) + "\n" + elif ( + mimetype + == "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ): + # Extract text from pptx using python-pptx + extracted_text = "" + presentation = pptx.Presentation(file) + for slide in presentation.slides: + for shape in slide.shapes: + if shape.has_text_frame: + for paragraph in shape.text_frame.paragraphs: + for run in paragraph.runs: + extracted_text += run.text + " " + extracted_text += "\n" + else: + # Unsupported file type + file.close() + raise ValueError("Unsupported file type: {}".format(mimetype)) + + file.close() + return extracted_text + + +# Extract text from a file based on its mimetype +async def extract_text_from_form_file(file: UploadFile): + """Return the text content of a file.""" + # get the file body from the upload file object + mimetype = file.content_type + print(f"mimetype: {mimetype}") + print(f"file.file: {file.file}") + print("file: ", file) + + file_stream = await file.read() + + temp_file_path = "/tmp/temp_file" + + # write the file to a temporary locatoin + with open(temp_file_path, "wb") as f: + f.write(file_stream) + + try: + extracted_text = extract_text_from_filepath(temp_file_path, mimetype) + except Exception as e: + print(f"Error: {e}") + os.remove(temp_file_path) + raise e + + # remove file from temp location + os.remove(temp_file_path) + + return extracted_text diff --git a/services/openai.py b/services/openai.py new file mode 100644 index 000000000..a3ff1a46a --- /dev/null +++ b/services/openai.py @@ -0,0 +1,59 @@ +from typing import List +import openai + + +from tenacity import retry, wait_random_exponential, stop_after_attempt + + +@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) +def get_embeddings(texts: List[str]) -> List[List[float]]: + """ + Embed texts using OpenAI's ada model. + + Args: + texts: The list of texts to embed. + + Returns: + A list of embeddings, each of which is a list of floats. + + Raises: + Exception: If the OpenAI API call fails. + """ + # Call the OpenAI API to get the embeddings + response = openai.Embedding.create(input=texts, model="text-embedding-ada-002") + + # Extract the embedding data from the response + data = response["data"] # type: ignore + + # Return the embeddings as a list of lists of floats + return [result["embedding"] for result in data] + + +@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) +def get_chat_completion( + messages, + model="gpt-3.5-turbo", # use "gpt-4" for better results +): + """ + Generate a chat completion using OpenAI's chat completion API. + + Args: + messages: The list of messages in the chat history. + model: The name of the model to use for the completion. Default is gpt-3.5-turbo, which is a fast, cheap and versatile model. Use gpt-4 for higher quality but slower results. + + Returns: + A string containing the chat completion. + + Raises: + Exception: If the OpenAI API call fails. 
+ """ + # call the OpenAI chat completion API with the given messages + response = openai.ChatCompletion.create( + model=model, + messages=messages, + ) + + choices = response["choices"] # type: ignore + completion = choices[0].message.content.strip() + print(f"Completion: {completion}") + return completion diff --git a/services/pii_detection.py b/services/pii_detection.py new file mode 100644 index 000000000..fcdf4629c --- /dev/null +++ b/services/pii_detection.py @@ -0,0 +1,30 @@ +from services.openai import get_chat_completion + + +def screen_text_for_pii(text: str) -> bool: + # This prompt is just an example, change it to fit your use case + messages = [ + { + "role": "system", + "content": f""" + You can only respond with the word "True" or "False", where your answer indicates whether the text in the user's message contains PII. + Do not explain your answer, and do not use punctuation. + Your task is to identify whether the text extracted from your company files + contains sensitive PII information that should not be shared with the broader company. Here are some things to look out for: + - An email address that identifies a specific person in either the local-part or the domain + - The postal address of a private residence (must include at least a street name) + - The postal address of a public place (must include either a street name or business name) + - Notes about hiring decisions with mentioned names of candidates. The user will send a document for you to analyze. + """, + }, + {"role": "user", "content": text}, + ] + + completion = get_chat_completion( + messages, + ) + + if completion.startswith("True"): + return True + + return False diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/datastore/providers/milvus/test_milvus_datastore.py b/tests/datastore/providers/milvus/test_milvus_datastore.py new file mode 100644 index 000000000..6588a9f87 --- /dev/null +++ b/tests/datastore/providers/milvus/test_milvus_datastore.py @@ -0,0 +1,342 @@ +# from pathlib import Path +# from dotenv import find_dotenv, load_dotenv +# env_path = Path(".") / "milvus.env" +# load_dotenv(dotenv_path=env_path, verbose=True) + +import pytest +from models.models import ( + DocumentChunkMetadata, + DocumentMetadataFilter, + DocumentChunk, + Query, + QueryWithEmbedding, + Source, +) +from datastore.providers.milvus_datastore import ( + OUTPUT_DIM, + MilvusDataStore, +) + + +@pytest.fixture +def milvus_datastore(): + return MilvusDataStore() + + +@pytest.fixture +def document_chunk_one(): + doc_id = "zerp" + doc_chunks = [] + + ids = ["abc_123", "def_456", "ghi_789"] + texts = [ + "lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + "2021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks.append(chunk) + + return {doc_id: doc_chunks} + + +@pytest.fixture +def document_chunk_two(): + doc_id_1 
= "zerp" + doc_chunks_1 = [] + + ids = ["abc_123", "def_456", "ghi_789"] + texts = [ + "1lorem ipsum dolor sit amet", + "2consectetur adipiscing elit", + "3sed do eiusmod tempor incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + "3021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id_1, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks_1.append(chunk) + + doc_id_2 = "merp" + doc_chunks_2 = [] + + ids = ["jkl_123", "lmn_456", "opq_789"] + texts = [ + "3sdsc efac feas sit qweas", + "4wert sdfas fdsc", + "52dsc fdsf eiusmod asdasd incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "4929-10-28T09:30:00-05:00", + "5009-01-03T16:39:57-08:00", + "6021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3, 6)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id_2, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks_2.append(chunk) + + return {doc_id_1: doc_chunks_1, doc_id_2: doc_chunks_2} + + +@pytest.mark.asyncio +async def test_upsert(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + assert 3 == milvus_datastore.col.num_entities + + + +@pytest.mark.asyncio +async def test_reload(milvus_datastore, document_chunk_one, document_chunk_two): + await milvus_datastore.delete(delete_all=True) + + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + assert 3 == milvus_datastore.col.num_entities + new_store = MilvusDataStore() + another_in = {i:document_chunk_two[i] for i in document_chunk_two if i!=res[0]} + res = await new_store._upsert(another_in) + new_store.col.flush() + assert 6 == new_store.col.num_entities + query = QueryWithEmbedding( + query="lorem", + top_k=10, + embedding=[0.5] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + assert 1 == len(query_results) + + + + + +@pytest.mark.asyncio +async def test_upsert_query_all(milvus_datastore, document_chunk_two): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_two) + assert res == list(document_chunk_two.keys()) + milvus_datastore.col.flush() + + # Num entities currently doesnt track deletes + query = QueryWithEmbedding( + query="lorem", + top_k=10, + embedding=[0.5] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 6 == len(query_results[0].results) + + + + +@pytest.mark.asyncio +async def 
test_query_accuracy(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + query = QueryWithEmbedding( + query="lorem", + top_k=1, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert 0 == query_results[0].results[0].score + assert "abc_123" == query_results[0].results[0].id + + + +@pytest.mark.asyncio +async def test_query_filter(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + query = QueryWithEmbedding( + query="lorem", + top_k=1, + embedding=[0] * OUTPUT_DIM, + filter=DocumentMetadataFilter( + start_date="2000-01-03T16:39:57-08:00", end_date="2010-01-03T16:39:57-08:00" + ), + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert 0 != query_results[0].results[0].score + assert "def_456" == query_results[0].results[0].id + + + +@pytest.mark.asyncio +async def test_delete_with_date_filter(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + await milvus_datastore.delete( + filter=DocumentMetadataFilter( + end_date="2009-01-03T16:39:57-08:00", + ) + ) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert "ghi_789" == query_results[0].results[0].id + + + +@pytest.mark.asyncio +async def test_delete_with_source_filter(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + await milvus_datastore.delete( + filter=DocumentMetadataFilter( + source=Source.email, + ) + ) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 2 == len(query_results[0].results) + assert "def_456" == query_results[0].results[0].id + + + +@pytest.mark.asyncio +async def test_delete_with_document_id_filter(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + await milvus_datastore.delete( + filter=DocumentMetadataFilter( + document_id=res[0], + ) + ) + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 0 == len(query_results[0].results) + + + +@pytest.mark.asyncio +async def test_delete_with_document_id(milvus_datastore, document_chunk_one): + await milvus_datastore.delete(delete_all=True) + res = await milvus_datastore._upsert(document_chunk_one) + 
assert res == list(document_chunk_one.keys()) + milvus_datastore.col.flush() + await milvus_datastore.delete([res[0]]) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await milvus_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 0 == len(query_results[0].results) + + + +# if __name__ == '__main__': +# import sys +# import pytest +# pytest.main(sys.argv) diff --git a/tests/datastore/providers/qdrant/test_qdrant_datastore.py b/tests/datastore/providers/qdrant/test_qdrant_datastore.py new file mode 100644 index 000000000..850e0a936 --- /dev/null +++ b/tests/datastore/providers/qdrant/test_qdrant_datastore.py @@ -0,0 +1,280 @@ +from typing import Dict, List + +import pytest +import qdrant_client +from qdrant_client.http.models import PayloadSchemaType + +from datastore.providers.qdrant_datastore import QdrantDataStore +from models.models import ( + DocumentChunk, + DocumentChunkMetadata, + QueryWithEmbedding, + DocumentMetadataFilter, + Source, +) + + +def create_embedding(non_zero_pos: int, size: int) -> List[float]: + vector = [0.0] * size + vector[non_zero_pos % size] = 1.0 + return vector + + +@pytest.fixture +def qdrant_datastore() -> QdrantDataStore: + return QdrantDataStore( + collection_name="documents", vector_size=5, recreate_collection=True + ) + + +@pytest.fixture +def client() -> qdrant_client.QdrantClient: + return qdrant_client.QdrantClient() + + +@pytest.fixture +def initial_document_chunks() -> Dict[str, List[DocumentChunk]]: + first_doc_chunks = [ + DocumentChunk( + id=f"first-doc-{i}", + text=f"Lorem ipsum {i}", + metadata=DocumentChunkMetadata(), + embedding=create_embedding(i, 5), + ) + for i in range(4, 7) + ] + return { + "first-doc": first_doc_chunks, + } + + +@pytest.fixture +def document_chunks() -> Dict[str, List[DocumentChunk]]: + first_doc_chunks = [ + DocumentChunk( + id=f"first-doc_{i}", + text=f"Lorem ipsum {i}", + metadata=DocumentChunkMetadata( + source=Source.email, created_at="2023-03-05", document_id="first-doc" + ), + embedding=create_embedding(i, 5), + ) + for i in range(3) + ] + second_doc_chunks = [ + DocumentChunk( + id=f"second-doc_{i}", + text=f"Dolor sit amet {i}", + metadata=DocumentChunkMetadata( + created_at="2023-03-04", document_id="second-doc" + ), + embedding=create_embedding(i + len(first_doc_chunks), 5), + ) + for i in range(2) + ] + return { + "first-doc": first_doc_chunks, + "second-doc": second_doc_chunks, + } + + +@pytest.mark.asyncio +async def test_datastore_creates_payload_indexes( + qdrant_datastore, + client, +): + collection_info = client.get_collection(collection_name="documents") + + assert 2 == len(collection_info.payload_schema) + assert "created_at" in collection_info.payload_schema + created_at = collection_info.payload_schema["created_at"] + assert PayloadSchemaType.INTEGER == created_at.data_type + assert "metadata.document_id" in collection_info.payload_schema + document_id = collection_info.payload_schema["metadata.document_id"] + assert PayloadSchemaType.KEYWORD == document_id.data_type + + +@pytest.mark.asyncio +async def test_upsert_creates_all_points( + qdrant_datastore, + client, + document_chunks, +): + document_ids = await qdrant_datastore._upsert(document_chunks) + + assert 2 == len(document_ids) + assert 5 == client.count(collection_name="documents").count + + +@pytest.mark.asyncio +async def test_upsert_does_not_remove_existing_documents_but_store_new( + qdrant_datastore, + client, + initial_document_chunks, + 
document_chunks, +): + """ + This test ensures calling ._upsert no longer removes the existing document chunks, + as they are currently removed in the .upsert method directly. + """ + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(initial_document_chunks) + + await qdrant_datastore._upsert(document_chunks) + + assert 8 == client.count(collection_name="documents").count + + +@pytest.mark.asyncio +async def test_query_returns_all_on_single_query(qdrant_datastore, document_chunks): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + query = QueryWithEmbedding( + query="lorem", + top_k=5, + embedding=[0.5, 0.5, 0.5, 0.5, 0.5], + ) + query_results = await qdrant_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert "lorem" == query_results[0].query + assert 5 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_query_returns_closest_entry(qdrant_datastore, document_chunks): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + query = QueryWithEmbedding( + query="ipsum", + top_k=1, + embedding=[0.0, 0.0, 0.5, 0.0, 0.0], + ) + query_results = await qdrant_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert "ipsum" == query_results[0].query + assert 1 == len(query_results[0].results) + first_document_chunk = query_results[0].results[0] + assert 0.0 <= first_document_chunk.score <= 1.0 + assert Source.email == first_document_chunk.metadata.source + assert "2023-03-05" == first_document_chunk.metadata.created_at + assert "first-doc" == first_document_chunk.metadata.document_id + + +@pytest.mark.asyncio +async def test_query_filter_by_document_id_returns_this_document_chunks( + qdrant_datastore, document_chunks +): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + first_query = QueryWithEmbedding( + query="dolor", + filter=DocumentMetadataFilter(document_id="first-doc"), + top_k=5, + embedding=[0.0, 0.0, 0.5, 0.0, 0.0], + ) + second_query = QueryWithEmbedding( + query="dolor", + filter=DocumentMetadataFilter(document_id="second-doc"), + top_k=5, + embedding=[0.0, 0.0, 0.5, 0.0, 0.0], + ) + query_results = await qdrant_datastore._query(queries=[first_query, second_query]) + + assert 2 == len(query_results) + assert "dolor" == query_results[0].query + assert "dolor" == query_results[1].query + assert 3 == len(query_results[0].results) + assert 2 == len(query_results[1].results) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("start_date", ["2023-03-05T00:00:00", "2023-03-05"]) +async def test_query_start_date_converts_datestring( + qdrant_datastore, + document_chunks, + start_date, +): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + query = QueryWithEmbedding( + query="sit amet", + filter=DocumentMetadataFilter(start_date=start_date), + top_k=5, + embedding=[0.0, 0.0, 0.5, 0.0, 0.0], + ) + query_results = await qdrant_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 3 == len(query_results[0].results) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("end_date", ["2023-03-04T00:00:00", "2023-03-04"]) +async def test_query_end_date_converts_datestring( + qdrant_datastore, + document_chunks, + end_date, +): + # Fill the database with 
document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + query = QueryWithEmbedding( + query="sit amet", + filter=DocumentMetadataFilter(end_date=end_date), + top_k=5, + embedding=[0.0, 0.0, 0.5, 0.0, 0.0], + ) + query_results = await qdrant_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 2 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_delete_removes_by_ids( + qdrant_datastore, + client, + document_chunks, +): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + await qdrant_datastore.delete(ids=["first-doc"]) + + assert 2 == client.count(collection_name="documents").count + + +@pytest.mark.asyncio +async def test_delete_removes_by_document_id_filter( + qdrant_datastore, + client, + document_chunks, +): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + await qdrant_datastore.delete( + filter=DocumentMetadataFilter(document_id="first-doc") + ) + + assert 2 == client.count(collection_name="documents").count + + +@pytest.mark.asyncio +async def test_delete_removes_all( + qdrant_datastore, + client, + document_chunks, +): + # Fill the database with document chunks before running the actual test + await qdrant_datastore._upsert(document_chunks) + + await qdrant_datastore.delete(delete_all=True) + + assert 0 == client.count(collection_name="documents").count diff --git a/tests/datastore/providers/weaviate/docker-compose.yml b/tests/datastore/providers/weaviate/docker-compose.yml new file mode 100644 index 000000000..1900aa580 --- /dev/null +++ b/tests/datastore/providers/weaviate/docker-compose.yml @@ -0,0 +1,25 @@ +--- +version: '3.4' +services: + weaviate: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: semitechnologies/weaviate:1.18.0 + ports: + - 8080:8080 + restart: on-failure:0 + environment: + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + DEFAULT_VECTORIZER_MODULE: 'none' + ENABLE_MODULES: '' + CLUSTER_HOSTNAME: 'node1' + LOG_LEVEL: debug + AUTOSCHEMA_ENABLED: 'false' +... 
\ No newline at end of file diff --git a/tests/datastore/providers/weaviate/test_weaviate_datastore.py b/tests/datastore/providers/weaviate/test_weaviate_datastore.py new file mode 100644 index 000000000..311dd2bb6 --- /dev/null +++ b/tests/datastore/providers/weaviate/test_weaviate_datastore.py @@ -0,0 +1,521 @@ +import pytest +from fastapi.testclient import TestClient +from weaviate import Client +import weaviate +import os +from models.models import DocumentMetadataFilter +from server.main import app +from datastore.providers.weaviate_datastore import ( + SCHEMA, + WeaviateDataStore, + extract_schema_properties, +) +import logging +from loguru import logger +from _pytest.logging import LogCaptureFixture + +BEARER_TOKEN = os.getenv("BEARER_TOKEN") + +client = TestClient(app) +client.headers["Authorization"] = f"Bearer {BEARER_TOKEN}" + + +@pytest.fixture +def weaviate_client(): + host = os.getenv("WEAVIATE_HOST", "http://localhost") + port = os.getenv("WEAVIATE_PORT", "8080") + client = Client(f"{host}:{port}") + + yield client + + client.schema.delete_all() + + +@pytest.fixture +def test_db(weaviate_client, documents): + weaviate_client.schema.delete_all() + weaviate_client.schema.create_class(SCHEMA) + + response = client.post("/upsert", json={"documents": documents}) + + if response.status_code != 200: + raise Exception( + f"Could not upsert to test client.\nStatus Code: {response.status_code}\nResponse:\n{response.json()}" + ) + + yield client + + +@pytest.fixture +def documents(): + documents = [] + + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + texts = [ + "lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt", + ] + ids = ["abc_123", "def_456", "ghi_789"] + sources = ["chat", "email", "email"] + created_at = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + "2021-01-21T10:00:00-02:00", + ] + + for i in range(3): + documents.append( + { + "id": ids[i], + "text": texts[i], + "metadata": { + "source": sources[i], + "source_id": "5325", + "url": "http://example.com", + "created_at": created_at[i], + "author": authors[i], + }, + } + ) + + no_metadata_doc = { + "id": "jkl_012", + "text": "no metadata", + } + + documents.append(no_metadata_doc) + + partial_metadata_doc = { + "id": "mno_345", + "text": "partial metadata", + "metadata": { + "source": "file", + }, + } + + documents.append(partial_metadata_doc) + + yield documents + + +@pytest.fixture +def mock_env_public_access(monkeypatch): + monkeypatch.setattr( + "datastore.providers.weaviate_datastore.WEAVIATE_USERNAME", None + ) + monkeypatch.setattr( + "datastore.providers.weaviate_datastore.WEAVIATE_PASSWORD", None + ) + + +@pytest.fixture +def mock_env_resource_owner_password_flow(monkeypatch): + monkeypatch.setattr( + "datastore.providers.weaviate_datastore.WEAVIATE_SCOPES", + ["schema:read", "schema:write"], + ) + monkeypatch.setattr( + "datastore.providers.weaviate_datastore.WEAVIATE_USERNAME", "admin" + ) + monkeypatch.setattr( + "datastore.providers.weaviate_datastore.WEAVIATE_PASSWORD", "abc123" + ) + + +@pytest.fixture +def caplog(caplog: LogCaptureFixture): + handler_id = logger.add(caplog.handler, format="{message}") + yield caplog + logger.remove(handler_id) + + +@pytest.mark.parametrize( + "document_id", [("abc_123"), ("9a253e0b-d2df-5c2e-be6d-8e9b1f4ae345")] +) +def test_upsert(weaviate_client, document_id): + weaviate_client.schema.delete_all() + weaviate_client.schema.create_class(SCHEMA) + + text = """ + Lorem ipsum dolor sit amet, consectetur 
adipiscing elit. Fusce in ipsum eget dolor malesuada fermentum at ac massa. + Aliquam erat volutpat. Sed eu velit est. Morbi semper quam id urna fringilla lacinia. Vivamus sit amet velit id lorem + pretium molestie. Nulla tincidunt sapien eu nulla consequat, a lacinia justo facilisis. Maecenas euismod urna sapien, + sit amet tincidunt est dapibus ac. Sed in lorem in nunc tincidunt bibendum. Nullam vel urna vitae nulla iaculis rutrum. + Suspendisse varius, massa a dignissim vehicula, urna ligula tincidunt orci, id fringilla velit tellus eu metus. Sed + vestibulum, nisl in malesuada tempor, nisi turpis facilisis nibh, nec dictum velit velit vel ex. Donec euismod, + leo ut sollicitudin tempor, dolor augue blandit nunc, eu lacinia ipsum turpis vitae nulla. Aenean bibendum + tincidunt magna in pulvinar. Sed tincidunt vel nisi ac maximus. + """ + source = "email" + source_id = "5325" + url = "http://example.com" + created_at = "2022-12-16T08:00:00+01:00" + author = "Max Mustermann" + + documents = { + "documents": [ + { + "id": document_id, + "text": text, + "metadata": { + "source": source, + "source_id": source_id, + "url": url, + "created_at": created_at, + "author": author, + }, + } + ] + } + + response = client.post("/upsert", json=documents) + + assert response.status_code == 200 + assert response.json() == {"ids": [document_id]} + + properties = [ + "chunk_id", + "document_id", + "source", + "source_id", + "url", + "created_at", + "author", + ] + + where_filter = { + "path": ["document_id"], + "operator": "Equal", + "valueString": document_id, + } + + weaviate_doc = ( + weaviate_client.query.get("OpenAIDocument", properties) + .with_additional("vector") + .with_where(where_filter) + .with_sort({"path": ["chunk_id"], "order": "asc"}) + .do() + ) + + weaviate_docs = weaviate_doc["data"]["Get"]["OpenAIDocument"] + + assert len(weaviate_docs) == 2 + + for i, weaviate_doc in enumerate(weaviate_docs): + assert weaviate_doc["chunk_id"] == f"{document_id}_{i}" + + assert weaviate_doc["document_id"] == document_id + + assert weaviate_doc["source"] == source + assert weaviate_doc["source_id"] == source_id + assert weaviate_doc["url"] == url + assert weaviate_doc["created_at"] == created_at + assert weaviate_doc["author"] == author + + assert weaviate_doc["_additional"]["vector"] + + +def test_upsert_no_metadata(weaviate_client): + weaviate_client.schema.delete_all() + weaviate_client.schema.create_class(SCHEMA) + + no_metadata_doc = { + "id": "jkl_012", + "text": "no metadata", + } + + metadata_properties = [ + "source", + "source_id", + "url", + "created_at", + "author", + ] + + response = client.post("/upsert", json={"documents": [no_metadata_doc]}) + + assert response.status_code == 200 + + weaviate_doc = weaviate_client.query.get("OpenAIDocument", metadata_properties).do() + + weaviate_doc = weaviate_doc["data"]["Get"]["OpenAIDocument"][0] + + for _, metadata_value in weaviate_doc.items(): + assert metadata_value is None + + +@pytest.mark.parametrize( + "test_document, expected_status_code", + [ + ({"id": "abc_123", "text": "some text"}, 200), + ({"id": "abc_123"}, 422), + ({"text": "some text"}, 200), + ], +) +def test_upsert_invalid_documents(weaviate_client, test_document, expected_status_code): + weaviate_client.schema.delete_all() + weaviate_client.schema.create_class(SCHEMA) + + response = client.post("/upsert", json={"documents": [test_document]}) + + assert response.status_code == expected_status_code + + +@pytest.mark.parametrize( + "query, expected_num_results", + [ + ({"query": 
"consectetur adipiscing", "top_k": 3}, 3), + ({"query": "consectetur adipiscing elit", "filter": {"source": "email"}}, 2), + ( + { + "query": "sed do eiusmod tempor", + "filter": { + "start_date": "2020-01-01T00:00:00Z", + "end_date": "2022-12-31T00:00:00Z", + }, + }, + 1, + ), + ( + { + "query": "some random query", + "filter": {"start_date": "2009-01-01T00:00:00Z"}, + "top_k": 3, + }, + 2, + ), + ( + { + "query": "another random query", + "filter": {"end_date": "1929-12-31T00:00:00Z"}, + "top_k": 3, + }, + 1, + ), + ], +) +def test_query(test_db, query, expected_num_results): + queries = {"queries": [query]} + + response = client.post("/query", json=queries) + assert response.status_code == 200 + + num_docs = response.json()["results"][0]["results"] + assert len(num_docs) == expected_num_results + + +def test_delete(test_db, weaviate_client, caplog): + caplog.set_level(logging.DEBUG) + + delete_request = {"ids": ["def_456"]} + + response = client.request(method="delete", url="/delete", json=delete_request) + assert response.status_code == 200 + assert response.json()["success"] + assert weaviate_client.data_object.get()["totalResults"] == 4 + + client.request(method="delete", url="/delete", json=delete_request) + assert "Failed to delete" in caplog.text + caplog.clear() + + delete_request = {"filter": {"source": "email"}} + + response = client.request(method="delete", url="/delete", json=delete_request) + assert response.status_code == 200 + assert response.json()["success"] + assert weaviate_client.data_object.get()["totalResults"] == 3 + + client.request(method="delete", url="/delete", json=delete_request) + assert "Failed to delete" in caplog.text + + delete_request = {"delete_all": True} + + response = client.request(method="delete", url="/delete", json=delete_request) + assert response.status_code == 200 + assert response.json()["success"] + assert not weaviate_client.data_object.get()["objects"] + + +def test_access_with_username_password(mock_env_resource_owner_password_flow): + auth_credentials = WeaviateDataStore._build_auth_credentials() + + assert isinstance(auth_credentials, weaviate.auth.AuthClientPassword) + + +def test_public_access(mock_env_public_access): + auth_credentials = WeaviateDataStore._build_auth_credentials() + + assert auth_credentials is None + + +def test_extract_schema_properties(): + class_schema = { + "class": "Question", + "description": "Information from a Jeopardy! 
question", + "properties": [ + { + "dataType": ["text"], + "description": "The question", + "name": "question", + }, + { + "dataType": ["text"], + "description": "The answer", + "name": "answer", + }, + { + "dataType": ["text"], + "description": "The category", + "name": "category", + }, + ], + "vectorizer": "text2vec-openai", + } + results = extract_schema_properties(class_schema) + assert results == {"question", "answer", "category"} + + +def test_reuse_schema(weaviate_client, caplog): + caplog.set_level(logging.DEBUG) + + weaviate_client.schema.delete_all() + + WeaviateDataStore() + assert "Creating index" in caplog.text + + WeaviateDataStore() + assert "Will reuse this schema" in caplog.text + + +def test_build_date_filters(): + filter = DocumentMetadataFilter( + document_id=None, + source=None, + source_id=None, + author=None, + start_date="2020-01-01T00:00:00Z", + end_date="2022-12-31T00:00:00Z", + ) + actual_result = WeaviateDataStore.build_filters(filter) + expected_result = { + "operator": "And", + "operands": [ + { + "path": ["created_at"], + "operator": "GreaterThanEqual", + "valueDate": "2020-01-01T00:00:00Z", + }, + { + "path": ["created_at"], + "operator": "LessThanEqual", + "valueDate": "2022-12-31T00:00:00Z", + }, + ], + } + + assert actual_result == expected_result + + +@pytest.mark.parametrize( + "test_input, expected_result", + [ + ("abc_123", False), + ("b2e4133c-c956-5684-bbf5-584e50ec3647", True), # version 5 + ("f6179953-11d8-4ee0-9af8-e51e00dbf727", True), # version 4 + ("16fe8165-3c08-348f-a015-a8bb31e26b5c", True), # version 3 + ("bda85f97-be72-11ed-9291-00000000000a", False), # version 1 + ], +) +def test_is_valid_weaviate_id(test_input, expected_result): + actual_result = WeaviateDataStore._is_valid_weaviate_id(test_input) + assert actual_result == expected_result + + +def test_upsert_same_docid(test_db, weaviate_client): + def get_doc_by_document_id(document_id): + properties = [ + "chunk_id", + "document_id", + "source", + "source_id", + "url", + "created_at", + "author", + ] + where_filter = { + "path": ["document_id"], + "operator": "Equal", + "valueString": document_id, + } + + results = ( + weaviate_client.query.get("OpenAIDocument", properties) + .with_additional("id") + .with_where(where_filter) + .with_sort({"path": ["chunk_id"], "order": "asc"}) + .do() + ) + + return results["data"]["Get"]["OpenAIDocument"] + + def build_upsert_payload(document): + return {"documents": [document]} + + # upsert a new document + # this is a document that has 2 chunks and + # the source is email + doc_id = "abc_123" + text = """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce in ipsum eget dolor malesuada fermentum at ac massa. + Aliquam erat volutpat. Sed eu velit est. Morbi semper quam id urna fringilla lacinia. Vivamus sit amet velit id lorem + pretium molestie. Nulla tincidunt sapien eu nulla consequat, a lacinia justo facilisis. Maecenas euismod urna sapien, + sit amet tincidunt est dapibus ac. Sed in lorem in nunc tincidunt bibendum. Nullam vel urna vitae nulla iaculis rutrum. + Suspendisse varius, massa a dignissim vehicula, urna ligula tincidunt orci, id fringilla velit tellus eu metus. Sed + vestibulum, nisl in malesuada tempor, nisi turpis facilisis nibh, nec dictum velit velit vel ex. Donec euismod, + leo ut sollicitudin tempor, dolor augue blandit nunc, eu lacinia ipsum turpis vitae nulla. Aenean bibendum + tincidunt magna in pulvinar. Sed tincidunt vel nisi ac maximus. 
+ """ + + document = { + "id": doc_id, + "text": text, + "metadata": {"source": "email"}, + } + + response = client.post("/upsert", json=build_upsert_payload(document)) + assert response.status_code == 200 + + weaviate_doc = get_doc_by_document_id(doc_id) + assert len(weaviate_doc) == 2 + for chunk in weaviate_doc: + assert chunk["source"] == "email" + + # now update the source to file + # user still has to specify the text + # because test is a required field + document["metadata"]["source"] = "file" + response = client.post("/upsert", json=build_upsert_payload(document)) + assert response.status_code == 200 + + weaviate_doc = get_doc_by_document_id(doc_id) + assert len(weaviate_doc) == 2 + for chunk in weaviate_doc: + assert chunk["source"] == "file" + + # now update the text so that it is only 1 chunk + # user does not need to specify metadata + # since it is optional + document["text"] = "This is a short text" + document.pop("metadata") + + response = client.post("/upsert", json=build_upsert_payload(document)) + assert response.status_code == 200 + weaviate_doc = get_doc_by_document_id(doc_id) + assert len(weaviate_doc) == 1 + + # TODO: Implement update function + # but the source should still be file + # but it is None right now because an + # update function is out of scope + assert weaviate_doc[0]["source"] is None diff --git a/tests/datastore/providers/zilliz/test_zilliz_datastore.py b/tests/datastore/providers/zilliz/test_zilliz_datastore.py new file mode 100644 index 000000000..7172eb999 --- /dev/null +++ b/tests/datastore/providers/zilliz/test_zilliz_datastore.py @@ -0,0 +1,330 @@ +# from pathlib import Path +# from dotenv import find_dotenv, load_dotenv +# env_path = Path(".") / "zilliz.env" +# load_dotenv(dotenv_path=env_path, verbose=True) + +import pytest +from models.models import ( + DocumentChunkMetadata, + DocumentMetadataFilter, + DocumentChunk, + Query, + QueryWithEmbedding, + Source, +) +from datastore.providers.zilliz_datastore import ( + OUTPUT_DIM, + ZillizDataStore, +) + + +@pytest.fixture +def zilliz_datastore(): + return ZillizDataStore() + + +@pytest.fixture +def document_chunk_one(): + doc_id = "zerp" + doc_chunks = [] + + ids = ["abc_123", "def_456", "ghi_789"] + texts = [ + "lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + "2021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks.append(chunk) + + return {doc_id: doc_chunks} + + +@pytest.fixture +def document_chunk_two(): + doc_id_1 = "zerp" + doc_chunks_1 = [] + + ids = ["abc_123", "def_456", "ghi_789"] + texts = [ + "1lorem ipsum dolor sit amet", + "2consectetur adipiscing elit", + "3sed do eiusmod tempor incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "1929-10-28T09:30:00-05:00", + "2009-01-03T16:39:57-08:00", + 
"3021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id_1, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks_1.append(chunk) + + doc_id_2 = "merp" + doc_chunks_2 = [] + + ids = ["jkl_123", "lmn_456", "opq_789"] + texts = [ + "3sdsc efac feas sit qweas", + "4wert sdfas fdsc", + "52dsc fdsf eiusmod asdasd incididunt", + ] + sources = [Source.email, Source.file, Source.chat] + source_ids = ["foo", "bar", "baz"] + urls = ["foo.com", "bar.net", "baz.org"] + created_ats = [ + "4929-10-28T09:30:00-05:00", + "5009-01-03T16:39:57-08:00", + "6021-01-21T10:00:00-02:00", + ] + authors = ["Max Mustermann", "John Doe", "Jane Doe"] + embeddings = [[x] * OUTPUT_DIM for x in range(3, 6)] + + for i in range(3): + chunk = DocumentChunk( + id=ids[i], + text=texts[i], + metadata=DocumentChunkMetadata( + document_id=doc_id_2, + source=sources[i], + source_id=source_ids[i], + url=urls[i], + created_at=created_ats[i], + author=authors[i], + ), + embedding=embeddings[i], # type: ignore + ) + + doc_chunks_2.append(chunk) + + return {doc_id_1: doc_chunks_1, doc_id_2: doc_chunks_2} + + +@pytest.mark.asyncio +async def test_upsert(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + assert 3 == zilliz_datastore.col.num_entities + + +@pytest.mark.asyncio +async def test_reload(zilliz_datastore, document_chunk_one, document_chunk_two): + await zilliz_datastore.delete(delete_all=True) + + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + assert 3 == zilliz_datastore.col.num_entities + new_store = ZillizDataStore() + another_in = {i: document_chunk_two[i] for i in document_chunk_two if i != res[0]} + res = await new_store._upsert(another_in) + new_store.col.flush() + assert 6 == new_store.col.num_entities + query = QueryWithEmbedding( + query="lorem", + top_k=10, + embedding=[0.5] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + assert 1 == len(query_results) + + +@pytest.mark.asyncio +async def test_upsert_and_query_all(zilliz_datastore, document_chunk_two): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_two) + assert res == list(document_chunk_two.keys()) + zilliz_datastore.col.flush() + + # Num entities currently doesnt track deletes + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0.5] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 6 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_query_accuracy(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + query = QueryWithEmbedding( + query="lorem", + top_k=1, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == 
len(query_results) + assert 1 == len(query_results[0].results) + assert 0 == query_results[0].results[0].score + assert "abc_123" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_query_filter(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + query = QueryWithEmbedding( + query="lorem", + top_k=1, + embedding=[0] * OUTPUT_DIM, + filter=DocumentMetadataFilter( + start_date="2000-01-03T16:39:57-08:00", end_date="2010-01-03T16:39:57-08:00" + ), + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert 0 != query_results[0].results[0].score + assert "def_456" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_delete_with_date_filter(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + await zilliz_datastore.delete( + filter=DocumentMetadataFilter( + end_date="2009-01-03T16:39:57-08:00", + ) + ) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 1 == len(query_results[0].results) + assert "ghi_789" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_delete_with_source_filter(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + await zilliz_datastore.delete( + filter=DocumentMetadataFilter( + source=Source.email, + ) + ) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 2 == len(query_results[0].results) + assert "def_456" == query_results[0].results[0].id + + +@pytest.mark.asyncio +async def test_delete_with_document_id_filter(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + await zilliz_datastore.delete( + filter=DocumentMetadataFilter( + document_id=res[0], + ) + ) + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 0 == len(query_results[0].results) + + +@pytest.mark.asyncio +async def test_delete_with_document_id(zilliz_datastore, document_chunk_one): + await zilliz_datastore.delete(delete_all=True) + res = await zilliz_datastore._upsert(document_chunk_one) + assert res == list(document_chunk_one.keys()) + zilliz_datastore.col.flush() + await zilliz_datastore.delete([res[0]]) + + query = QueryWithEmbedding( + query="lorem", + top_k=9, + embedding=[0] * OUTPUT_DIM, + ) + query_results = await zilliz_datastore._query(queries=[query]) + + assert 1 == len(query_results) + assert 0 == len(query_results[0].results) + + +# if __name__ == '__main__': +# import sys +# import pytest +# 
pytest.main(sys.argv)
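
Note on the new services/pii_detection.py helper: this change defines screen_text_for_pii but does not call it anywhere in the upsert path. The sketch below shows one way it could be used to drop flagged documents before they reach a datastore. It is illustrative only: the filter_documents_with_pii name is not part of this change, and it assumes the Document model in models.models exposes a text field.

from typing import List

from models.models import Document
from services.pii_detection import screen_text_for_pii


def filter_documents_with_pii(documents: List[Document]) -> List[Document]:
    # Illustrative sketch: keep only documents whose text is not flagged as PII.
    # screen_text_for_pii issues one chat-completion call per document, so this
    # is best limited to small batches or moved behind a queue for bulk ingestion.
    safe_documents: List[Document] = []
    for document in documents:
        if document.text and screen_text_for_pii(document.text):
            # Flagged as containing PII; skip rather than upsert.
            continue
        safe_documents.append(document)
    return safe_documents

A caller could run documents = filter_documents_with_pii(documents) immediately before handing the batch to a datastore upsert; whether to drop, redact, or merely log flagged documents is a policy choice outside the scope of this change.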