-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Charli Posner
committed
Aug 25, 2023
1 parent
f6a7995
commit 808f96b
Showing
122 changed files
with
18,334 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[ZoneTransfer] | ||
ZoneId=3 | ||
HostUrl=https://files.slack.com/files-pri/T0HLFPUJE-F05J0S9UA8P/download/chatgpt_retrieval_plugin_qa.ipynb?origin_team=T0HLFPUJE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
# Stage 1: export the Poetry-managed dependencies to a plain requirements.txt
# so the runtime image does not need Poetry itself.
FROM python:3.10 AS requirements-stage

WORKDIR /tmp

# `poetry export` is provided by poetry-plugin-export, which is no longer
# bundled with Poetry 2.x; install it explicitly so the export step below keeps
# working regardless of which Poetry version pip resolves.
RUN pip install --no-cache-dir poetry poetry-plugin-export

# The trailing * makes poetry.lock optional: the COPY still succeeds when the
# lock file is absent.
COPY ./pyproject.toml ./poetry.lock* /tmp/

RUN poetry export -f requirements.txt --output requirements.txt --without-hashes

# Stage 2: runtime image containing only the exported dependencies and the app.
FROM python:3.10

WORKDIR /code

COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . /code/

# Heroku uses PORT, Azure App Services uses WEBSITES_PORT, Fly.io uses 8080 by default
CMD ["sh", "-c", "uvicorn server.main:app --host 0.0.0.0 --port ${PORT:-${WEBSITES_PORT:-8080}}"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2023 OpenAI | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Heroku
# make heroku-login   -> runs `heroku container:login`
# make heroku-push    -> builds the image, tags it, pushes it, then releases it

# Replace the placeholder with the name of your Heroku app (or override it on
# the command line: `make heroku-push HEROKU_APP=my-app`).
HEROKU_APP = <your app name>

# Neither target produces a file of the same name; declaring them phony keeps
# them runnable even if a file called heroku-push or heroku-login exists.
.PHONY: heroku-push heroku-login

heroku-push:
	docker buildx build --platform linux/amd64 -t ${HEROKU_APP} .
	docker tag ${HEROKU_APP} registry.heroku.com/${HEROKU_APP}/web
	docker push registry.heroku.com/${HEROKU_APP}/web
	heroku container:release web -a ${HEROKU_APP}

heroku-login:
	heroku container:login
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
from abc import ABC, abstractmethod | ||
from typing import Dict, List, Optional | ||
import asyncio | ||
|
||
from models.models import ( | ||
Document, | ||
DocumentChunk, | ||
DocumentMetadataFilter, | ||
Query, | ||
QueryResult, | ||
QueryWithEmbedding, | ||
) | ||
from services.chunks import get_document_chunks | ||
from services.openai import get_embeddings | ||
|
||
|
||
class DataStore(ABC):
    """
    Abstract base class for vector-database backends.

    Concrete providers implement ``_upsert``, ``_query`` and ``delete``. The
    public ``upsert`` and ``query`` methods defined here prepare their inputs
    (via ``get_document_chunks`` and ``get_embeddings`` respectively) and then
    delegate to those provider hooks.
    """

    async def upsert(
        self, documents: List[Document], chunk_token_size: Optional[int] = None
    ) -> List[str]:
        """
        Insert the given documents into the datastore.

        For every document that carries an id, any vectors already stored under
        that document id are removed first through ``delete`` (whether that is
        strictly needed depends on the vector db). The documents are then turned
        into chunks with ``get_document_chunks`` and handed to ``_upsert``.
        Returns the list of document ids produced by ``_upsert``.
        """
        # Build one delete call per document that has an id; documents without
        # an id are skipped.
        pending_deletes = []
        for doc in documents:
            if not doc.id:
                continue
            id_filter = DocumentMetadataFilter(document_id=doc.id)
            pending_deletes.append(self.delete(filter=id_filter, delete_all=False))

        # Run all deletions concurrently and wait for them before inserting.
        await asyncio.gather(*pending_deletes)

        document_chunks = get_document_chunks(documents, chunk_token_size)
        return await self._upsert(document_chunks)

    @abstractmethod
    async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]:
        """
        Provider hook: store the supplied document chunks.

        ``chunks`` is a dict whose values are lists of document chunks
        (presumably keyed by document id -- confirm against
        ``get_document_chunks``).
        Return a list of document ids.
        """

        raise NotImplementedError

    async def query(self, queries: List[Query]) -> List[QueryResult]:
        """
        Run a batch of queries (with their filters) and return, for each, the
        matching document chunks and scores as produced by ``_query``.
        """
        # Pull the raw query strings out so they can be embedded in one call.
        texts = [item.query for item in queries]
        embeddings = get_embeddings(texts)

        # Pair each original query with its embedding, in order.
        embedded_queries = []
        for item, vector in zip(queries, embeddings):
            embedded_queries.append(
                QueryWithEmbedding(**item.dict(), embedding=vector)
            )

        return await self._query(embedded_queries)

    @abstractmethod
    async def _query(self, queries: List[QueryWithEmbedding]) -> List[QueryResult]:
        """
        Provider hook: given queries that already carry embeddings (plus their
        filters), return a list of query results with matching document chunks
        and scores.
        """
        raise NotImplementedError

    @abstractmethod
    async def delete(
        self,
        ids: Optional[List[str]] = None,
        filter: Optional[DocumentMetadataFilter] = None,
        delete_all: Optional[bool] = None,
    ) -> bool:
        """
        Provider hook: remove vectors selected by ids, by metadata filter, or
        remove everything in the datastore.

        Several of the parameters may be supplied in the same call.
        Returns whether the operation was successful.
        """
        raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from datastore.datastore import DataStore | ||
import os | ||
|
||
|
||
async def get_datastore() -> DataStore: | ||
datastore = os.environ.get("DATASTORE") | ||
assert datastore is not None | ||
|
||
match datastore: | ||
case "chroma": | ||
from datastore.providers.chroma_datastore import ChromaDataStore | ||
|
||
return ChromaDataStore() | ||
case "llama": | ||
from datastore.providers.llama_datastore import LlamaDataStore | ||
|
||
return LlamaDataStore() | ||
|
||
case "pinecone": | ||
from datastore.providers.pinecone_datastore import PineconeDataStore | ||
|
||
return PineconeDataStore() | ||
case "weaviate": | ||
from datastore.providers.weaviate_datastore import WeaviateDataStore | ||
|
||
return WeaviateDataStore() | ||
case "milvus": | ||
from datastore.providers.milvus_datastore import MilvusDataStore | ||
|
||
return MilvusDataStore() | ||
case "zilliz": | ||
from datastore.providers.zilliz_datastore import ZillizDataStore | ||
|
||
return ZillizDataStore() | ||
case "redis": | ||
from datastore.providers.redis_datastore import RedisDataStore | ||
|
||
return await RedisDataStore.init() | ||
case "qdrant": | ||
from datastore.providers.qdrant_datastore import QdrantDataStore | ||
|
||
return QdrantDataStore() | ||
case "azuresearch": | ||
from datastore.providers.azuresearch_datastore import AzureSearchDataStore | ||
|
||
return AzureSearchDataStore() | ||
case "supabase": | ||
from datastore.providers.supabase_datastore import SupabaseDataStore | ||
|
||
return SupabaseDataStore() | ||
case "postgres": | ||
from datastore.providers.postgres_datastore import PostgresDataStore | ||
|
||
return PostgresDataStore() | ||
case "analyticdb": | ||
from datastore.providers.analyticdb_datastore import AnalyticDBDataStore | ||
|
||
return AnalyticDBDataStore() | ||
case "elasticsearch": | ||
from datastore.providers.elasticsearch_datastore import ( | ||
ElasticsearchDataStore, | ||
) | ||
return ElasticsearchDataStore() | ||
case "kdbai": | ||
from datastore.providers.kdbai_datastore import KDBAIDataStore | ||
|
||
return KDBAIDataStore() | ||
case _: | ||
raise ValueError( | ||
f"Unsupported vector database: {datastore}. " | ||
f"Try one of the following: llama, elasticsearch, pinecone, weaviate, milvus, zilliz, redis, or qdrant" | ||
) |
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.