-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
158 lines (139 loc) · 5.93 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from dotenv import load_dotenv
load_dotenv()
import requests
from flask import Flask, request, jsonify
from flask_cors import CORS, cross_origin
import os
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.indices.vector_store.base import VectorStoreIndex
from process import process_entries
# Shared MongoDB client for the whole app. The connection string comes from the
# MONGODB_URI environment variable and server API version '1' is requested.
client = MongoClient(os.environ.get("MONGODB_URI"), server_api=ServerApi('1'))

# Flask application, wrapped with flask_cors so cross-origin requests are allowed.
app = Flask(__name__)
cors = CORS(app)
app.config.update(CORS_HEADERS='Content-Type')
# Simple liveness endpoint: an easy way to tell that the app is running
@app.route('/')
def hello_world():
    """Return a fixed JSON payload confirming the service is up."""
    payload = {"status": "success", "message": "hello world"}
    return jsonify(payload)
@app.route('/create_index', methods=['POST'])
@cross_origin()
def create_index():
    """Create the vector search index for a user's invoice vector collection.

    Expects a JSON object body containing a non-empty string ``user_id``.
    The part of ``user_id`` before the first ``@`` is used to derive the
    vector collection name and the index name. The index definition
    (1536-dimension ``knnVector`` field ``embedding``, cosine similarity) is
    POSTed to the Atlas API rooted at ``MONGODB_API_URI`` using HTTP digest
    authentication.

    Returns:
        A ``(json, status)`` Flask response:
        * 200 with the upstream JSON on success,
        * 400 when the request body is missing or malformed,
        * the upstream status code when Atlas rejects the request,
        * 500 for connection errors, timeouts and any other failure.
    """
    # Validate up front so a bad body is a 400 (as in /process), not a 500.
    body = request.get_json(silent=True)
    user_id = body.get('user_id') if isinstance(body, dict) else None
    if not isinstance(user_id, str) or not user_id:
        return jsonify({"status": "fail", "error": "invalid request body"}), 400

    database_name = "test"
    user_prefix = user_id.split('@')[0]
    vector_collection_name = user_prefix + '_invoice_vector'
    # Only the index name has its dots replaced; the collection name keeps them.
    vector_index_name = (user_prefix + '_invoice_vector_index').replace('.', '_')

    uri = os.getenv("MONGODB_API_URI")
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/vnd.atlas.2023-02-01+json',
    }
    payload = {
        "collectionName": vector_collection_name,
        "database": database_name,
        "name": vector_index_name,
        "type": "search",
        "mappings": {
            "dynamic": True,
            "fields": {
                "embedding": {
                    "dimensions": 1536,
                    "similarity": "cosine",
                    "type": "knnVector",
                },
            },
        },
    }

    try:
        response = requests.post(
            f"{uri}/groups/{os.getenv('MONGODB_ATLAS_GROUP_ID')}/clusters/{os.getenv('MONGODB_ATLAS_CLUSTER_NAME')}/fts/indexes",
            headers=headers,
            auth=requests.auth.HTTPDigestAuth(os.getenv('MONGODB_ATLAS_USERNAME'), os.getenv('MONGODB_ATLAS_PASSWORD')),
            json=payload,
            # Without a timeout, requests waits forever on an unresponsive server.
            timeout=30,
        )
        response.raise_for_status()
        return jsonify({"status": "success", "data": response.json()})
    except requests.exceptions.HTTPError as e:
        # The upstream error body is not guaranteed to be JSON (e.g. an HTML
        # gateway error); fall back to the raw text instead of raising here.
        try:
            error = e.response.json()
        except ValueError:
            error = e.response.text
        return jsonify({"status": "fail", "error": error}), e.response.status_code
    except Exception as e:
        # Covers the remaining requests errors (connection failures, timeouts)
        # as well as anything unexpected, e.g. a non-JSON success body.
        return jsonify({"status": "fail", "error": str(e)}), 500
@app.route('/list_indexes/<database_name>/<collection_name>', methods=['GET'])
@cross_origin()
def list_indexes(database_name, collection_name):
    """List the search indexes defined on one collection.

    Forwards a GET request to the Atlas API rooted at ``MONGODB_API_URI``
    for the given database and collection, using HTTP digest authentication.

    Args:
        database_name: Database name taken from the URL path.
        collection_name: Collection name taken from the URL path.

    Returns:
        A ``(json, status)`` Flask response:
        * 200 with the upstream JSON on success,
        * the upstream status code when Atlas rejects the request,
        * 500 for connection errors, timeouts and any other failure.
    """
    from urllib.parse import quote

    uri = os.getenv("MONGODB_API_URI")
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/vnd.atlas.2023-02-01+json',
    }
    # The path parameters are caller-controlled: percent-encode them so
    # characters such as '?', '#' or '/' cannot alter the upstream URL.
    safe_database = quote(database_name, safe='')
    safe_collection = quote(collection_name, safe='')

    try:
        response = requests.get(
            f"{uri}/groups/{os.getenv('MONGODB_ATLAS_GROUP_ID')}/clusters/{os.getenv('MONGODB_ATLAS_CLUSTER_NAME')}/fts/indexes/{safe_database}/{safe_collection}",
            headers=headers,
            auth=requests.auth.HTTPDigestAuth(os.getenv('MONGODB_ATLAS_USERNAME'), os.getenv('MONGODB_ATLAS_PASSWORD')),
            # Without a timeout, requests waits forever on an unresponsive server.
            timeout=30,
        )
        response.raise_for_status()
        return jsonify({"status": "success", "data": response.json()})
    except requests.exceptions.HTTPError as e:
        # The upstream error body is not guaranteed to be JSON; fall back to
        # the raw text instead of raising inside the handler.
        try:
            error = e.response.json()
        except ValueError:
            error = e.response.text
        return jsonify({"status": "fail", "error": error}), e.response.status_code
    except Exception as e:
        # Covers the remaining requests errors (connection failures, timeouts)
        # as well as anything unexpected, e.g. a non-JSON success body.
        return jsonify({"status": "fail", "error": str(e)}), 500
@app.route('/process', methods=['POST'])
@cross_origin()
def process():
    """Run ``process_entries`` over a user's invoice collection.

    Expects a JSON object body containing a non-empty string ``user_id``.
    The part of ``user_id`` before the first ``@`` is used to derive the
    source collection, the vector collection and the vector index names,
    which are passed to ``process_entries`` together with the shared client.

    Returns:
        A ``(json, status)`` Flask response:
        * 200 when ``process_entries`` returns a truthy value,
        * 400 when the body is missing/malformed or ``process_entries``
          returns a falsy value,
        * 500 when ``process_entries`` raises.
    """
    # Validate the body explicitly rather than catching KeyError around the
    # whole handler: a KeyError raised inside process_entries must not be
    # reported to the caller as "invalid request body".
    body = request.get_json(silent=True)
    user_id = body.get('user_id') if isinstance(body, dict) else None
    if not isinstance(user_id, str) or not user_id:
        return jsonify({"status": "fail", "error": "invalid request body"}), 400

    database_name = "test"
    user_prefix = user_id.split('@')[0]
    collection_name = user_prefix + '_invoice'
    vector_collection_name = user_prefix + '_invoice_vector'
    # Only the index name has its dots replaced; collection names keep them.
    vector_index_name = (user_prefix + '_invoice_vector_index').replace('.', '_')

    try:
        is_processed = process_entries(
            client=client,
            database_name=database_name,
            collection_name=collection_name,
            vector_collection_name=vector_collection_name,
            vector_index_name=vector_index_name,
        )
    except Exception as e:
        return jsonify({"status": "fail", "error": f"{e}"}), 500

    if not is_processed:
        return jsonify({"status": "fail", "error": "process failed"}), 400
    return jsonify({"status": "success", "message": "process successful"}), 200
@app.route('/query', methods=['POST'])
@cross_origin()
def process_form():
    """Answer a natural-language query against a user's invoice vectors.

    Expects a JSON object body with ``query`` and a non-empty string
    ``user_id``. The part of ``user_id`` before the first ``@`` is used to
    derive the vector collection and index names; a vector-store-backed
    index is then built on the shared client and queried.

    Returns:
        A Flask response:
        * 200 with ``{"response": <answer text>}`` on success,
        * 400 with ``{"error": ...}`` when ``query`` or ``user_id`` is missing,
        * 500 with ``{"error": ...}`` when building the index or running the
          query raises.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        body = {}

    # Validate before touching the vector store. Use .get() so an absent
    # "query" key reaches the 400 branch instead of raising KeyError.
    query = body.get("query")
    if query is None:
        return jsonify({"error": "query field is missing"}), 400
    user_id = body.get('user_id')
    if not isinstance(user_id, str) or not user_id:
        return jsonify({"error": "user_id field is missing"}), 400

    database_name = "test"
    user_prefix = user_id.split('@')[0]
    vector_collection_name = user_prefix + '_invoice_vector'
    # Only the index name has its dots replaced; the collection name keeps them.
    vector_index_name = (user_prefix + '_invoice_vector_index').replace('.', '_')

    try:
        store = MongoDBAtlasVectorSearch(
            client,
            db_name=database_name,  # database holding the embeddings
            collection_name=vector_collection_name,  # collection holding the embeddings
            index_name=vector_index_name,  # vector search index on that collection
        )
        index = VectorStoreIndex.from_vector_store(store)
        # Retrieve up to 20 of the most similar documents per query.
        query_engine = index.as_query_engine(similarity_top_k=20)
        response = query_engine.query(query)
    except Exception as e:
        # Report failures as JSON, like the other endpoints, instead of
        # letting them surface as an unhandled server error.
        return jsonify({"error": str(e)}), 500

    return jsonify({"response": str(response)})
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which must never be
    # reachable in a deployed environment. It stays on by default (the previous
    # hard-coded behaviour) but can now be disabled without a code change by
    # setting FLASK_DEBUG to 0, false, no or off.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() not in ("0", "false", "no", "off")
    app.run(debug=debug_enabled, port=9000)