Skip to content

Commit 2058ed2

Browse files
committed
update PDF data source
1 parent eac0e10 commit 2058ed2

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

ai-integrations/langchain.ipynb

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
5454
"\n",
5555
"# Load the PDF\n",
56-
"loader = PyPDFLoader(\"https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP\")\n",
56+
"loader = PyPDFLoader(\"https://investors.mongodb.com/node/13176/pdf\")\n",
5757
"data = loader.load()\n",
5858
"\n",
5959
"# Split PDF into documents\n",
@@ -77,7 +77,7 @@
7777
"vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
7878
" connection_string = ATLAS_CONNECTION_STRING,\n",
7979
" namespace = \"langchain_db.test\",\n",
80-
" embedding = OpenAIEmbeddings(disallowed_special=()),\n",
80+
" embedding = OpenAIEmbeddings(model=\"text-embedding-3-large\"),\n",
8181
" index_name = \"vector_index\"\n",
8282
")\n",
8383
"\n",
@@ -91,11 +91,16 @@
9191
"metadata": {},
9292
"outputs": [],
9393
"source": [
94+
"import time\n",
95+
"\n",
9496
"# Use helper method to create the vector search index\n",
9597
"vector_store.create_vector_search_index(\n",
96-
" dimensions = 1536, # The dimensions of the vector embeddings to be indexed\n",
97-
" filters = [ \"page\" ]\n",
98-
")"
98+
" dimensions = 3072, # The dimensions of the vector embeddings to be indexed\n",
99+
" filters = [ \"page_label\" ]\n",
100+
")\n",
101+
"\n",
102+
"# Wait for the index to build (this can take around a minute)\n",
103+
"time.sleep(60)"
99104
]
100105
},
101106
{
@@ -113,7 +118,7 @@
113118
"source": [
114119
"import pprint\n",
115120
"\n",
116-
"query = \"MongoDB Atlas security\"\n",
121+
"query = \"MongoDB acquisition\"\n",
117122
"results = vector_store.similarity_search(query)\n",
118123
"\n",
119124
"pprint.pprint(results)"
@@ -132,7 +137,7 @@
132137
"metadata": {},
133138
"outputs": [],
134139
"source": [
135-
"query = \"MongoDB Atlas security\"\n",
140+
"query = \"MongoDB acquisition\"\n",
136141
"results = vector_store.similarity_search_with_score(\n",
137142
" query = query, k = 3\n",
138143
")\n",
@@ -153,12 +158,12 @@
153158
"metadata": {},
154159
"outputs": [],
155160
"source": [
156-
"query = \"MongoDB Atlas security\"\n",
161+
"query = \"MongoDB acquisition\"\n",
157162
"\n",
158163
"results = vector_store.similarity_search_with_score(\n",
159164
" query = query,\n",
160165
" k = 3,\n",
161-
" pre_filter = { \"page\": { \"$eq\": 17 } }\n",
166+
" pre_filter = { \"page_label\": { \"$eq\": 2 } }\n",
162167
")\n",
163168
"\n",
164169
"pprint.pprint(results)"
@@ -200,7 +205,7 @@
200205
"\"\"\"\n",
201206
"custom_rag_prompt = PromptTemplate.from_template(template)\n",
202207
"\n",
203-
"llm = ChatOpenAI()\n",
208+
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
204209
"\n",
205210
"def format_docs(docs):\n",
206211
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
@@ -214,7 +219,7 @@
214219
")\n",
215220
"\n",
216221
"# Prompt the chain\n",
217-
"question = \"How can I secure my MongoDB Atlas cluster?\"\n",
222+
"question = \"What was MongoDB's latest acquisition?\"\n",
218223
"answer = rag_chain.invoke(question)\n",
219224
"\n",
220225
"print(\"Question: \" + question)\n",
@@ -245,7 +250,7 @@
245250
" search_kwargs = {\n",
246251
" \"k\": 10,\n",
247252
" \"score_threshold\": 0.75,\n",
248-
" \"pre_filter\": { \"page\": { \"$eq\": 17 } }\n",
253+
" \"pre_filter\": { \"page_label\": { \"$eq\": 2 } }\n",
249254
" }\n",
250255
")\n",
251256
"\n",
@@ -261,7 +266,7 @@
261266
"\"\"\"\n",
262267
"custom_rag_prompt = PromptTemplate.from_template(template)\n",
263268
"\n",
264-
"llm = ChatOpenAI()\n",
269+
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
265270
"\n",
266271
"def format_docs(docs):\n",
267272
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
@@ -275,7 +280,7 @@
275280
")\n",
276281
"\n",
277282
"# Prompt the chain\n",
278-
"question = \"How can I secure my MongoDB Atlas cluster?\"\n",
283+
"question = \"What was MongoDB's latest acquisition?\"\n",
279284
"answer = rag_chain.invoke(question)\n",
280285
"\n",
281286
"print(\"Question: \" + question)\n",

0 commit comments

Comments
 (0)