Skip to content

Commit bb68147

Browse files
authored
Merge pull request #15 from davidhou17/DOCSP-48847
(DOCSP-48847): Use from_connection_string and make notebook more modular
2 parents 524b010 + 2058ed2 commit bb68147

File tree

2 files changed

+53
-62
lines changed

2 files changed

+53
-62
lines changed

ai-integrations/langchain.ipynb

Lines changed: 44 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"cell_type": "markdown",
1212
"metadata": {},
1313
"source": [
14-
"This notebook is a companion to the [LangChain Get Started](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/get-started/) page. Refer to the page for set-up instructions and detailed explanations.\n",
14+
"This notebook is a companion to the [LangChain Get Started](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/get-started/) tutorial. Refer to the tutorial for setup instructions and detailed explanations.\n",
1515
"\n",
1616
"<a target=\"_blank\" href=\"https://colab.research.google.com/github/mongodb/docs-notebooks/blob/main/ai-integrations/langchain.ipynb\">\n",
1717
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
@@ -37,23 +37,8 @@
3737
"metadata": {},
3838
"outputs": [],
3939
"source": [
40-
"import os, pprint\n",
41-
"from langchain_community.document_loaders import PyPDFLoader\n",
42-
"from langchain_core.output_parsers import StrOutputParser\n",
43-
"from langchain_core.runnables import RunnablePassthrough\n",
44-
"from langchain_mongodb import MongoDBAtlasVectorSearch\n",
45-
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
46-
"from langchain.prompts import PromptTemplate\n",
47-
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
48-
"from pymongo import MongoClient"
49-
]
50-
},
51-
{
52-
"cell_type": "code",
53-
"execution_count": null,
54-
"metadata": {},
55-
"outputs": [],
56-
"source": [
40+
"import os\n",
41+
"\n",
5742
"os.environ[\"OPENAI_API_KEY\"] = \"<api-key>\"\n",
5843
"ATLAS_CONNECTION_STRING = \"<connection-string>\""
5944
]
@@ -64,24 +49,11 @@
6449
"metadata": {},
6550
"outputs": [],
6651
"source": [
67-
"# Connect to your Atlas cluster\n",
68-
"client = MongoClient(ATLAS_CONNECTION_STRING)\n",
69-
"\n",
70-
"# Define collection and index name\n",
71-
"db_name = \"langchain_db\"\n",
72-
"collection_name = \"test\"\n",
73-
"atlas_collection = client[db_name][collection_name]\n",
74-
"vector_search_index = \"vector_index\""
75-
]
76-
},
77-
{
78-
"cell_type": "code",
79-
"execution_count": null,
80-
"metadata": {},
81-
"outputs": [],
82-
"source": [
52+
"from langchain_community.document_loaders import PyPDFLoader\n",
53+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
54+
"\n",
8355
"# Load the PDF\n",
84-
"loader = PyPDFLoader(\"https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP\")\n",
56+
"loader = PyPDFLoader(\"https://investors.mongodb.com/node/13176/pdf\")\n",
8557
"data = loader.load()\n",
8658
"\n",
8759
"# Split PDF into documents\n",
@@ -98,13 +70,19 @@
9870
"metadata": {},
9971
"outputs": [],
10072
"source": [
101-
"# Create the vector store\n",
102-
"vector_store = MongoDBAtlasVectorSearch.from_documents(\n",
103-
" documents = docs,\n",
104-
" embedding = OpenAIEmbeddings(disallowed_special=()),\n",
105-
" collection = atlas_collection,\n",
106-
" index_name = vector_search_index\n",
107-
")"
73+
"from langchain_mongodb import MongoDBAtlasVectorSearch\n",
74+
"from langchain_openai import OpenAIEmbeddings\n",
75+
"\n",
76+
"# Instantiate the vector store using your MongoDB connection string\n",
77+
"vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
78+
" connection_string = ATLAS_CONNECTION_STRING,\n",
79+
" namespace = \"langchain_db.test\",\n",
80+
" embedding = OpenAIEmbeddings(model=\"text-embedding-3-large\"),\n",
81+
" index_name = \"vector_index\"\n",
82+
")\n",
83+
"\n",
84+
"# Add documents to the vector store\n",
85+
"vector_store.add_documents(documents=docs)"
10886
]
10987
},
11088
{
@@ -113,11 +91,16 @@
11391
"metadata": {},
11492
"outputs": [],
11593
"source": [
94+
"import time\n",
95+
"\n",
11696
"# Use helper method to create the vector search index\n",
11797
"vector_store.create_vector_search_index(\n",
118-
" dimensions = 1536, # The dimensions of the vector embeddings to be indexed\n",
119-
" filters = [ \"page\" ]\n",
120-
")"
98+
" dimensions = 3072, # The dimensions of the vector embeddings to be indexed\n",
99+
" filters = [ \"page_label\" ]\n",
100+
")\n",
101+
"\n",
102+
"# Wait for the index to build (this can take around a minute)\n",
103+
"time.sleep(60)"
121104
]
122105
},
123106
{
@@ -133,7 +116,9 @@
133116
"metadata": {},
134117
"outputs": [],
135118
"source": [
136-
"query = \"MongoDB Atlas security\"\n",
119+
"import pprint\n",
120+
"\n",
121+
"query = \"MongoDB acquisition\"\n",
137122
"results = vector_store.similarity_search(query)\n",
138123
"\n",
139124
"pprint.pprint(results)"
@@ -152,7 +137,7 @@
152137
"metadata": {},
153138
"outputs": [],
154139
"source": [
155-
"query = \"MongoDB Atlas security\"\n",
140+
"query = \"MongoDB acquisition\"\n",
156141
"results = vector_store.similarity_search_with_score(\n",
157142
" query = query, k = 3\n",
158143
")\n",
@@ -173,12 +158,12 @@
173158
"metadata": {},
174159
"outputs": [],
175160
"source": [
176-
"query = \"MongoDB Atlas security\"\n",
161+
"query = \"MongoDB acquisition\"\n",
177162
"\n",
178163
"results = vector_store.similarity_search_with_score(\n",
179164
" query = query,\n",
180165
" k = 3,\n",
181-
" pre_filter = { \"page\": { \"$eq\": 17 } }\n",
166+
" pre_filter = { \"page_label\": { \"$eq\": 2 } }\n",
182167
")\n",
183168
"\n",
184169
"pprint.pprint(results)"
@@ -197,6 +182,11 @@
197182
"metadata": {},
198183
"outputs": [],
199184
"source": [
185+
"from langchain_core.output_parsers import StrOutputParser\n",
186+
"from langchain_core.runnables import RunnablePassthrough\n",
187+
"from langchain_openai import ChatOpenAI\n",
188+
"from langchain.prompts import PromptTemplate\n",
189+
"\n",
200190
"# Instantiate Atlas Vector Search as a retriever\n",
201191
"retriever = vector_store.as_retriever(\n",
202192
" search_type = \"similarity\",\n",
@@ -215,7 +205,7 @@
215205
"\"\"\"\n",
216206
"custom_rag_prompt = PromptTemplate.from_template(template)\n",
217207
"\n",
218-
"llm = ChatOpenAI()\n",
208+
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
219209
"\n",
220210
"def format_docs(docs):\n",
221211
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
@@ -229,7 +219,7 @@
229219
")\n",
230220
"\n",
231221
"# Prompt the chain\n",
232-
"question = \"How can I secure my MongoDB Atlas cluster?\"\n",
222+
"question = \"What was MongoDB's latest acquisition?\"\n",
233223
"answer = rag_chain.invoke(question)\n",
234224
"\n",
235225
"print(\"Question: \" + question)\n",
@@ -260,7 +250,7 @@
260250
" search_kwargs = {\n",
261251
" \"k\": 10,\n",
262252
" \"score_threshold\": 0.75,\n",
263-
" \"pre_filter\": { \"page\": { \"$eq\": 17 } }\n",
253+
" \"pre_filter\": { \"page_label\": { \"$eq\": 2 } }\n",
264254
" }\n",
265255
")\n",
266256
"\n",
@@ -276,7 +266,7 @@
276266
"\"\"\"\n",
277267
"custom_rag_prompt = PromptTemplate.from_template(template)\n",
278268
"\n",
279-
"llm = ChatOpenAI()\n",
269+
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
280270
"\n",
281271
"def format_docs(docs):\n",
282272
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
@@ -290,7 +280,7 @@
290280
")\n",
291281
"\n",
292282
"# Prompt the chain\n",
293-
"question = \"How can I secure my MongoDB Atlas cluster?\"\n",
283+
"question = \"What was MongoDB's latest acquisition?\"\n",
294284
"answer = rag_chain.invoke(question)\n",
295285
"\n",
296286
"print(\"Question: \" + question)\n",

ai-integrations/langgraph.ipynb

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,13 @@
7878
"source": [
7979
"from langchain_mongodb import MongoDBAtlasVectorSearch\n",
8080
"from langchain_openai import OpenAIEmbeddings\n",
81-
"from pymongo import MongoClient\n",
8281
"\n",
83-
"# Connect to your Atlas cluster\n",
84-
"client = MongoClient(MONGODB_URI)\n",
85-
"collection = client[\"sample_mflix\"][\"embedded_movies\"]\n",
8682
"embedding_model = OpenAIEmbeddings(model=\"text-embedding-ada-002\", disallowed_special=())\n",
8783
"\n",
8884
"# Instantiate the vector store\n",
89-
"vector_store = MongoDBAtlasVectorSearch(\n",
90-
" collection = collection,\n",
85+
"vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
86+
" connection_string = MONGODB_URI,\n",
87+
" namespace = \"sample_mflix.embedded_movies\",\n",
9188
" embedding = embedding_model,\n",
9289
" text_key = \"plot\",\n",
9390
" embedding_key = \"plot_embedding\",\n",
@@ -114,11 +111,15 @@
114111
"outputs": [],
115112
"source": [
116113
"from langchain_mongodb.index import create_fulltext_search_index\n",
114+
"from pymongo import MongoClient\n",
117115
"import time\n",
118116
"\n",
117+
"# Connect to your cluster\n",
118+
"client = MongoClient(MONGODB_URI)\n",
119+
"\n",
119120
"# Use helper method to create the search index\n",
120-
"create_fulltext_search_index(\n",
121-
" collection = collection,\n",
121+
"create_fulltext_search_index( \n",
122+
" collection = client[\"sample_mflix\"][\"embedded_movies\"],\n",
122123
" field = \"title\",\n",
123124
" index_name = \"search_index\"\n",
124125
")\n",

0 commit comments

Comments
 (0)