Skip to content

Commit c643674

Browse files
authored
Merge pull request #17 from davidhou17/DOCSP-49240
DOCSP-49240: Add LangChain semantic cache and Local RAG notebooks
2 parents f47874f + 06b9094 commit c643674

8 files changed

+623
-29
lines changed

ai-integrations/langchain-graphrag.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"id": "b5dcbf95-9a30-416d-afed-d5b2bf0e8651",
66
"metadata": {},
77
"source": [
8-
"# GraphRAG with MongoDB and LangChain\n",
8+
"# LangChain MongoDB Integration - GraphRAG\n",
99
"\n",
1010
"This notebook is a companion to the [GraphRAG with MongoDB and LangChain](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/graph-rag/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
1111
"\n",

ai-integrations/langchain-hybrid-search.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"# Atlas Vector Search - LangChain Integration - Hybrid Search"
7+
"# LangChain MongoDB Integration - Hybrid Search"
88
]
99
},
1010
{
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# LangChain MongoDB Integration - Implement RAG Locally"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"This notebook is a companion to the [LangChain Local RAG](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/local-rag/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
15+
"\n",
16+
"<a target=\"_blank\" href=\"https://colab.research.google.com/github/mongodb/docs-notebooks/blob/main/ai-integrations/langchain-local-rag.ipynb\">\n",
17+
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
18+
"</a>"
19+
]
20+
},
21+
{
22+
"cell_type": "markdown",
23+
"metadata": {
24+
"vscode": {
25+
"languageId": "shellscript"
26+
}
27+
},
28+
"source": [
29+
"## Create a local Atlas deployment\n",
30+
"\n",
31+
"Run the following command in your terminal to set up your local Atlas deployment. \n",
32+
"\n",
33+
"```\n",
34+
"atlas deployments setup\n",
35+
"```"
36+
]
37+
},
38+
{
39+
"cell_type": "markdown",
40+
"metadata": {
41+
"vscode": {
42+
"languageId": "shellscript"
43+
}
44+
},
45+
"source": [
46+
"## Set up the environment"
47+
]
48+
},
49+
{
50+
"cell_type": "code",
51+
"execution_count": null,
52+
"metadata": {
53+
"vscode": {
54+
"languageId": "shellscript"
55+
}
56+
},
57+
"outputs": [],
58+
"source": [
59+
"pip install --quiet --upgrade pymongo langchain langchain-community langchain-huggingface gpt4all pypdf"
60+
]
61+
},
62+
{
63+
"cell_type": "code",
64+
"execution_count": null,
65+
"metadata": {},
66+
"outputs": [],
67+
"source": [
68+
"MONGODB_URI = (\"mongodb://localhost:<port-number>/?directConnection=true\")"
69+
]
70+
},
71+
{
72+
"cell_type": "markdown",
73+
"metadata": {},
74+
"source": [
75+
"## Configure the vector store"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"from langchain_mongodb import MongoDBAtlasVectorSearch\n",
85+
"from langchain_huggingface import HuggingFaceEmbeddings\n",
86+
"\n",
87+
"# Load the embedding model (https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)\n",
88+
"embedding_model = HuggingFaceEmbeddings(model_name=\"mixedbread-ai/mxbai-embed-large-v1\")\n",
89+
"\n",
90+
"# Instantiate vector store\n",
91+
"vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
92+
" connection_string = MONGODB_URI,\n",
93+
" namespace = \"langchain_db.local_rag\",\n",
94+
" embedding=embedding_model,\n",
95+
" index_name=\"vector_index\"\n",
96+
")"
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": null,
102+
"metadata": {},
103+
"outputs": [],
104+
"source": [
105+
"from langchain_community.document_loaders import PyPDFLoader\n",
106+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
107+
"\n",
108+
"# Load the PDF\n",
109+
"loader = PyPDFLoader(\"https://investors.mongodb.com/node/13176/pdf\")\n",
110+
"data = loader.load()\n",
111+
"\n",
112+
"# Split PDF into documents\n",
113+
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)\n",
114+
"docs = text_splitter.split_documents(data)\n",
115+
"\n",
116+
"# Add data to the vector store\n",
117+
"vector_store.add_documents(docs)"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": null,
123+
"metadata": {},
124+
"outputs": [],
125+
"source": [
126+
"vector_store.create_vector_search_index(\n",
127+
" dimensions = 1024, # The dimensions of the vector embeddings to be indexed\n",
128+
" wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
129+
")"
130+
]
131+
},
132+
{
133+
"cell_type": "markdown",
134+
"metadata": {},
135+
"source": [
136+
"## Implement RAG with a local LLM\n",
137+
"Before running the following code, [download the local model](https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf)."
138+
]
139+
},
140+
{
141+
"cell_type": "code",
142+
"execution_count": null,
143+
"metadata": {},
144+
"outputs": [],
145+
"source": [
146+
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
147+
"from langchain_community.llms import GPT4All\n",
148+
"\n",
149+
"# Configure the LLM\n",
150+
"local_path = \"<path-to-model>\"\n",
151+
"\n",
152+
"# Callbacks support token-wise streaming\n",
153+
"callbacks = [StreamingStdOutCallbackHandler()]\n",
154+
"\n",
155+
"# Verbose is required to pass to the callback manager\n",
156+
"llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"metadata": {},
163+
"outputs": [],
164+
"source": [
165+
"from langchain_core.prompts import PromptTemplate\n",
166+
"from langchain_core.output_parsers import StrOutputParser\n",
167+
"from langchain_core.runnables import RunnablePassthrough\n",
168+
"import pprint\n",
169+
"\n",
170+
"# Instantiate Atlas Vector Search as a retriever\n",
171+
"retriever = vector_store.as_retriever()\n",
172+
"\n",
173+
"# Define prompt template\n",
174+
"template = \"\"\"\n",
175+
"Use the following pieces of context to answer the question at the end.\n",
176+
"{context}\n",
177+
"Question: {question}\n",
178+
"\"\"\"\n",
179+
"custom_rag_prompt = PromptTemplate.from_template(template)\n",
180+
"\n",
181+
"def format_docs(docs):\n",
182+
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
183+
"\n",
184+
"# Create chain \n",
185+
"rag_chain = (\n",
186+
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
187+
" | custom_rag_prompt\n",
188+
" | llm\n",
189+
" | StrOutputParser()\n",
190+
")\n",
191+
"\n",
192+
"# Prompt the chain\n",
193+
"question = \"What was MongoDB's latest acquisition?\"\n",
194+
"answer = rag_chain.invoke(question)\n",
195+
"\n",
196+
"# Return source documents\n",
197+
"documents = retriever.invoke(question)\n",
198+
"print(\"\\nSource documents:\")\n",
199+
"pprint.pprint(documents)"
200+
]
201+
}
202+
],
203+
"metadata": {
204+
"kernelspec": {
205+
"display_name": "Python 3",
206+
"language": "python",
207+
"name": "python3"
208+
},
209+
"language_info": {
210+
"codemirror_mode": {
211+
"name": "ipython",
212+
"version": 3
213+
},
214+
"file_extension": ".py",
215+
"mimetype": "text/x-python",
216+
"name": "python",
217+
"nbconvert_exporter": "python",
218+
"pygments_lexer": "ipython3",
219+
"version": "3.10.12"
220+
}
221+
},
222+
"nbformat": 4,
223+
"nbformat_minor": 2
224+
}

0 commit comments

Comments
 (0)