|
53 | 53 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
54 | 54 | "\n",
|
55 | 55 | "# Load the PDF\n",
|
56 |
| - "loader = PyPDFLoader(\"https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP\")\n", |
| 56 | + "loader = PyPDFLoader(\"https://investors.mongodb.com/node/13176/pdf\")\n", |
57 | 57 | "data = loader.load()\n",
|
58 | 58 | "\n",
|
59 | 59 | "# Split PDF into documents\n",
|
|
77 | 77 | "vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
|
78 | 78 | " connection_string = ATLAS_CONNECTION_STRING,\n",
|
79 | 79 | " namespace = \"langchain_db.test\",\n",
|
80 |
| - " embedding = OpenAIEmbeddings(disallowed_special=()),\n", |
| 80 | + " embedding = OpenAIEmbeddings(model=\"text-embedding-3-large\"),\n", |
81 | 81 | " index_name = \"vector_index\"\n",
|
82 | 82 | ")\n",
|
83 | 83 | "\n",
|
|
91 | 91 | "metadata": {},
|
92 | 92 | "outputs": [],
|
93 | 93 | "source": [
|
| 94 | + "import time\n", |
| 95 | + "\n", |
94 | 96 | "# Use helper method to create the vector search index\n",
|
95 | 97 | "vector_store.create_vector_search_index(\n",
|
96 |
| - " dimensions = 1536, # The dimensions of the vector embeddings to be indexed\n", |
97 |
| - " filters = [ \"page\" ]\n", |
98 |
| - ")" |
| 98 | + " dimensions = 3072, # The dimensions of the vector embeddings to be indexed\n", |
| 99 | + " filters = [ \"page_label\" ]\n", |
| 100 | + ")\n", |
| 101 | + "\n", |
| 102 | + "# Wait for the index to build (this can take around a minute)\n", |
| 103 | + "time.sleep(60)" |
99 | 104 | ]
|
100 | 105 | },
|
101 | 106 | {
|
|
113 | 118 | "source": [
|
114 | 119 | "import pprint\n",
|
115 | 120 | "\n",
|
116 |
| - "query = \"MongoDB Atlas security\"\n", |
| 121 | + "query = \"MongoDB acquisition\"\n", |
117 | 122 | "results = vector_store.similarity_search(query)\n",
|
118 | 123 | "\n",
|
119 | 124 | "pprint.pprint(results)"
|
|
132 | 137 | "metadata": {},
|
133 | 138 | "outputs": [],
|
134 | 139 | "source": [
|
135 |
| - "query = \"MongoDB Atlas security\"\n", |
| 140 | + "query = \"MongoDB acquisition\"\n", |
136 | 141 | "results = vector_store.similarity_search_with_score(\n",
|
137 | 142 | " query = query, k = 3\n",
|
138 | 143 | ")\n",
|
|
153 | 158 | "metadata": {},
|
154 | 159 | "outputs": [],
|
155 | 160 | "source": [
|
156 |
| - "query = \"MongoDB Atlas security\"\n", |
| 161 | + "query = \"MongoDB acquisition\"\n", |
157 | 162 | "\n",
|
158 | 163 | "results = vector_store.similarity_search_with_score(\n",
|
159 | 164 | " query = query,\n",
|
160 | 165 | " k = 3,\n",
|
161 |
| - " pre_filter = { \"page\": { \"$eq\": 17 } }\n", |
| 166 | + " pre_filter = { \"page_label\": { \"$eq\": 2 } }\n", |
162 | 167 | ")\n",
|
163 | 168 | "\n",
|
164 | 169 | "pprint.pprint(results)"
|
|
200 | 205 | "\"\"\"\n",
|
201 | 206 | "custom_rag_prompt = PromptTemplate.from_template(template)\n",
|
202 | 207 | "\n",
|
203 |
| - "llm = ChatOpenAI()\n", |
| 208 | + "llm = ChatOpenAI(model=\"gpt-4o\")\n", |
204 | 209 | "\n",
|
205 | 210 | "def format_docs(docs):\n",
|
206 | 211 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
|
214 | 219 | ")\n",
|
215 | 220 | "\n",
|
216 | 221 | "# Prompt the chain\n",
|
217 |
| - "question = \"How can I secure my MongoDB Atlas cluster?\"\n", |
| 222 | + "question = \"What was MongoDB's latest acquisition?\"\n", |
218 | 223 | "answer = rag_chain.invoke(question)\n",
|
219 | 224 | "\n",
|
220 | 225 | "print(\"Question: \" + question)\n",
|
|
245 | 250 | " search_kwargs = {\n",
|
246 | 251 | " \"k\": 10,\n",
|
247 | 252 | " \"score_threshold\": 0.75,\n",
|
248 |
| - " \"pre_filter\": { \"page\": { \"$eq\": 17 } }\n", |
| 253 | + " \"pre_filter\": { \"page_label\": { \"$eq\": 2 } }\n", |
249 | 254 | " }\n",
|
250 | 255 | ")\n",
|
251 | 256 | "\n",
|
|
261 | 266 | "\"\"\"\n",
|
262 | 267 | "custom_rag_prompt = PromptTemplate.from_template(template)\n",
|
263 | 268 | "\n",
|
264 |
| - "llm = ChatOpenAI()\n", |
| 269 | + "llm = ChatOpenAI(model=\"gpt-4o\")\n", |
265 | 270 | "\n",
|
266 | 271 | "def format_docs(docs):\n",
|
267 | 272 | " return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
|
275 | 280 | ")\n",
|
276 | 281 | "\n",
|
277 | 282 | "# Prompt the chain\n",
|
278 |
| - "question = \"How can I secure my MongoDB Atlas cluster?\"\n", |
| 283 | + "question = \"What was MongoDB's latest acquisition?\"\n", |
279 | 284 | "answer = rag_chain.invoke(question)\n",
|
280 | 285 | "\n",
|
281 | 286 | "print(\"Question: \" + question)\n",
|
|
0 commit comments