From 9020a934be35b0798c972eb77a22fb62ce654ca5 Mon Sep 17 00:00:00 2001
From: Farzad Sunavala <40604067+farzad528@users.noreply.github.com>
Date: Fri, 24 Jan 2025 06:56:26 -0600
Subject: [PATCH] docs: add Azure RAG example (#675)

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
Co-authored-by: Farzad Sunavala
---
 docs/examples/rag_azuresearch.ipynb | 893 ++++++++++++++++++++++++++++
 mkdocs.yml                          |  10 +-
 2 files changed, 899 insertions(+), 4 deletions(-)
 create mode 100644 docs/examples/rag_azuresearch.ipynb

diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb
new file mode 100644
index 00000000..abbe3774
--- /dev/null
+++ b/docs/examples/rag_azuresearch.ipynb
@@ -0,0 +1,893 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Ag9kcX2B_atc"
+   },
+   "source": [
+    "\"Open"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# RAG with Azure AI Search"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "| Step | Tech | Execution |\n",
+    "| ------------------ | ------------------ | --------- |\n",
+    "| Embedding | Azure OpenAI | 🌐 Remote |\n",
+    "| Vector Store | Azure AI Search | 🌐 Remote |\n",
+    "| Gen AI | Azure OpenAI | 🌐 Remote |"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using:\n",
+    "- [Docling](https://ds4sd.github.io/docling/) for document parsing and chunking\n",
+    "- [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search/) for vector indexing and retrieval\n",
+    "- [Azure OpenAI](https://azure.microsoft.com/products/ai-services/openai-service) for embeddings and chat completion\n",
+    "\n",
+    "Specifically, it shows how to:\n",
+    "1. Parse a PDF with Docling.\n",
+    "2. Chunk the parsed text.\n",
+    "3. Use Azure OpenAI for embeddings.\n",
+    "4. Index and search in Azure AI Search.\n",
+    "5. Run a retrieval-augmented generation (RAG) query with Azure OpenAI GPT-4o.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If running in a fresh environment (like Google Colab), run this cell to install the required packages:\n",
+    "%pip install \"docling~=2.12\" azure-search-documents==11.5.2 azure-identity openai rich torch python-dotenv"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Part 0: Prerequisites\n",
+    "- An **Azure AI Search** resource\n",
+    "- An **Azure OpenAI** resource with a deployed embedding and chat completion model (e.g. `text-embedding-3-small` and `gpt-4o`)\n",
+    "- **Docling 2.12+** (installs `docling_core` automatically; requires a Python 3.8+ environment)\n",
+    "- A **GPU-enabled environment** is preferred for faster parsing; Docling 2.12 automatically detects a GPU if present.\n",
+    "  - If you only have a CPU, parsing large PDFs can be slower.\n",
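+    "\n",
+    "The next cell reads its configuration from environment variables (or Colab secrets). For reference, a local `.env` file for this notebook could look like the sketch below; every value is an illustrative placeholder, not a real endpoint or key:\n",
+    "\n",
+    "```\n",
+    "AZURE_SEARCH_ENDPOINT=https://<your-search-service>.search.windows.net\n",
+    "AZURE_SEARCH_KEY=<your-search-admin-key>\n",
+    "AZURE_SEARCH_INDEX_NAME=docling-rag-sample\n",
+    "AZURE_OPENAI_ENDPOINT=https://<your-openai-resource>.openai.azure.com\n",
+    "AZURE_OPENAI_API_KEY=<your-azure-openai-key>\n",
+    "AZURE_OPENAI_API_VERSION=2024-10-21\n",
+    "AZURE_OPENAI_CHAT_MODEL=gpt-4o\n",
+    "AZURE_OPENAI_EMBEDDINGS=text-embedding-3-small\n",
+    "```"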
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "\n", + "\n", + "def _get_env(key, default=None):\n", + " try:\n", + " from google.colab import userdata\n", + "\n", + " try:\n", + " return userdata.get(key)\n", + " except userdata.SecretNotFoundError:\n", + " pass\n", + " except ImportError:\n", + " pass\n", + " return os.getenv(key, default)\n", + "\n", + "\n", + "AZURE_SEARCH_ENDPOINT = _get_env(\"AZURE_SEARCH_ENDPOINT\")\n", + "AZURE_SEARCH_KEY = _get_env(\"AZURE_SEARCH_KEY\") # Ensure this your Admin Key\n", + "AZURE_SEARCH_INDEX_NAME = _get_env(\"AZURE_SEARCH_INDEX_NAME\", \"docling-rag-sample\")\n", + "AZURE_OPENAI_ENDPOINT = _get_env(\"AZURE_OPENAI_ENDPOINT\")\n", + "AZURE_OPENAI_API_KEY = _get_env(\"AZURE_OPENAI_API_KEY\")\n", + "AZURE_OPENAI_API_VERSION = _get_env(\"AZURE_OPENAI_API_VERSION\", \"2024-10-21\")\n", + "AZURE_OPENAI_CHAT_MODEL = _get_env(\n", + " \"AZURE_OPENAI_CHAT_MODEL\"\n", + ") # Using a deployed model named \"gpt-4o\"\n", + "AZURE_OPENAI_EMBEDDINGS = _get_env(\n", + " \"AZURE_OPENAI_EMBEDDINGS\", \"text-embedding-3-small\"\n", + ") # Using a deployed model named \"text-embeddings-3-small\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 1: Parse the PDF with Docling\n", + "\n", + "Weโ€™ll parse the **Microsoft GraphRAG Research Paper** (~15 pages). Parsing should be relatively quick, even on CPU, but it will be faster on a GPU or MPS device if available.\n", + "\n", + "*(If you prefer a different document, simply provide a different URL or local file path.)*" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Parsing a ~15-page PDF. The process should be relatively quick, even on CPU...\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;33mParsing a ~\u001b[0m\u001b[1;33m15\u001b[0m\u001b[1;33m-page PDF. The process should be relatively quick, even on CPU\u001b[0m\u001b[1;33m...\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────────────────── Docling Markdown Preview ────────────────────────────────────────────╮\n",
+       "│ ## From Local to Global: A Graph RAG Approach to Query-Focused Summarization                                    │\n",
+       "│                                                                                                                 │\n",
+       "│ Darren Edge 1†                                                                                                  │\n",
+       "│                                                                                                                 │\n",
+       "│ Ha Trinh 1†                                                                                                     │\n",
+       "│                                                                                                                 │\n",
+       "│ Newman Cheng 2                                                                                                  │\n",
+       "│                                                                                                                 │\n",
+       "│ Joshua Bradley 2                                                                                                │\n",
+       "│                                                                                                                 │\n",
+       "│ Alex Chao 3                                                                                                     │\n",
+       "│                                                                                                                 │\n",
+       "│ Apurva Mody 3                                                                                                   │\n",
+       "│                                                                                                                 │\n",
+       "│ Steven Truitt 2                                                                                                 │\n",
+       "│                                                                                                                 │\n",
+       "│ ## Jonathan Larson 1                                                                                            │\n",
+       "│                                                                                                                 │\n",
+       "│ 1 Microsoft Research 2 Microsoft Strategic Missions and Technologies 3 Microsoft Office of the CTO              │\n",
+       "│                                                                                                                 │\n",
+       "│ { daedge,trinhha,newmancheng,joshbradley,achao,moapurva,steventruitt,jolarso } @microsoft.com                   │\n",
+       "│                                                                                                                 │\n",
+       "│ † These authors contributed equally to this work                                                                │\n",
+       "│                                                                                                                 │\n",
+       "│ ## Abstract                                                                                                     │\n",
+       "│                                                                                                                 │\n",
+       "│ The use of retrieval-augmented gen...                                                                           │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Docling Markdown Preview โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n", + "โ”‚ ## From Local to Global: A Graph RAG Approach to Query-Focused Summarization โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Darren Edge 1โ€  โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Ha Trinh 1โ€  โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Newman Cheng 2 โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Joshua Bradley 2 โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Alex Chao 3 โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Apurva Mody 3 โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ Steven Truitt 2 โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ ## Jonathan Larson 1 โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ 1 Microsoft Research 2 Microsoft Strategic Missions and Technologies 3 Microsoft Office of the CTO โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ { daedge,trinhha,newmancheng,joshbradley,achao,moapurva,steventruitt,jolarso } @microsoft.com โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ โ€  These authors contributed equally to this work โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ ## Abstract โ”‚\n", + "โ”‚ โ”‚\n", + "โ”‚ The use of retrieval-augmented gen... โ”‚\n", + "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from rich.console import Console\n", + "from rich.panel import Panel\n", + "\n", + "from docling.document_converter import DocumentConverter\n", + "\n", + "console = Console()\n", + "\n", + "# This URL points to the Microsoft GraphRAG Research Paper (arXiv: 2404.16130), ~15 pages\n", + "source_url = \"https://arxiv.org/pdf/2404.16130\"\n", + "\n", + "console.print(\n", + " \"[bold yellow]Parsing a ~15-page PDF. The process should be relatively quick, even on CPU...[/bold yellow]\"\n", + ")\n", + "converter = DocumentConverter()\n", + "result = converter.convert(source_url)\n", + "\n", + "# Optional: preview the parsed Markdown\n", + "md_preview = result.document.export_to_markdown()\n", + "console.print(Panel(md_preview[:500] + \"...\", title=\"Docling Markdown Preview\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 3: Hierarchical Chunking\n", + "We convert the `Document` into smaller chunks for embedding & indexing. The built-in `HierarchicalChunker` preserves structure. " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Total chunks from PDF: 106\n",
+       "
\n" + ], + "text/plain": [ + "Total chunks from PDF: \u001b[1;36m106\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from docling.chunking import HierarchicalChunker\n", + "\n", + "chunker = HierarchicalChunker()\n", + "doc_chunks = list(chunker.chunk(result.document))\n", + "\n", + "all_chunks = []\n", + "for idx, c in enumerate(doc_chunks):\n", + " chunk_text = c.text\n", + " all_chunks.append((f\"chunk_{idx}\", chunk_text))\n", + "\n", + "console.print(f\"Total chunks from PDF: {len(all_chunks)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 3: Create Azure Search Index and Push Chunk Embeddings\n", + "Weโ€™ll define a vector index in Azure AI Search, then embed each chunk using Azure OpenAI and upload in batches." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Index 'docling-rag-sample-2' created.\n",
+       "
\n" + ], + "text/plain": [ + "Index \u001b[32m'docling-rag-sample-2'\u001b[0m created.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from azure.core.credentials import AzureKeyCredential\n", + "from azure.search.documents.indexes import SearchIndexClient\n", + "from azure.search.documents.indexes.models import (\n", + " AzureOpenAIVectorizer,\n", + " AzureOpenAIVectorizerParameters,\n", + " HnswAlgorithmConfiguration,\n", + " SearchableField,\n", + " SearchField,\n", + " SearchFieldDataType,\n", + " SearchIndex,\n", + " SimpleField,\n", + " VectorSearch,\n", + " VectorSearchProfile,\n", + ")\n", + "from rich.console import Console\n", + "\n", + "console = Console()\n", + "\n", + "VECTOR_DIM = 1536 # Adjust based on your chosen embeddings model\n", + "\n", + "index_client = SearchIndexClient(\n", + " AZURE_SEARCH_ENDPOINT, AzureKeyCredential(AZURE_SEARCH_KEY)\n", + ")\n", + "\n", + "\n", + "def create_search_index(index_name: str):\n", + " # Define fields\n", + " fields = [\n", + " SimpleField(name=\"chunk_id\", type=SearchFieldDataType.String, key=True),\n", + " SearchableField(name=\"content\", type=SearchFieldDataType.String),\n", + " SearchField(\n", + " name=\"content_vector\",\n", + " type=SearchFieldDataType.Collection(SearchFieldDataType.Single),\n", + " searchable=True,\n", + " filterable=False,\n", + " sortable=False,\n", + " facetable=False,\n", + " vector_search_dimensions=VECTOR_DIM,\n", + " vector_search_profile_name=\"default\",\n", + " ),\n", + " ]\n", + " # Vector search config with an AzureOpenAIVectorizer\n", + " vector_search = VectorSearch(\n", + " algorithms=[HnswAlgorithmConfiguration(name=\"default\")],\n", + " profiles=[\n", + " VectorSearchProfile(\n", + " name=\"default\",\n", + " algorithm_configuration_name=\"default\",\n", + " vectorizer_name=\"default\",\n", + " )\n", + " ],\n", + " vectorizers=[\n", + " AzureOpenAIVectorizer(\n", + " vectorizer_name=\"default\",\n", + " parameters=AzureOpenAIVectorizerParameters(\n", + " resource_url=AZURE_OPENAI_ENDPOINT,\n", + " deployment_name=AZURE_OPENAI_EMBEDDINGS,\n", + " model_name=\"text-embedding-3-small\",\n", + " api_key=AZURE_OPENAI_API_KEY,\n", + " ),\n", + " )\n", + " ],\n", + " )\n", + "\n", + " # Create or update the index\n", + " new_index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)\n", + " try:\n", + " index_client.delete_index(index_name)\n", + " except:\n", + " pass\n", + "\n", + " index_client.create_or_update_index(new_index)\n", + " console.print(f\"Index '{index_name}' created.\")\n", + "\n", + "\n", + "create_search_index(AZURE_SEARCH_INDEX_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Embed and Upsert to Azure AI Search\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Uploaded batch 0 -> 50; all_succeeded: True, first_doc_status_code: 201\n",
+       "
\n" + ], + "text/plain": [ + "Uploaded batch \u001b[1;36m0\u001b[0m -> \u001b[1;36m50\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Uploaded batch 50 -> 100; all_succeeded: True, first_doc_status_code: 201\n",
+       "
\n" + ], + "text/plain": [ + "Uploaded batch \u001b[1;36m50\u001b[0m -> \u001b[1;36m100\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Uploaded batch 100 -> 106; all_succeeded: True, first_doc_status_code: 201\n",
+       "
\n" + ], + "text/plain": [ + "Uploaded batch \u001b[1;36m100\u001b[0m -> \u001b[1;36m106\u001b[0m; all_succeeded: \u001b[3;92mTrue\u001b[0m, first_doc_status_code: \u001b[1;36m201\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
All chunks uploaded to Azure Search.\n",
+       "
\n" + ], + "text/plain": [ + "All chunks uploaded to Azure Search.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from azure.search.documents import SearchClient\n", + "from openai import AzureOpenAI\n", + "\n", + "search_client = SearchClient(\n", + " AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_INDEX_NAME, AzureKeyCredential(AZURE_SEARCH_KEY)\n", + ")\n", + "openai_client = AzureOpenAI(\n", + " api_key=AZURE_OPENAI_API_KEY,\n", + " api_version=AZURE_OPENAI_API_VERSION,\n", + " azure_endpoint=AZURE_OPENAI_ENDPOINT,\n", + ")\n", + "\n", + "\n", + "def embed_text(text: str):\n", + " \"\"\"\n", + " Helper to generate embeddings with Azure OpenAI.\n", + " \"\"\"\n", + " response = openai_client.embeddings.create(\n", + " input=text, model=AZURE_OPENAI_EMBEDDINGS\n", + " )\n", + " return response.data[0].embedding\n", + "\n", + "\n", + "upload_docs = []\n", + "for chunk_id, chunk_text in all_chunks:\n", + " embedding_vector = embed_text(chunk_text)\n", + " upload_docs.append(\n", + " {\n", + " \"chunk_id\": chunk_id,\n", + " \"content\": chunk_text,\n", + " \"content_vector\": embedding_vector,\n", + " }\n", + " )\n", + "\n", + "\n", + "BATCH_SIZE = 50\n", + "for i in range(0, len(upload_docs), BATCH_SIZE):\n", + " subset = upload_docs[i : i + BATCH_SIZE]\n", + " resp = search_client.upload_documents(documents=subset)\n", + "\n", + " all_succeeded = all(r.succeeded for r in resp)\n", + " console.print(\n", + " f\"Uploaded batch {i} -> {i+len(subset)}; all_succeeded: {all_succeeded}, \"\n", + " f\"first_doc_status_code: {resp[0].status_code}\"\n", + " )\n", + "\n", + "console.print(\"All chunks uploaded to Azure Search.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Part 4: RAG Query with Azure OpenAI\n", + "Combine retrieval from Azure Search with Chat Completions (aka. grounding your LLM)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭────────────────────────────────────────────────── RAG Prompt ───────────────────────────────────────────────────╮\n",
+       "│                                                                                                                 │\n",
+       "│ You are an AI assistant helping answering questions about Microsoft GraphRAG.                                   │\n",
+       "│ Use ONLY the text below to answer the user's question.                                                          │\n",
+       "│ If the answer isn't in the text, say you don't know.                                                            │\n",
+       "│                                                                                                                 │\n",
+       "│ Context:                                                                                                        │\n",
+       "│ Community summaries vs. source texts. When comparing community summaries to source texts using Graph RAG,      │\n",
+       "│ community summaries generally provided a small but consistent improvement in answer comprehensiveness and      │\n",
+       "│ diversity, except for root-level summaries. Intermediate-level summaries in the Podcast dataset and low-level  │\n",
+       "│ community summaries in the News dataset achieved comprehensiveness win rates of 57% and 64%, respectively.     │\n",
+       "│ Diversity win rates were 57% for Podcast intermediate-level summaries and 60% for News low-level community     │\n",
+       "│ summaries. Table 3 also illustrates the scalability advantages of Graph RAG compared to source text            │\n",
+       "│ summarization: for low-level community summaries ( C3 ), Graph RAG required 26-33% fewer context tokens, while │\n",
+       "│ for root-level community summaries ( C0 ), it required over 97% fewer tokens. For a modest drop in performance │\n",
+       "│ compared with other global methods, root-level Graph RAG offers a highly efficient method for the iterative    │\n",
+       "│ question answering that characterizes sensemaking activity, while retaining advantages in comprehensiveness    │\n",
+       "│ (72% win rate) and diversity (62% win rate) over naïve RAG.                                                     │\n",
+       "│ ---                                                                                                             │\n",
+       "│ We have presented a global approach to Graph RAG, combining knowledge graph generation, retrieval-augmented    │\n",
+       "│ generation (RAG), and query-focused summarization (QFS) to support human sensemaking over entire text corpora. │\n",
+       "│ Initial evaluations show substantial improvements over a naïve RAG baseline for both the comprehensiveness and │\n",
+       "│ diversity of answers, as well as favorable comparisons to a global but graph-free approach using map-reduce    │\n",
+       "│ source text summarization. For situations requiring many global queries over the same dataset, summaries of    │\n",
+       "│ root-level communities in the entity-based graph index provide a data index that is both superior to naïve RAG │\n",
+       "│ and achieves competitive performance to other global methods at a fraction of the token cost.                  │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Trade-offs of building a graph index . We consistently observed Graph RAG achieve the best headto-head results │\n",
+       "│ against other methods, but in many cases the graph-free approach to global summarization of source texts       │\n",
+       "│ performed competitively. The real-world decision about whether to invest in building a graph index depends on  │\n",
+       "│ multiple factors, including the compute budget, expected number of lifetime queries per dataset, and value     │\n",
+       "│ obtained from other aspects of the graph index (including the generic community summaries and the use of other │\n",
+       "│ graph-related RAG approaches).                                                                                  │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Future work . The graph index, rich text annotations, and hierarchical community structure supporting the      │\n",
+       "│ current Graph RAG approach offer many possibilities for refinement and adaptation. This includes RAG approaches │\n",
+       "│ that operate in a more local manner, via embedding-based matching of user queries and graph annotations, as    │\n",
+       "│ well as the possibility of hybrid RAG schemes that combine embedding-based matching against community reports  │\n",
+       "│ before employing our map-reduce summarization mechanisms. This 'roll-up' operation could also be extended      │\n",
+       "│ across more levels of the community hierarchy, as well as implemented as a more exploratory 'drill down'       │\n",
+       "│ mechanism that follows the information scent contained in higher-level community summaries.                    │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Advanced RAG systems include pre-retrieval, retrieval, post-retrieval strategies designed to overcome the      │\n",
+       "│ drawbacks of Naïve RAG, while Modular RAG systems include patterns for iterative and dynamic cycles of         │\n",
+       "│ interleaved retrieval and generation (Gao et al., 2023). Our implementation of Graph RAG incorporates multiple │\n",
+       "│ concepts related to other systems. For example, our community summaries are a kind of self-memory (Selfmem,    │\n",
+       "│ Cheng et al., 2024) for generation-augmented retrieval (GAR, Mao et al., 2020) that facilitates future         │\n",
+       "│ generation cycles, while our parallel generation of community answers from these summaries is a kind of        │\n",
+       "│ iterative (Iter-RetGen, Shao et al., 2023) or federated (FeB4RAG, Wang et al., 2024) retrieval-generation      │\n",
+       "│ strategy. Other systems have also combined these concepts for multi-document summarization (CAiRE-COVID, Su et │\n",
+       "│ al., 2020) and multi-hop question answering (ITRG, Feng et al., 2023; IR-CoT, Trivedi et al., 2022; DSP,       │\n",
+       "│ Khattab et al., 2022). Our use of a hierarchical index and summarization also bears resemblance to further     │\n",
+       "│ approaches, such as generating a hierarchical index of text chunks by clustering the vectors of text embeddings │\n",
+       "│ (RAPTOR, Sarthi et al., 2024) or generating a 'tree of clarifications' to answer multiple interpretations of   │\n",
+       "│ ambiguous questions (Kim et al., 2023). However, none of these iterative or hierarchical approaches use the    │\n",
+       "│ kind of self-generated graph index that enables Graph RAG.                                                      │\n",
+       "│ ---                                                                                                             │\n",
+       "│ The use of retrieval-augmented generation (RAG) to retrieve relevant information from an external knowledge    │\n",
+       "│ source enables large language models (LLMs) to answer questions over private and/or previously unseen document │\n",
+       "│ collections. However, RAG fails on global questions directed at an entire text corpus, such as 'What are the   │\n",
+       "│ main themes in the dataset?', since this is inherently a queryfocused summarization (QFS) task, rather than an │\n",
+       "│ explicit retrieval task. Prior QFS methods, meanwhile, fail to scale to the quantities of text indexed by      │\n",
+       "│ typical RAGsystems. To combine the strengths of these contrasting methods, we propose a Graph RAG approach to  │\n",
+       "│ question answering over private text corpora that scales with both the generality of user questions and the    │\n",
+       "│ quantity of source text to be indexed. Our approach uses an LLM to build a graph-based text index in two       │\n",
+       "│ stages: first to derive an entity knowledge graph from the source documents, then to pregenerate community     │\n",
+       "│ summaries for all groups of closely-related entities. Given a question, each community summary is used to      │\n",
+       "│ generate a partial response, before all partial responses are again summarized in a final response to the user. │\n",
+       "│ For a class of global sensemaking questions over datasets in the 1 million token range, we show that Graph RAG │\n",
+       "│ leads to substantial improvements over a naïve RAG baseline for both the comprehensiveness and diversity of    │\n",
+       "│ generated answers. An open-source, Python-based implementation of both global and local Graph RAG approaches is │\n",
+       "│ forthcoming at https://aka . ms/graphrag .                                                                      │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Given the multi-stage nature of our Graph RAG mechanism, the multiple conditions we wanted to compare, and the │\n",
+       "│ lack of gold standard answers to our activity-based sensemaking questions, we decided to adopt a head-to-head  │\n",
+       "│ comparison approach using an LLM evaluator. We selected three target metrics capturing qualities that are      │\n",
+       "│ desirable for sensemaking activities, as well as a control metric (directness) used as a indicator of validity. │\n",
+       "│ Since directness is effectively in opposition to comprehensiveness and diversity, we would not expect any      │\n",
+       "│ method to win across all four metrics.                                                                          │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Figure 1: Graph RAG pipeline using an LLM-derived graph index of source document text. This index spans nodes  │\n",
+       "│ (e.g., entities), edges (e.g., relationships), and covariates (e.g., claims) that have been detected,          │\n",
+       "│ extracted, and summarized by LLM prompts tailored to the domain of the dataset. Community detection (e.g.,     │\n",
+       "│ Leiden, Traag et al., 2019) is used to partition the graph index into groups of elements (nodes, edges,        │\n",
+       "│ covariates) that the LLM can summarize in parallel at both indexing time and query time. The 'global answer' to │\n",
+       "│ a given query is produced using a final round of query-focused summarization over all community summaries      │\n",
+       "│ reporting relevance to that query.                                                                              │\n",
+       "│ ---                                                                                                             │\n",
+       "│ Retrieval-augmented generation (RAG, Lewis et al., 2020) is an established approach to answering user questions │\n",
+       "│ over entire datasets, but it is designed for situations where these answers are contained locally within       │\n",
+       "│ regions of text whose retrieval provides sufficient grounding for the generation task. Instead, a more         │\n",
+       "│ appropriate task framing is query-focused summarization (QFS, Dang, 2006), and in particular, query-focused    │\n",
+       "│ abstractive summarization that generates natural language summaries and not just concatenated excerpts (Baumel │\n",
+       "│ et al., 2018; Laskar et al., 2020; Yao et al., 2017) . In recent years, however, such distinctions between     │\n",
+       "│ summarization tasks that are abstractive versus extractive, generic versus query-focused, and single-document  │\n",
+       "│ versus multi-document, have become less relevant. While early applications of the transformer architecture     │\n",
+       "│ showed substantial improvements on the state-of-the-art for all such summarization tasks (Goodwin et al., 2020; │\n",
+       "│ Laskar et al., 2022; Liu and Lapata, 2019), these tasks are now trivialized by modern LLMs, including the GPT  │\n",
+       "│ (Achiam et al., 2023; Brown et al., 2020), Llama (Touvron et al., 2023), and Gemini (Anil et al., 2023) series, │\n",
+       "│ all of which can use in-context learning to summarize any content provided in their context window.            │\n",
+       "│ ---                                                                                                             │\n",
+       "│ community descriptions provide complete coverage of the underlying graph index and the input documents it      │\n",
+       "│ represents. Query-focused summarization of an entire corpus is then made possible using a map-reduce approach: │\n",
+       "│ first using each community summary to answer the query independently and in parallel, then summarizing all     │\n",
+       "│ relevant partial answers into a final global answer.                                                            │\n",
+       "│                                                                                                                 │\n",
+       "│ Question: What are the main advantages of using the Graph RAG approach for query-focused summarization compared │\n",
+       "│ to traditional RAG methods?                                                                                     │\n",
+       "│ Answer:                                                                                                         │\n",
+       "│                                                                                                                 │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1;31mโ•ญโ”€\u001b[0m\u001b[1;31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m RAG Prompt \u001b[1;31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[1;31mโ”€โ•ฎ\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mYou are an AI assistant helping answering questions about Microsoft GraphRAG.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mUse ONLY the text below to answer the user's question.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mIf the answer isn't in the text, say you don't know.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mContext:\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mCommunity summaries vs. source texts. When comparing community summaries to source texts using Graph RAG, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcommunity summaries generally provided a small but consistent improvement in answer comprehensiveness and \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mdiversity, except for root-level summaries. Intermediate-level summaries in the Podcast dataset and low-level \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcommunity summaries in the News dataset achieved comprehensiveness win rates of 57% and 64%, respectively. \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mDiversity win rates were 57% for Podcast intermediate-level summaries and 60% for News low-level community \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31msummaries. Table 3 also illustrates the scalability advantages of Graph RAG compared to source text \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31msummarization: for low-level community summaries ( C3 ), Graph RAG required 26-33% fewer context tokens, while \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mfor root-level community summaries ( C0 ), it required over 97% fewer tokens. 
For a modest drop in performance \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcompared with other global methods, root-level Graph RAG offers a highly efficient method for the iterative \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mquestion answering that characterizes sensemaking activity, while retaining advantages in comprehensiveness \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m(72% win rate) and diversity (62% win rate) over naยจฤฑve RAG.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mWe have presented a global approach to Graph RAG, combining knowledge graph generation, retrieval-augmented \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mgeneration (RAG), and query-focused summarization (QFS) to support human sensemaking over entire text corpora. \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mInitial evaluations show substantial improvements over a naยจฤฑve RAG baseline for both the comprehensiveness and\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mdiversity of answers, as well as favorable comparisons to a global but graph-free approach using map-reduce \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31msource text summarization. For situations requiring many global queries over the same dataset, summaries of \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mroot-level communities in the entity-based graph index provide a data index that is both superior to naยจฤฑve RAG\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mand achieves competitive performance to other global methods at a fraction of the token cost.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mTrade-offs of building a graph index . We consistently observed Graph RAG achieve the best headto-head results \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31magainst other methods, but in many cases the graph-free approach to global summarization of source texts \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mperformed competitively. 
The real-world decision about whether to invest in building a graph index depends on \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mmultiple factors, including the compute budget, expected number of lifetime queries per dataset, and value \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mobtained from other aspects of the graph index (including the generic community summaries and the use of other \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mgraph-related RAG approaches).\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mFuture work . The graph index, rich text annotations, and hierarchical community structure supporting the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcurrent Graph RAG approach offer many possibilities for refinement and adaptation. This includes RAG approaches\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mthat operate in a more local manner, via embedding-based matching of user queries and graph annotations, as \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mwell as the possibility of hybrid RAG schemes that combine embedding-based matching against community reports \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mbefore employing our map-reduce summarization mechanisms. This 'roll-up' operation could also be extended \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31macross more levels of the community hierarchy, as well as implemented as a more exploratory 'drill down' \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mmechanism that follows the information scent contained in higher-level community summaries.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mAdvanced RAG systems include pre-retrieval, retrieval, post-retrieval strategies designed to overcome the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mdrawbacks of Naยจฤฑve RAG, while Modular RAG systems include patterns for iterative and dynamic cycles of \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31minterleaved retrieval and generation (Gao et al., 2023). 
Our implementation of Graph RAG incorporates multiple \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mconcepts related to other systems. For example, our community summaries are a kind of self-memory (Selfmem, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mCheng et al., 2024) for generation-augmented retrieval (GAR, Mao et al., 2020) that facilitates future \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mgeneration cycles, while our parallel generation of community answers from these summaries is a kind of \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31miterative (Iter-RetGen, Shao et al., 2023) or federated (FeB4RAG, Wang et al., 2024) retrieval-generation \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mstrategy. Other systems have also combined these concepts for multi-document summarization (CAiRE-COVID, Su et \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mal., 2020) and multi-hop question answering (ITRG, Feng et al., 2023; IR-CoT, Trivedi et al., 2022; DSP, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mKhattab et al., 2022). Our use of a hierarchical index and summarization also bears resemblance to further \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mapproaches, such as generating a hierarchical index of text chunks by clustering the vectors of text embeddings\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m(RAPTOR, Sarthi et al., 2024) or generating a 'tree of clarifications' to answer multiple interpretations of \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mambiguous questions (Kim et al., 2023). However, none of these iterative or hierarchical approaches use the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mkind of self-generated graph index that enables Graph RAG.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mThe use of retrieval-augmented generation (RAG) to retrieve relevant information from an external knowledge \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31msource enables large language models (LLMs) to answer questions over private and/or previously unseen document \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcollections. 
However, RAG fails on global questions directed at an entire text corpus, such as 'What are the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mmain themes in the dataset?', since this is inherently a queryfocused summarization (QFS) task, rather than an \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mexplicit retrieval task. Prior QFS methods, meanwhile, fail to scale to the quantities of text indexed by \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mtypical RAGsystems. To combine the strengths of these contrasting methods, we propose a Graph RAG approach to \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mquestion answering over private text corpora that scales with both the generality of user questions and the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mquantity of source text to be indexed. Our approach uses an LLM to build a graph-based text index in two \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mstages: first to derive an entity knowledge graph from the source documents, then to pregenerate community \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31msummaries for all groups of closely-related entities. Given a question, each community summary is used to \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mgenerate a partial response, before all partial responses are again summarized in a final response to the user.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mFor a class of global sensemaking questions over datasets in the 1 million token range, we show that Graph RAG \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mleads to substantial improvements over a naยจฤฑve RAG baseline for both the comprehensiveness and diversity of \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mgenerated answers. An open-source, Python-based implementation of both global and local Graph RAG approaches is\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mforthcoming at https://aka . 
ms/graphrag .\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mGiven the multi-stage nature of our Graph RAG mechanism, the multiple conditions we wanted to compare, and the \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mlack of gold standard answers to our activity-based sensemaking questions, we decided to adopt a head-to-head \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcomparison approach using an LLM evaluator. We selected three target metrics capturing qualities that are \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mdesirable for sensemaking activities, as well as a control metric (directness) used as a indicator of validity.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mSince directness is effectively in opposition to comprehensiveness and diversity, we would not expect any \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mmethod to win across all four metrics.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mFigure 1: Graph RAG pipeline using an LLM-derived graph index of source document text. This index spans nodes \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m(e.g., entities), edges (e.g., relationships), and covariates (e.g., claims) that have been detected, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mextracted, and summarized by LLM prompts tailored to the domain of the dataset. Community detection (e.g., \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mLeiden, Traag et al., 2019) is used to partition the graph index into groups of elements (nodes, edges, \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcovariates) that the LLM can summarize in parallel at both indexing time and query time. 
The 'global answer' to\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31ma given query is produced using a final round of query-focused summarization over all community summaries \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mreporting relevance to that query.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mRetrieval-augmented generation (RAG, Lewis et al., 2020) is an established approach to answering user questions\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mover entire datasets, but it is designed for situations where these answers are contained locally within \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mregions of text whose retrieval provides sufficient grounding for the generation task. Instead, a more \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mappropriate task framing is query-focused summarization (QFS, Dang, 2006), and in particular, query-focused \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mabstractive summarization that generates natural language summaries and not just concatenated excerpts (Baumel \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31met al., 2018; Laskar et al., 2020; Yao et al., 2017) . In recent years, however, such distinctions between \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31msummarization tasks that are abstractive versus extractive, generic versus query-focused, and single-document \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mversus multi-document, have become less relevant. 
While early applications of the transformer architecture \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mshowed substantial improvements on the state-of-the-art for all such summarization tasks (Goodwin et al., 2020;\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mLaskar et al., 2022; Liu and Lapata, 2019), these tasks are now trivialized by modern LLMs, including the GPT \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m(Achiam et al., 2023; Brown et al., 2020), Llama (Touvron et al., 2023), and Gemini (Anil et al., 2023) series,\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mall of which can use in-context learning to summarize any content provided in their context window.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m---\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mcommunity descriptions provide complete coverage of the underlying graph index and the input documents it \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mrepresents. Query-focused summarization of an entire corpus is then made possible using a map-reduce approach: \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mfirst using each community summary to answer the query independently and in parallel, then summarizing all \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mrelevant partial answers into a final global answer.\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mQuestion: What are the main advantages of using the Graph RAG approach for query-focused summarization compared\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mto traditional RAG methods?\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mAnswer:\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ”‚\u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31m \u001b[0m\u001b[1;31mโ”‚\u001b[0m\n", + "\u001b[1;31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
+       "╭──────────────────────────────────────────────────── RAG Response ─────────────────────────────────────────────────╮\n",
+       "│ The main advantages of using the Graph RAG approach for query-focused summarization compared to traditional RAG │\n",
+       "│ methods include:                                                                                                │\n",
+       "│                                                                                                                 │\n",
+       "│ 1. **Improved Comprehensiveness and Diversity**: Graph RAG shows substantial improvements over a naïve RAG      │\n",
+       "│ baseline in terms of the comprehensiveness and diversity of answers. This is particularly beneficial for global │\n",
+       "│ sensemaking questions over large datasets.                                                                      │\n",
+       "│                                                                                                                 │\n",
+       "│ 2. **Scalability**: Graph RAG provides scalability advantages, achieving efficient summarization with           │\n",
+       "│ significantly fewer context tokens required. For instance, it requires 26-33% fewer tokens for low-level        │\n",
+       "│ community summaries and over 97% fewer tokens for root-level summaries compared to source text summarization.   │\n",
+       "│                                                                                                                 │\n",
+       "│ 3. **Efficiency in Iterative Question Answering**: Root-level Graph RAG offers a highly efficient method for    │\n",
+       "│ iterative question answering, which is crucial for sensemaking activities, with only a modest drop in           │\n",
+       "│ performance compared to other global methods.                                                                   │\n",
+       "│                                                                                                                 │\n",
+       "│ 4. **Global Query Handling**: It supports handling global queries effectively, as it combines knowledge graph   │\n",
+       "│ generation, retrieval-augmented generation, and query-focused summarization, making it suitable for sensemaking │\n",
+       "│ over entire text corpora.                                                                                       │\n",
+       "│                                                                                                                 │\n",
+       "│ 5. **Hierarchical Indexing and Summarization**: The use of a hierarchical index and summarization allows for    │\n",
+       "│ efficient processing and summarizing of community summaries into a final global answer, facilitating a          │\n",
+       "│ comprehensive coverage of the underlying graph index and input documents.                                       │\n",
+       "│                                                                                                                 │\n",
+       "│ 6. **Reduced Token Cost**: For situations requiring many global queries over the same dataset, Graph RAG        │\n",
+       "│ achieves competitive performance to other global methods at a fraction of the token cost.                       │\n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "\n"
\n" + ], + "text/plain": [ + "\u001b[1;32mโ•ญโ”€\u001b[0m\u001b[1;32mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m RAG Response \u001b[1;32mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[1;32mโ”€โ•ฎ\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mThe main advantages of using the Graph RAG approach for query-focused summarization compared to traditional RAG\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mmethods include:\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m1. **Improved Comprehensiveness and Diversity**: Graph RAG shows substantial improvements over a naรฏve RAG \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mbaseline in terms of the comprehensiveness and diversity of answers. This is particularly beneficial for global\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32msensemaking questions over large datasets.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m2. **Scalability**: Graph RAG provides scalability advantages, achieving efficient summarization with \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32msignificantly fewer context tokens required. For instance, it requires 26-33% fewer tokens for low-level \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mcommunity summaries and over 97% fewer tokens for root-level summaries compared to source text summarization.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m3. **Efficiency in Iterative Question Answering**: Root-level Graph RAG offers a highly efficient method for \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32miterative question answering, which is crucial for sensemaking activities, with only a modest drop in \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mperformance compared to other global methods.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m4. 
**Global Query Handling**: It supports handling global queries effectively, as it combines knowledge graph \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mgeneration, retrieval-augmented generation, and query-focused summarization, making it suitable for sensemaking\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mover entire text corpora.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m5. **Hierarchical Indexing and Summarization**: The use of a hierarchical index and summarization allows for \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mefficient processing and summarizing of community summaries into a final global answer, facilitating a \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mcomprehensive coverage of the underlying graph index and input documents.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m6. **Reduced Token Cost**: For situations requiring many global queries over the same dataset, Graph RAG \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ”‚\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32machieves competitive performance to other global methods at a fraction of the token cost.\u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32m \u001b[0m\u001b[1;32mโ”‚\u001b[0m\n", + "\u001b[1;32mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from azure.search.documents.models import VectorizableTextQuery\n", + "\n", + "\n", + "def generate_chat_response(prompt: str, system_message: str = None):\n", + " \"\"\"\n", + " Generates a single-turn chat response using Azure OpenAI Chat.\n", + " If you need multi-turn conversation or follow-up queries, you'll have to\n", + " maintain the messages list externally.\n", + " \"\"\"\n", + " messages = []\n", + " if system_message:\n", + " messages.append({\"role\": \"system\", \"content\": system_message})\n", + " messages.append({\"role\": \"user\", \"content\": prompt})\n", + "\n", + " completion = openai_client.chat.completions.create(\n", + " model=AZURE_OPENAI_CHAT_MODEL, messages=messages, temperature=0.7\n", + " )\n", + " return completion.choices[0].message.content\n", + "\n", + "\n", + "user_query = \"What are the main advantages of using the Graph RAG approach for query-focused summarization compared to traditional RAG methods?\"\n", + "user_embed = embed_text(user_query)\n", + "\n", + "vector_query = VectorizableTextQuery(\n", + " 
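+    "# Hybrid retrieval + grounded generation: Azure AI Search fuses the keyword\n",
+    "# match on `search_text` with the vector match (the service embeds the query\n",
+    "# text), and the top retrieved chunks ground the chat model's answer.\n",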
+    "from azure.search.documents.models import VectorizableTextQuery\n",
+    "\n",
+    "\n",
+    "def generate_chat_response(prompt: str, system_message: str | None = None):\n",
+    "    \"\"\"\n",
+    "    Generates a single-turn chat response using Azure OpenAI Chat.\n",
+    "    If you need multi-turn conversation or follow-up queries, you'll have to\n",
+    "    maintain the messages list externally.\n",
+    "    \"\"\"\n",
+    "    messages = []\n",
+    "    if system_message:\n",
+    "        messages.append({\"role\": \"system\", \"content\": system_message})\n",
+    "    messages.append({\"role\": \"user\", \"content\": prompt})\n",
+    "\n",
+    "    completion = openai_client.chat.completions.create(\n",
+    "        model=AZURE_OPENAI_CHAT_MODEL, messages=messages, temperature=0.7\n",
+    "    )\n",
+    "    return completion.choices[0].message.content\n",
+    "\n",
+    "\n",
+    "user_query = \"What are the main advantages of using the Graph RAG approach for query-focused summarization compared to traditional RAG methods?\"\n",
+    "\n",
+    "vector_query = VectorizableTextQuery(\n",
+    "    text=user_query,  # the service vectorizes the query text; combined with search_text below for hybrid search\n",
+    "    k_nearest_neighbors=5,\n",
+    "    fields=\"content_vector\",\n",
+    ")\n",
+    "\n",
+    "search_results = search_client.search(\n",
+    "    search_text=user_query, vector_queries=[vector_query], select=[\"content\"], top=10\n",
+    ")\n",
+    "\n",
+    "retrieved_chunks = [result[\"content\"] for result in search_results]\n",
+    "\n",
+    "context_str = \"\\n---\\n\".join(retrieved_chunks)\n",
+    "rag_prompt = f\"\"\"\n",
+    "You are an AI assistant helping answer questions about Microsoft GraphRAG.\n",
+    "Use ONLY the text below to answer the user's question.\n",
+    "If the answer isn't in the text, say you don't know.\n",
+    "\n",
+    "Context:\n",
+    "{context_str}\n",
+    "\n",
+    "Question: {user_query}\n",
+    "Answer:\n",
+    "\"\"\"\n",
+    "\n",
+    "final_answer = generate_chat_response(rag_prompt)\n",
+    "\n",
+    "console.print(Panel(rag_prompt, title=\"RAG Prompt\", style=\"bold red\"))\n",
+    "console.print(Panel(final_answer, title=\"RAG Response\", style=\"bold green\"))"
+   ]
+  },
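+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Optional: Multi-Turn Follow-Up\n",
+    "\n",
+    "`generate_chat_response` is single-turn by design; as its docstring notes, follow-up questions require maintaining the `messages` list yourself. The cell below is a minimal sketch of that pattern. It simply reuses `rag_prompt` and `final_answer` from the previous cell rather than running a fresh search, and the follow-up question is only an illustration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal multi-turn sketch: carry the earlier turns forward explicitly.\n",
+    "followup_messages = [\n",
+    "    {\"role\": \"user\", \"content\": rag_prompt},\n",
+    "    {\"role\": \"assistant\", \"content\": final_answer},\n",
+    "    # Hypothetical follow-up question -- replace with your own.\n",
+    "    {\"role\": \"user\", \"content\": \"Summarize those advantages in one sentence.\"},\n",
+    "]\n",
+    "\n",
+    "followup_completion = openai_client.chat.completions.create(\n",
+    "    model=AZURE_OPENAI_CHAT_MODEL, messages=followup_messages, temperature=0.7\n",
+    ")\n",
+    "\n",
+    "console.print(\n",
+    "    Panel(\n",
+    "        followup_completion.choices[0].message.content,\n",
+    "        title=\"Follow-Up Response\",\n",
+    "        style=\"bold blue\",\n",
+    "    )\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/mkdocs.yml b/mkdocs.yml
index 8f8d86d9..0f3e9dd0 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -76,15 +76,17 @@ nav:
       - "Multimodal export": examples/export_multimodal.py
       - "Force full page OCR": examples/full_page_ocr.py
       - "Accelerator options": examples/run_with_accelerator.py
-      - "Simple translation": examples/translate.py
+      - "Simple translation": examples/translate.py
   - ✂️ Chunking:
-      - "Hybrid chunking": examples/hybrid_chunking.ipynb
-  - 💬 RAG / QA:
+      - examples/hybrid_chunking.ipynb
+  - 🤖 RAG with AI dev frameworks:
       - examples/rag_haystack.ipynb
-      - examples/rag_llamaindex.ipynb
       - examples/rag_langchain.ipynb
+      - examples/rag_llamaindex.ipynb
+  - 🗂️ More examples:
       - examples/rag_weaviate.ipynb
       - RAG with Granite [↗]: https://github.com/ibm-granite-community/granite-snack-cookbook/blob/main/recipes/RAG/Granite_Docling_RAG.ipynb
+      - examples/rag_azuresearch.ipynb
       - examples/retrieval_qdrant.ipynb
   - Integrations:
     - Integrations: integrations/index.md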