feat(gpt4): Add search functionality (QuivrHQ#2566)
This pull request adds search functionality to the application. It allows users to search the internet for information.
StanGirard authored May 9, 2024
1 parent 1125699 commit f5cfa2f
Showing 7 changed files with 664 additions and 95 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -4,7 +4,6 @@ secondbrain/
.streamlit/secrets.toml
**/*.pyc
toto.txt
-*.ipynb
log.txt

backend/venv
4 changes: 4 additions & 0 deletions Pipfile
@@ -64,9 +64,13 @@ flashrank = "*"
langchain-cohere = "*"
pyinstrument = "*"
playwright = "*"
langgraph = "*"
tavily-python = "*"
duckduckgo-search = "*"

[dev-packages]
black = "*"
ipykernel = "*"

[requires]
python_version = "3.11"
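
For context, a minimal sketch of how the duckduckgo-search dependency added above is typically exercised through LangChain's community tool wrapper (the same class the GPT4 brain imports further down); the query string is only an illustration:

from langchain_community.tools import DuckDuckGoSearchResults

# DuckDuckGoSearchResults is backed by the duckduckgo-search package added above.
search = DuckDuckGoSearchResults()
# run() returns a single string of result snippets, titles and links for the query.
print(search.run("What is Quivr?"))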
477 changes: 435 additions & 42 deletions Pipfile.lock

Large diffs are not rendered by default.

208 changes: 171 additions & 37 deletions backend/modules/brain/integrations/GPT4/Brain.py
@@ -1,15 +1,28 @@
import json
-from typing import AsyncIterable
import operator
from typing import Annotated, AsyncIterable, List, Sequence, TypedDict
from uuid import UUID

from langchain_community.chat_models import ChatLiteLLM
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.messages import BaseMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import BaseTool
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
from langgraph.prebuilt import ToolExecutor, ToolInvocation
from logger import get_logger
from modules.brain.knowledge_brain_qa import KnowledgeBrainQA
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.outputs import GetChatHistoryOutput
from modules.chat.service.chat_service import ChatService


class AgentState(TypedDict):
messages: Annotated[Sequence[BaseMessage], operator.add]


# Define the function that determines whether to continue or not

logger = get_logger(__name__)

chat_service = ChatService()
@@ -23,6 +36,10 @@ class GPT4Brain(KnowledgeBrainQA):
KnowledgeBrainQA (_type_): A brain that stores the knowledge internally
"""

tools: List[BaseTool] = [DuckDuckGoSearchResults()]
tool_executor: ToolExecutor = ToolExecutor(tools)
model_function: ChatOpenAI = None

def __init__(
self,
**kwargs,
@@ -34,24 +51,97 @@ def __init__(
def calculate_pricing(self):
return 3

-    def get_chain(self):
-
-        prompt = ChatPromptTemplate.from_messages(
-            [
-                (
-                    "system",
-                    "You are GPT-4 powered by Quivr. You are an assistant. {custom_personality}",
-                ),
-                MessagesPlaceholder(variable_name="chat_history"),
-                ("human", "{question}"),
-            ]
def should_continue(self, state):
messages = state["messages"]
last_message = messages[-1]
# If there is no function call, then we finish
if not last_message.tool_calls:
return "end"
# Otherwise if there is, we continue
else:
return "continue"

# Define the function that calls the model
def call_model(self, state):
messages = state["messages"]
response = self.model_function.invoke(messages)
# We return a list, because this will get added to the existing list
return {"messages": [response]}

# Define the function to execute tools
def call_tool(self, state):
messages = state["messages"]
# Based on the continue condition
# we know the last message involves a function call
last_message = messages[-1]
# We construct a ToolInvocation from the function_call
tool_call = last_message.tool_calls[0]
action = ToolInvocation(
tool=tool_call["name"],
tool_input=tool_call["args"],
)
# We call the tool_executor and get back a response
response = self.tool_executor.invoke(action)
# We use the response to create a ToolMessage
function_message = ToolMessage(
content=str(response), name=action.tool, tool_call_id=tool_call["id"]
)
# We return a list, because this will get added to the existing list
return {"messages": [function_message]}

def create_graph(self):
# Define a new graph
workflow = StateGraph(AgentState)

# Define the two nodes we will cycle between
workflow.add_node("agent", self.call_model)
workflow.add_node("action", self.call_tool)

# Set the entrypoint as `agent`
# This means that this node is the first one called
workflow.set_entry_point("agent")

# We now add a conditional edge
workflow.add_conditional_edges(
# First, we define the start node. We use `agent`.
# This means these are the edges taken after the `agent` node is called.
"agent",
# Next, we pass in the function that will determine which node is called next.
self.should_continue,
# Finally we pass in a mapping.
# The keys are strings, and the values are other nodes.
# END is a special node marking that the graph should finish.
# What will happen is we will call `should_continue`, and then the output of that
# will be matched against the keys in this mapping.
# Based on which one it matches, that node will then be called.
{
# If `tools`, then we call the tool node.
"continue": "action",
# Otherwise we finish.
"end": END,
},
)

# We now add a normal edge from `tools` to `agent`.
# This means that after `tools` is called, `agent` node is called next.
workflow.add_edge("action", "agent")

-        chain = prompt | ChatLiteLLM(
-            model="gpt-4-0125-preview", max_tokens=self.max_tokens
# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
app = workflow.compile()
return app

def get_chain(self):
self.model_function = ChatOpenAI(
model="gpt-4-turbo", temperature=0, streaming=True
)

-        return chain
self.model_function = self.model_function.bind_tools(self.tools)

graph = self.create_graph()

return graph

async def generate_stream(
self, chat_id: UUID, question: ChatQuestion, save_answer: bool = True
@@ -60,22 +150,52 @@ async def generate_stream(
transformed_history, streamed_chat_history = (
self.initialize_streamed_chat_history(chat_id, question)
)
filtered_history = self.filter_history(transformed_history, 20, 2000)
response_tokens = []
config = {"metadata": {"conversation_id": str(chat_id)}}

-        async for chunk in conversational_qa_chain.astream(
-            {
-                "question": question.question,
-                "chat_history": transformed_history,
-                "custom_personality": (
-                    self.prompt_to_use.content if self.prompt_to_use else None
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"You are GPT-4 powered by Quivr. You are an assistant. {custom_personality}",
),
-            },
MessagesPlaceholder(variable_name="chat_history"),
("human", "{question}"),
]
)
prompt_formated = prompt.format_messages(
chat_history=filtered_history,
question=question.question,
custom_personality=(
self.prompt_to_use.content if self.prompt_to_use else None
),
)

async for event in conversational_qa_chain.astream_events(
{"messages": prompt_formated},
config=config,
version="v1",
):
-            response_tokens.append(chunk.content)
-            streamed_chat_history.assistant = chunk.content
-            yield f"data: {json.dumps(streamed_chat_history.dict())}"
kind = event["event"]
if kind == "on_chat_model_stream":
content = event["data"]["chunk"].content
if content:
# Empty content in the context of OpenAI or Anthropic usually means
# that the model is asking for a tool to be invoked.
# So we only print non-empty content
response_tokens.append(content)
streamed_chat_history.assistant = content
yield f"data: {json.dumps(streamed_chat_history.dict())}"
elif kind == "on_tool_start":
print("--")
print(
f"Starting tool: {event['name']} with inputs: {event['data'].get('input')}"
)
elif kind == "on_tool_end":
print(f"Done tool: {event['name']}")
print(f"Tool output was: {event['data'].get('output')}")
print("--")

self.save_answer(question, response_tokens, streamed_chat_history, save_answer)

@@ -86,20 +206,34 @@ def generate_answer(
transformed_history, streamed_chat_history = (
self.initialize_streamed_chat_history(chat_id, question)
)
-        model_response = conversational_qa_chain.invoke(
-            {
-                "question": question.question,
-                "chat_history": transformed_history,
-                "custom_personality": (
-                    self.prompt_to_use.content if self.prompt_to_use else None
filtered_history = self.filter_history(transformed_history, 20, 2000)
response_tokens = []
config = {"metadata": {"conversation_id": str(chat_id)}}

prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"You are GPT-4 powered by Quivr. You are an assistant. {custom_personality}",
),
-            }
MessagesPlaceholder(variable_name="chat_history"),
("human", "{question}"),
]
)
prompt_formated = prompt.format_messages(
chat_history=filtered_history,
question=question.question,
custom_personality=(
self.prompt_to_use.content if self.prompt_to_use else None
),
)
model_response = conversational_qa_chain.invoke(
{"messages": prompt_formated},
config=config,
)

-        answer = model_response.content
answer = model_response["messages"][-1].content

return self.save_non_streaming_answer(
-            chat_id=chat_id,
-            question=question,
-            answer=answer,
chat_id=chat_id, question=question, answer=answer, metadata={}
)
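
Taken together, get_chain() now returns a compiled LangGraph graph rather than an LCEL chain, and both generate_stream and generate_answer feed it a {"messages": [...]} state. Below is a condensed, standalone sketch of the same agent loop with the Quivr-specific plumbing stripped out; it mirrors the imports and node layout used in the diff, assumes OPENAI_API_KEY is configured, and the query string is a placeholder:

import operator
from typing import Annotated, Sequence, TypedDict

from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
from langgraph.prebuilt import ToolExecutor, ToolInvocation


class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]


tools = [DuckDuckGoSearchResults()]
tool_executor = ToolExecutor(tools)
model = ChatOpenAI(model="gpt-4-turbo", temperature=0).bind_tools(tools)


def call_model(state):
    # The returned list is appended to the state thanks to operator.add.
    return {"messages": [model.invoke(state["messages"])]}


def call_tool(state):
    # Execute the first tool call requested by the model and feed the result back.
    tool_call = state["messages"][-1].tool_calls[0]
    result = tool_executor.invoke(
        ToolInvocation(tool=tool_call["name"], tool_input=tool_call["args"])
    )
    return {
        "messages": [
            ToolMessage(
                content=str(result), name=tool_call["name"], tool_call_id=tool_call["id"]
            )
        ]
    }


workflow = StateGraph(AgentState)
workflow.add_node("agent", call_model)
workflow.add_node("action", call_tool)
workflow.set_entry_point("agent")
workflow.add_conditional_edges(
    "agent",
    lambda state: "continue" if state["messages"][-1].tool_calls else "end",
    {"continue": "action", "end": END},
)
workflow.add_edge("action", "agent")
app = workflow.compile()

# The last message of the final state carries the answer once no more tools are requested.
final_state = app.invoke({"messages": [HumanMessage(content="What is Quivr?")]})
print(final_state["messages"][-1].content)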
34 changes: 34 additions & 0 deletions backend/modules/brain/knowledge_brain_qa.py
@@ -217,6 +217,40 @@ def prompt_to_use_id(self) -> Optional[UUID]:
else:
return None

def filter_history(
self, chat_history, max_history: int = 10, max_tokens: int = 2000
):
"""
Filter out the chat history to only include the messages that are relevant to the current question
Takes in a chat_history= [HumanMessage(content='Qui est Chloé ? '), AIMessage(content="Chloé est une salariée travaillant pour l'entreprise Quivr en tant qu'AI Engineer, sous la direction de son supérieur hiérarchique, Stanislas Girard."), HumanMessage(content='Dis moi en plus sur elle'), AIMessage(content=''), HumanMessage(content='Dis moi en plus sur elle'), AIMessage(content="Désolé, je n'ai pas d'autres informations sur Chloé à partir des fichiers fournis.")]
Returns a filtered chat_history with in priority: first max_tokens, then max_history where a Human message and an AI message count as one pair
a token is 4 characters
"""
chat_history = chat_history[::-1]
total_tokens = 0
total_pairs = 0
filtered_chat_history = []
for i in range(0, len(chat_history), 2):
if i + 1 < len(chat_history):
human_message = chat_history[i]
ai_message = chat_history[i + 1]
message_tokens = (
len(human_message.content) + len(ai_message.content)
) // 4
if (
total_tokens + message_tokens > max_tokens
or total_pairs >= max_history
):
break
filtered_chat_history.append(human_message)
filtered_chat_history.append(ai_message)
total_tokens += message_tokens
total_pairs += 1
chat_history = filtered_chat_history[::-1]

return chat_history

def increase_usage_user(self):
# Raises an error if the user has consumed all of his credits

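As an illustration of the new filter_history helper (the brain variable below is a hypothetical instance that inherits it, and the message text is made up): the history is reversed before pairing, so the human_message/ai_message variable names end up swapped relative to the actual roles, but Human/AI pairs are still kept together and the result comes back in chronological order.

from langchain_core.messages import AIMessage, HumanMessage

history = [
    HumanMessage(content="Who is Chloe?"),
    AIMessage(content="Chloe is an AI engineer at Quivr."),
    HumanMessage(content="Tell me more about her"),
    AIMessage(content="She reports to Stanislas Girard."),
]

# Keep at most the single most recent Human/AI pair, within a ~500-token budget;
# tokens are approximated as len(content) // 4.
trimmed = brain.filter_history(history, max_history=1, max_tokens=500)
# -> [HumanMessage("Tell me more about her"),
#     AIMessage("She reports to Stanislas Girard.")]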
1 change: 1 addition & 0 deletions backend/modules/chat/controller/chat/brainful_chat.py
@@ -33,6 +33,7 @@
"gpt-3.5-turbo-0613",
"gpt-4-0125-preview",
"gpt-3.5-turbo",
"gpt-4-turbo",
]

