v2.3.12

agno-agi · Jan 31, 2024 · 0547e45
1 parent 0062b2e
commit 0547e45
Show file tree

Hide file tree

Showing 6 changed files with 25 additions and 18 deletions.
diff --git a/pdf_ai/assistant.py b/pdf_ai/assistant.py
@@ -1,3 +1,4 @@
+import json
 from typing import Optional, List
 
 from phi.assistant import Assistant
@@ -17,7 +18,8 @@ def get_pdf_assistant(
     debug_mode: bool = False,
 ) -> Assistant:
     pdf_tools = PDFTools(user_id=user_id)
-    document_names: Optional[List[str]] = pdf_tools.get_document_names()
+    document_names_json: Optional[str] = pdf_tools.get_document_names()
+    document_names: Optional[List] = json.loads(document_names_json) if document_names_json else None
     logger.info(f"Documents available: {document_names}")
 
     introduction = "Hi, I am PDF AI, built by [phidata](https://github.com/phidatahq/phidata)."
@@ -26,8 +28,7 @@ def get_pdf_assistant(
         "You are made by phidata: https://github.com/phidatahq/phidata",
         f"You are interacting with the user: {user_id}",
         "You have a knowledge base of PDFs that you can use to answer questions.",
-        "When the user asks a question, first determine if you should search the web or your knowledge base for the answer.",
-        "If you need to search the web, use the `search_web` tool to search the web for the answer.",
+        "When the user asks a question, first determine if you can answer the question from the documents in the knowledge base.",
     ]
     if document_names is None or len(document_names) == 0:
         introduction += " Please upload a document to get started."
@@ -36,11 +37,12 @@ def get_pdf_assistant(
         )
     elif len(document_names) == 1:
         introduction += "\n\nAsk me about: {}".format(", ".join(document_names))
-        (f"You have the following documents in your knowledge base: {document_names}",)
         instructions.extend(
             [
+                f"You have the following documents in your knowledge base: {document_names}",
                 "If the user asks a specific question, use the `search_latest_document` tool to search the latest document for context.",
                 "If the user asks a summary, use the `get_latest_document_contents` tool to get the contents of the latest document.",
+                "You can also search the entire knowledge base using the `search_knowledge_base` tool.",
             ]
         )
     else:
@@ -55,9 +57,14 @@ def get_pdf_assistant(
         )
     instructions.extend(
         [
+            "You can also search the entire knowledge base using the `search_knowledge_base` tool.",
             "Keep your conversation light hearted and fun.",
             "Using information from the document, provide the user with a concise and relevant answer.",
             "If the user asks what is this? they are asking about the latest document",
+            "If you cannot find the information in the knowledge base, think if you can find it on the web. If you can find the information on the web, use the `search_web` tool",
+            "When searching the knowledge base, search for at least 3 documents.",
+            "When getting document contents, get atleast 3000 words so you get the first few pages.",
+            "Most documents have a table of contents in the beginning so if you need those, use the `get_document_contents` tool.",
             "If the user compliments you, ask them to star phidata on GitHub: https://github.com/phidatahq/phidata",
         ]
     )

diff --git a/pdf_ai/tools.py b/pdf_ai/tools.py
@@ -58,12 +58,12 @@ def get_latest_document_contents(self, limit: int = 5000) -> Optional[str]:
 
             return latest_document_content[:limit]
 
-    def search_latest_document(self, query: str, num_chunks: int = 5) -> Optional[str]:
+    def search_latest_document(self, query: str, num_documents: int = 5) -> Optional[str]:
         """Use this function to search the latest document uploaded by the user for a query.
 
         Args:
             query (str): Query to search for
-            num_chunks (int): Number of chunks to return. Defaults to 5.
+            num_documents (int): Number of documents to return. Defaults to 5.
 
         Returns:
             str: JSON string of the search results
@@ -91,7 +91,7 @@ def search_latest_document(self, query: str, num_chunks: int = 5) -> Optional[st
             return "Sorry could not find latest document"
 
         search_results: List[Document] = vector_db.search(
-            query=query, limit=num_chunks, filters={"name": latest_document_name}
+            query=query, limit=num_documents, filters={"name": latest_document_name}
         )
         logger.debug(f"Search result: {search_results}")
 
@@ -100,7 +100,7 @@ def search_latest_document(self, query: str, num_chunks: int = 5) -> Optional[st
 
         return json.dumps([doc.to_dict() for doc in search_results])
 
-    def get_document_names(self, limit: int = 20) -> Optional[List[str]]:
+    def get_document_names(self, limit: int = 20) -> Optional[str]:
         """Use this function to get the names of the documents uploaded by the user.
 
         Args:
@@ -130,17 +130,17 @@ def get_document_names(self, limit: int = 20) -> Optional[List[str]]:
                     document_name = row.name
                     document_names.append(document_name)
 
-                return document_names
+                return json.dumps(document_names)
             except Exception as e:
                 logger.error(f"Error getting document names: {e}")
                 return None
 
-    def search_document(self, query: str, document_name: str, num_chunks: int = 5) -> Optional[str]:
+    def search_document(self, query: str, document_name: str, num_documents: int = 5) -> Optional[str]:
         """Use this function to search a specific document, identified by name, for a query.
 
         Args:
             query (str): Query to search for
-            num_chunks (int): Number of chunks to return. Defaults to 5.
+            num_documents (int): Number of documents to return. Defaults to 5.
 
         Returns:
             str: JSON string of the search results
@@ -151,7 +151,7 @@ def search_document(self, query: str, document_name: str, num_chunks: int = 5) -
             return "Sorry could not search latest document"
 
         search_results: List[Document] = self.knowledge_base.vector_db.search(
-            query=query, limit=num_chunks, filters={"name": document_name}
+            query=query, limit=num_documents, filters={"name": document_name}
         )
         logger.debug(f"Search result: {search_results}")
 
@@ -188,7 +188,7 @@ def get_document_contents(self, document_name: str, limit: int = 5000) -> Option
 
             return document_content[:limit]
 
-    # def get_document_introduction(self) -> Optional[str]:
+    # def get_documents_with_intro_section(self) -> Optional[str]:
     #     """Use this function to get a quick introduction to the documents uploaded by the user.
     #     This function will return a dictionary of document names and their first 200 characters.
 
@@ -211,7 +211,7 @@ def get_document_contents(self, document_name: str, limit: int = 5000) -> Option
     #             if rows is None:
     #                 return "Sorry could not find any documents"
 
-    #             document_names = []
+    #             document_intro = {}
     #             for row in rows:
     #                 document_name = row.name
     #                 document_names.append(document_name)

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
   # Linting and Formatting
   "ruff",
   # phidata
-  "phidata==2.3.11",
+  "phidata==2.3.12",
 ]
 
 [build-system]

diff --git a/requirements.txt b/requirements.txt
@@ -56,7 +56,7 @@ packaging==23.2
 pandas==2.2.0
 pandas-stubs==2.1.4.231227
 pgvector==0.2.4
-phidata==2.3.10
+phidata==2.3.12
 pillow==10.2.0
 plotly==5.18.0
 pluggy==1.4.0

diff --git a/workspace/prd_resources.py b/workspace/prd_resources.py
@@ -32,7 +32,7 @@
     tag=ws_settings.prd_env,
     enabled=ws_settings.build_images,
     path=str(ws_settings.ws_root),
-    platform="linux/amd64",
+    platforms=["linux/amd64", "linux/arm64"],
     pull=ws_settings.force_pull_images,
     push_image=ws_settings.push_images,
     skip_docker_cache=ws_settings.skip_image_cache,

diff --git a/workspace/settings.py b/workspace/settings.py
@@ -38,5 +38,5 @@
     # Build images locally
     # build_images=True,
     # Push images after building
-    # push_images=True,
+    push_images=True,
 )