Skip to content
This repository has been archived by the owner on Oct 21, 2024. It is now read-only.

Commit

Permalink
v2.3.12
Browse files Browse the repository at this point in the history
  • Loading branch information
ashpreetbedi committed Jan 31, 2024
1 parent 0062b2e commit 0547e45
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 18 deletions.
15 changes: 11 additions & 4 deletions pdf_ai/assistant.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from typing import Optional, List

from phi.assistant import Assistant
Expand All @@ -17,7 +18,8 @@ def get_pdf_assistant(
debug_mode: bool = False,
) -> Assistant:
pdf_tools = PDFTools(user_id=user_id)
document_names: Optional[List[str]] = pdf_tools.get_document_names()
document_names_json: Optional[str] = pdf_tools.get_document_names()
document_names: Optional[List] = json.loads(document_names_json) if document_names_json else None
logger.info(f"Documents available: {document_names}")

introduction = "Hi, I am PDF AI, built by [phidata](https://github.com/phidatahq/phidata)."
Expand All @@ -26,8 +28,7 @@ def get_pdf_assistant(
"You are made by phidata: https://github.com/phidatahq/phidata",
f"You are interacting with the user: {user_id}",
"You have a knowledge base of PDFs that you can use to answer questions.",
"When the user asks a question, first determine if you should search the web or your knowledge base for the answer.",
"If you need to search the web, use the `search_web` tool to search the web for the answer.",
"When the user asks a question, first determine if you can answer the question from the documents in the knowledge base.",
]
if document_names is None or len(document_names) == 0:
introduction += " Please upload a document to get started."
Expand All @@ -36,11 +37,12 @@ def get_pdf_assistant(
)
elif len(document_names) == 1:
introduction += "\n\nAsk me about: {}".format(", ".join(document_names))
(f"You have the following documents in your knowledge base: {document_names}",)
instructions.extend(
[
f"You have the following documents in your knowledge base: {document_names}",
"If the user asks a specific question, use the `search_latest_document` tool to search the latest document for context.",
"If the user asks a summary, use the `get_latest_document_contents` tool to get the contents of the latest document.",
"You can also search the entire knowledge base using the `search_knowledge_base` tool.",
]
)
else:
Expand All @@ -55,9 +57,14 @@ def get_pdf_assistant(
)
instructions.extend(
[
"You can also search the entire knowledge base using the `search_knowledge_base` tool.",
"Keep your conversation light hearted and fun.",
"Using information from the document, provide the user with a concise and relevant answer.",
"If the user asks what is this? they are asking about the latest document",
"If you cannot find the information in the knowledge base, think if you can find it on the web. If you can find the information on the web, use the `search_web` tool",
"When searching the knowledge base, search for at least 3 documents.",
"When getting document contents, get atleast 3000 words so you get the first few pages.",
"Most documents have a table of contents in the beginning so if you need those, use the `get_document_contents` tool.",
"If the user compliments you, ask them to star phidata on GitHub: https://github.com/phidatahq/phidata",
]
)
Expand Down
20 changes: 10 additions & 10 deletions pdf_ai/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ def get_latest_document_contents(self, limit: int = 5000) -> Optional[str]:

return latest_document_content[:limit]

def search_latest_document(self, query: str, num_chunks: int = 5) -> Optional[str]:
def search_latest_document(self, query: str, num_documents: int = 5) -> Optional[str]:
"""Use this function to search the latest document uploaded by the user for a query.
Args:
query (str): Query to search for
num_chunks (int): Number of chunks to return. Defaults to 5.
num_documents (int): Number of documents to return. Defaults to 5.
Returns:
str: JSON string of the search results
Expand Down Expand Up @@ -91,7 +91,7 @@ def search_latest_document(self, query: str, num_chunks: int = 5) -> Optional[st
return "Sorry could not find latest document"

search_results: List[Document] = vector_db.search(
query=query, limit=num_chunks, filters={"name": latest_document_name}
query=query, limit=num_documents, filters={"name": latest_document_name}
)
logger.debug(f"Search result: {search_results}")

Expand All @@ -100,7 +100,7 @@ def search_latest_document(self, query: str, num_chunks: int = 5) -> Optional[st

return json.dumps([doc.to_dict() for doc in search_results])

def get_document_names(self, limit: int = 20) -> Optional[List[str]]:
def get_document_names(self, limit: int = 20) -> Optional[str]:
"""Use this function to get the names of the documents uploaded by the user.
Args:
Expand Down Expand Up @@ -130,17 +130,17 @@ def get_document_names(self, limit: int = 20) -> Optional[List[str]]:
document_name = row.name
document_names.append(document_name)

return document_names
return json.dumps(document_names)
except Exception as e:
logger.error(f"Error getting document names: {e}")
return None

def search_document(self, query: str, document_name: str, num_chunks: int = 5) -> Optional[str]:
def search_document(self, query: str, document_name: str, num_documents: int = 5) -> Optional[str]:
"""Use this function to search a specific document uploaded by the user for a query.
Args:
query (str): Query to search for
document_name (str): Name of the document to search in
num_chunks (int): Number of chunks to return. Defaults to 5.
num_documents (int): Number of documents to return. Defaults to 5.
Returns:
str: JSON string of the search results
Expand All @@ -151,7 +151,7 @@ def search_document(self, query: str, document_name: str, num_chunks: int = 5) -
return "Sorry could not search latest document"

search_results: List[Document] = self.knowledge_base.vector_db.search(
query=query, limit=num_chunks, filters={"name": document_name}
query=query, limit=num_documents, filters={"name": document_name}
)
logger.debug(f"Search result: {search_results}")

Expand Down Expand Up @@ -188,7 +188,7 @@ def get_document_contents(self, document_name: str, limit: int = 5000) -> Option

return document_content[:limit]

# def get_document_introduction(self) -> Optional[str]:
# def get_documents_with_intro_section(self) -> Optional[str]:
# """Use this function to get a quick introduction to the documents uploaded by the user.
# This function will return a dictionary of document names and their first 200 characters.

Expand All @@ -211,7 +211,7 @@ def get_document_contents(self, document_name: str, limit: int = 5000) -> Option
# if rows is None:
# return "Sorry could not find any documents"

# document_names = []
# document_intro = {}
# for row in rows:
# document_name = row.name
# document_names.append(document_name)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ dependencies = [
# Linting and Formatting
"ruff",
# phidata
"phidata==2.3.11",
"phidata==2.3.12",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ packaging==23.2
pandas==2.2.0
pandas-stubs==2.1.4.231227
pgvector==0.2.4
phidata==2.3.10
phidata==2.3.12
pillow==10.2.0
plotly==5.18.0
pluggy==1.4.0
Expand Down
2 changes: 1 addition & 1 deletion workspace/prd_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
tag=ws_settings.prd_env,
enabled=ws_settings.build_images,
path=str(ws_settings.ws_root),
platform="linux/amd64",
platforms=["linux/amd64", "linux/arm64"],
pull=ws_settings.force_pull_images,
push_image=ws_settings.push_images,
skip_docker_cache=ws_settings.skip_image_cache,
Expand Down
2 changes: 1 addition & 1 deletion workspace/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@
# Build images locally
# build_images=True,
# Push images after building
# push_images=True,
push_images=True,
)

0 comments on commit 0547e45

Please sign in to comment.