Commit v2.3.72
ashpreetbedi committed Apr 17, 2024
1 parent 04404f8 commit 8902502
Showing 10 changed files with 349 additions and 10 deletions.
Empty file added medical_ai/__init__.py
156 changes: 156 additions & 0 deletions medical_ai/app.py
@@ -0,0 +1,156 @@
import json
from typing import List, Optional

import streamlit as st
from phi.tools.streamlit.components import get_username_sidebar

from medical_ai.assistants import (
    SearchTerms,
    SearchResults,
    search_term_generator,
    arxiv_search_assistant,
    research_editor,
    arxiv_toolkit,
)
from medical_ai.search import exa_search


st.set_page_config(
    page_title="Medical Research AI",
    page_icon=":orange_heart:",
    layout="wide",
)
st.title("Medical Research AI")
st.markdown("##### :orange_heart: built using [phidata](https://github.com/phidatahq/phidata)")

disclaimer = """\
This application is not intended to replace professional medical advice, diagnosis, or treatment. The information provided by our AI technology is based on data input and should not be used as the sole basis for making medical decisions. While we strive to offer accurate and up-to-date medical information, the outputs provided by the AI are predictions and may be subject to inaccuracies.
Please consult with a qualified healthcare provider for any questions concerning your medical condition or treatment. Reliance on any information provided by this application is solely at your own risk. The developers and distributors of this app are not liable for any damages or health complications that may result from users interpreting and using the AI-generated medical information.
Use of this application does not establish a doctor-patient relationship. Remember to always seek the advice of your physician or other qualified health provider with any questions you may have regarding a medical condition.
By using this app, you agree to the terms outlined in this disclaimer.\
"""
with st.expander(":rainbow[:point_down: Disclaimer]"):
    st.markdown(disclaimer)


def main() -> None:
    # Get username
    # username = get_username_sidebar()
    # if username:
    #     st.sidebar.info(f":female-doctor: User: {username}")
    # else:
    #     st.markdown("---")
    #     st.markdown("#### :female-doctor: Please enter a username")
    #     return

    # Get topic for report
    input_topic = st.text_input(
        ":female-doctor: Enter a topic to generate a report",
        value="AI in Healthcare",
    )
    # Button to generate report
    generate_report = st.button("Generate Report")
    if generate_report:
        st.session_state["topic"] = input_topic

    # Checkboxes for search
    st.sidebar.markdown("## Search Options")
    search_arxiv = st.sidebar.checkbox("Search ArXiv", value=True)
    search_web = st.sidebar.checkbox("Search Web", value=True)
    search_pubmed = st.sidebar.checkbox("Search PubMed", disabled=True)
    search_google_scholar = st.sidebar.checkbox("Search Google Scholar", disabled=True)
    use_cache = st.sidebar.toggle("Use Cache", value=True)
    num_search_terms = st.sidebar.number_input(
        "Number of Search Terms", value=1, min_value=1, max_value=3, help="This will increase latency."
    )

    st.sidebar.markdown("---")
    st.sidebar.markdown("## Trending Topics")
    if st.sidebar.button("AI in Healthcare"):
        st.session_state["topic"] = "AI in Healthcare"

    if "topic" in st.session_state:
        report_topic = st.session_state["topic"]

        search_terms: Optional[SearchTerms] = None
        with st.status("Generating Search Terms", expanded=True) as status:
            with st.container():
                search_terms_container = st.empty()
                search_generator_input = {"topic": report_topic, "num_terms": num_search_terms}
                search_terms = search_term_generator.run(json.dumps(search_generator_input))
                if search_terms:
                    search_terms_container.json(search_terms.model_dump())
            status.update(label="Search Terms Generated", state="complete", expanded=False)

        if not search_terms:
            st.write("Sorry, report generation failed. Please try again.")
            return

        arxiv_content: Optional[str] = None
        web_content: Optional[str] = None
        if search_arxiv:
            arxiv_search_results: List[SearchResults] = []
            with st.status("Searching ArXiv (this takes a while)", expanded=True) as status:
                with st.container():
                    search_results_container = st.empty()
                    for search_term in search_terms.terms:
                        search_results = arxiv_search_assistant.run(search_term)
                        if search_results:
                            arxiv_search_results.append(search_results)

                    if len(arxiv_search_results) > 0:
                        search_results_container.json(
                            [result.model_dump() for result in arxiv_search_results]
                        )
                status.update(label="ArXiv Search Complete", state="complete", expanded=False)

            if len(arxiv_search_results) > 0:
                arxiv_paper_ids = []
                for search_result in arxiv_search_results:
                    arxiv_paper_ids.extend([result.id for result in search_result.results])

                if len(arxiv_paper_ids) > 0:
                    with st.status("Reading ArXiv Papers", expanded=True) as status:
                        with st.container():
                            arxiv_paper_ids_container = st.empty()
                            arxiv_content = arxiv_toolkit.read_arxiv_papers(arxiv_paper_ids, pages_to_read=2)
                            arxiv_paper_ids_container.json(arxiv_paper_ids)
                        status.update(label="Reading ArXiv Papers Complete", state="complete", expanded=False)

        if search_web:
            _content = {}
            for search_term in search_terms.terms:
                _content[search_term] = exa_search(search_term)
            web_content = json.dumps(_content, indent=4)

        report_input = ""
        report_input += f"# Topic: {report_topic}\n\n"
        report_input += f"## Search Terms\n\n"
        report_input += f"{search_terms}\n\n"
        if arxiv_content:
            report_input += f"## ArXiv Papers\n\n"
            report_input += "<arxiv_papers>\n\n"
            report_input += f"{arxiv_content}\n\n"
            report_input += "</arxiv_papers>\n\n"
        if web_content:
            report_input += f"## Web Content\n\n"
            report_input += "<web_content>\n\n"
            report_input += f"{web_content}\n\n"
            report_input += "</web_content>\n\n"

        with st.spinner("Generating Report"):
            final_report = ""
            final_report_container = st.empty()
            for delta in research_editor.run(report_input):
                final_report += delta  # type: ignore
                final_report_container.markdown(final_report)

    st.sidebar.markdown("---")
    if st.sidebar.button("Restart"):
        st.rerun()


main()
74 changes: 74 additions & 0 deletions medical_ai/assistants.py
@@ -0,0 +1,74 @@
from textwrap import dedent
from typing import List
from pathlib import Path

from pydantic import BaseModel, Field
from phi.assistant.team import Assistant
from phi.tools.arxiv_toolkit import ArxivToolkit

arxiv_toolkit = ArxivToolkit(
    download_dir=Path(__file__).parent.parent.parent.parent.joinpath("wip", "arxiv_pdfs")
)


class SearchTerms(BaseModel):
    terms: List[str] = Field(..., description="List of 2 search terms related to a topic.")


class SearchResult(BaseModel):
    title: str = Field(..., description="Title of the article.")
    id: str = Field(..., description="The ID of the article.")
    summary: str = Field(..., description="Summary from the article.")
    pdf_url: str = Field(..., description="URL of the PDF of the article.")
    links: List[str] = Field(..., description="Links for the article.")
    reasoning: str = Field(
        ..., description="Clear description of why you chose this article from the results."
    )


class SearchResults(BaseModel):
    results: List[SearchResult] = Field(..., description="List of top search results.")


search_term_generator = Assistant(
    name="Medical Search Generator",
    description=dedent(
        """\
        You are a world-class medical researcher assigned a very important task.
        You will be given a topic and the number of search terms to generate.
        You will generate a list of search terms for writing an article on that topic.
        These terms will be used to search the web for the most relevant articles on the topic.\
        """
    ),
    output_model=SearchTerms,
    debug_mode=True,
)

arxiv_search_assistant = Assistant(
    name="Arxiv Search Assistant",
    description=dedent(
        """\
        You are a world-class medical researcher assigned a very important task.
        Given a topic, search ArXiv for the top 10 articles about that topic and return the 3 most relevant articles to that topic.
        This is an important task and your output should be highly relevant to the original topic.\
        """
    ),
    tools=[arxiv_toolkit],
    output_model=SearchResults,
    debug_mode=True,
)

research_editor = Assistant(
    name="Medical Research Editor",
    description="You are a world-class medical researcher and your task is to generate a medical-journal-worthy report in the style of the New York Times.",
    instructions=[
        "You will be provided with a topic and a list of articles along with their summary and content.",
        "Carefully read each article and generate a medical-journal-worthy report in the style of the New York Times.",
        "The report should be clear, concise, and informative.",
        "Focus on providing a high-level overview of the topic and the key findings from the articles.",
        "Do not copy the content from the articles, but use the information to generate a high-quality report.",
        "Do not include any personal opinions or biases in the report.",
    ],
    markdown=True,
    debug_mode=True,
)
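Note: a minimal sketch of exercising the structured-output assistant above directly (assuming an OpenAI API key is configured for the underlying phidata Assistant; the JSON payload mirrors the one app.py builds):

    from rich.pretty import pprint

    from medical_ai.assistants import SearchTerms, search_term_generator

    # output_model=SearchTerms means run() should hand back a parsed SearchTerms instance
    terms: SearchTerms = search_term_generator.run('{"topic": "AI in Healthcare", "num_terms": 2}')  # type: ignore
    pprint(terms.terms)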
38 changes: 38 additions & 0 deletions medical_ai/generate_report.py
@@ -0,0 +1,38 @@
import json
from typing import List

from rich.pretty import pprint

from medical_ai.assistants import (
    SearchTerms,
    SearchResults,
    search_term_generator,
    arxiv_search_assistant,
    research_editor,
    arxiv_toolkit,
)

# Topic to generate a report on
topic = "AI in Healthcare"

# Generate a list of search terms
search_terms: SearchTerms = search_term_generator.run(topic) # type: ignore
pprint(search_terms)

# Generate a list of search results
arxiv_search_results: List[SearchResults] = []
for search_term in search_terms.terms:
    search_results: SearchResults = arxiv_search_assistant.run(search_term)  # type: ignore
    arxiv_search_results.append(search_results)
# pprint(arxiv_search_results)

search_result_ids = []
for search_result in arxiv_search_results:
    search_result_ids.extend([result.id for result in search_result.results])

# Read the content of the search results
search_result_content = arxiv_toolkit.read_arxiv_papers(search_result_ids, pages_to_read=2)

research_editor.print_response(
    json.dumps({"topic": "AI in Healthcare", "articles": search_result_content}, indent=4), show_message=False
)
7 changes: 7 additions & 0 deletions medical_ai/search.py
@@ -0,0 +1,7 @@
from phi.tools.exa import ExaTools

exa_tools = ExaTools()


def exa_search(query: str) -> str:
    return exa_tools.search_exa_with_contents(query=query)
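Note: for completeness, a minimal sketch of the web-search leg that app.py wires in via this helper (assuming an Exa API key is configured for ExaTools, e.g. through the EXA_API_KEY environment variable; the example search terms are illustrative only):

    import json

    from medical_ai.search import exa_search

    # Mirrors the search_web branch in app.py: one Exa search per term,
    # serialized to JSON before being added to the report input.
    _content = {}
    for search_term in ["AI in Healthcare", "LLMs for clinical decision support"]:
        _content[search_term] = exa_search(search_term)
    print(json.dumps(_content, indent=4))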
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
"sqlalchemy",
# Project libraries
"arxiv",
+ "exa_py",
"openai",
"pypdf",
"aiohttp",
@@ -49,7 +50,7 @@ dependencies = [
# Linting and Formatting
"ruff",
# phidata
- "phidata==2.3.40",
+ "phidata[aws]==2.3.72",
]

[build-system]
17 changes: 11 additions & 6 deletions requirements.txt
@@ -1,5 +1,5 @@
#
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# ./scripts/upgrade.sh
@@ -11,11 +11,12 @@ altair==5.2.0
annotated-types==0.6.0
anyio==4.2.0
arxiv==2.1.0
+async-timeout==4.0.3
attrs==23.2.0
beautifulsoup4==4.12.3
blinker==1.7.0
-boto3==1.34.34
-botocore==1.34.34
+boto3==1.7.84
+botocore==1.10.84
cachetools==5.3.2
certifi==2024.2.2
charset-normalizer==3.3.2
@@ -25,7 +26,10 @@ cycler==0.12.1
discord-py==2.3.2
distro==1.9.0
docker==7.0.0
+docutils==0.21.1
duckdb==0.9.2
+exa-py==1.0.9
+exceptiongroup==1.2.0
fastapi==0.109.2
feedparser==6.0.10
fonttools==4.47.2
@@ -38,9 +42,10 @@ httpcore==1.0.2
httpx==0.26.0
idna==3.6
importlib-metadata==7.0.1
+importlib-resources==6.4.0
iniconfig==2.0.0
jinja2==3.1.3
-jmespath==1.0.1
+jmespath==0.10.0
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
@@ -58,7 +63,7 @@ packaging==23.2
pandas==2.2.0
pandas-stubs==2.1.4.231227
pgvector==0.2.4
-phidata==2.3.40
+phidata[aws]==2.3.72
pillow==10.2.0
plotly==5.18.0
pluggy==1.4.0
@@ -86,7 +91,7 @@ resend==0.7.2
rich==13.7.0
rpds-py==0.17.1
ruff==0.2.0
-s3transfer==0.10.0
+s3transfer==0.1.13
scipy==1.12.0
seaborn==0.13.2
sgmllib3k==1.0.0