From 31d2b731e4d438a1a3b99f2cbb64347d98a9860e Mon Sep 17 00:00:00 2001
From: Arkaprava De
Date: Sat, 22 Feb 2025 00:20:40 -0800
Subject: [PATCH 1/3] Added AWS Bedrock support as a provider

**Description**

Added AWS Bedrock support as a provider.

**Motivation**

Allows the web UI to run against models hosted on AWS Bedrock.

**Testing Done**

Tested locally.

**Backwards Compatibility Criteria (if any)**

---
 .env.example       |  4 ++++
 requirements.txt   |  1 +
 src/utils/utils.py | 21 ++++++++++++++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index 8e979020..89208bf4 100644
--- a/.env.example
+++ b/.env.example
@@ -24,6 +24,10 @@ ALIBABA_API_KEY=
 MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
 MOONSHOT_API_KEY=
 
+# AWS only needs the region and the default AWS credentials. TODO: Add support for AWS profiles
+AWS_BEDROCK_REGION=us-west-2
+BEDROCK_API_KEY=
+
 # Set to false to disable anonymized telemetry
 ANONYMIZED_TELEMETRY=true
 
diff --git a/requirements.txt b/requirements.txt
index 74f08744..7d456dc3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pyperclip==1.9.0
 gradio==5.10.0
 json-repair
 langchain-mistralai==0.2.4
+boto3==1.18.63
diff --git a/src/utils/utils.py b/src/utils/utils.py
index b604812b..3c524733 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -4,12 +4,14 @@ from pathlib import Path
 from typing import Dict, Optional
 
 import requests
+import boto3
 from langchain_anthropic import ChatAnthropic
 from langchain_mistralai import ChatMistralAI
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
 from langchain_openai import AzureChatOpenAI, ChatOpenAI
+from langchain_aws import ChatBedrock
 import gradio as gr
 
 from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama
 
@@ -21,7 +23,8 @@
     "deepseek": "DeepSeek",
     "google": "Google",
     "alibaba": "Alibaba",
-    "moonshot": "MoonShot"
+    "moonshot": "MoonShot",
+    "bedrock": "AWS Bedrock"
 }
 
 def get_llm_model(provider: str, **kwargs):
@@ -158,6 +161,21 @@ def get_llm_model(provider: str, **kwargs):
             base_url=os.getenv("MOONSHOT_ENDPOINT"),
             api_key=os.getenv("MOONSHOT_API_KEY"),
         )
+    elif provider == "bedrock":
+        region = kwargs.get("region", "") or os.getenv("AWS_BEDROCK_REGION", "us-west-2")
+
+        session = boto3.Session(region_name=region)
+        bedrock_runtime = session.client(
+            service_name="bedrock-runtime",
+            region_name=region,
+        )
+
+        model_id = kwargs.get("model_name", "anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+        return ChatBedrock(
+            client=bedrock_runtime,
+            model_id=model_id,
+        )
     else:
         raise ValueError(f"Unsupported provider: {provider}")
 
@@ -172,6 +190,7 @@
     "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
     "alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"],
     "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
+    "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0"]
 }
 
 # Callback to update the model name dropdown based on the selected provider

From d86a1e31c8a529195def165d6c06323265059801 Mon Sep 17 00:00:00 2001
From: Arkaprava De
Date: Wed, 26 Mar 2025 11:58:29 -0700
Subject: [PATCH 2/3] Remove Deep Research, switch to langchain-aws, and add a prerequisite config field

---
 docker-compose.yml                   |   1 +
 requirements.txt                     |   2 +-
 src/utils/deep_research.py           | 377 ---------------------------
 src/utils/default_config_settings.py |  18 +-
 tests/test_deep_research.py          |  30 ---
 webui.py                             | 142 +++-------
 6 files changed, 55 insertions(+), 515 deletions(-)
 delete mode 100644 src/utils/deep_research.py
delete mode 100644 tests/test_deep_research.py diff --git a/docker-compose.yml b/docker-compose.yml index 7f398e81..6aad310b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -35,6 +35,7 @@ services: - CHROME_DEBUGGING_HOST=localhost volumes: - /tmp/.X11-unix:/tmp/.X11-unix + - ~/.aws/credentials:/root/.aws/credentials restart: unless-stopped shm_size: '2gb' cap_add: diff --git a/requirements.txt b/requirements.txt index 7d456dc3..133a6031 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ pyperclip==1.9.0 gradio==5.10.0 json-repair langchain-mistralai==0.2.4 -boto3==1.18.63 +langchain-aws diff --git a/src/utils/deep_research.py b/src/utils/deep_research.py deleted file mode 100644 index 834c0e11..00000000 --- a/src/utils/deep_research.py +++ /dev/null @@ -1,377 +0,0 @@ -import pdb - -from dotenv import load_dotenv - -load_dotenv() -import asyncio -import os -import sys -import logging -from pprint import pprint -from uuid import uuid4 -from src.utils import utils -from src.agent.custom_agent import CustomAgent -import json -import re -from browser_use.agent.service import Agent -from browser_use.browser.browser import BrowserConfig, Browser -from browser_use.agent.views import ActionResult -from browser_use.browser.context import BrowserContext -from browser_use.controller.service import Controller, DoneAction -from main_content_extractor import MainContentExtractor -from langchain.schema import SystemMessage, HumanMessage -from json_repair import repair_json -from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt -from src.controller.custom_controller import CustomController -from src.browser.custom_browser import CustomBrowser -from src.browser.custom_context import BrowserContextConfig, BrowserContext -from browser_use.browser.context import ( - BrowserContextConfig, - BrowserContextWindowSize, -) - -logger = logging.getLogger(__name__) - - -async def deep_research(task, llm, agent_state=None, **kwargs): - task_id = str(uuid4()) - save_dir = kwargs.get("save_dir", os.path.join(f"./tmp/deep_research/{task_id}")) - logger.info(f"Save Deep Research at: {save_dir}") - os.makedirs(save_dir, exist_ok=True) - - # max qyery num per iteration - max_query_num = kwargs.get("max_query_num", 3) - - use_own_browser = kwargs.get("use_own_browser", False) - extra_chromium_args = [] - if use_own_browser: - # TODO: if use own browser, max query num must be 1 per iter, how to solve it? 
- max_query_num = 1 - chrome_path = os.getenv("CHROME_PATH", None) - if chrome_path == "": - chrome_path = None - chrome_user_data = os.getenv("CHROME_USER_DATA", None) - if chrome_user_data: - extra_chromium_args += [f"--user-data-dir={chrome_user_data}"] - - browser = CustomBrowser( - config=BrowserConfig( - headless=kwargs.get("headless", False), - disable_security=kwargs.get("disable_security", True), - chrome_instance_path=chrome_path, - extra_chromium_args=extra_chromium_args, - ) - ) - browser_context = await browser.new_context() - else: - browser = None - browser_context = None - - controller = CustomController() - - @controller.registry.action( - 'Extract page content to get the pure markdown.', - ) - async def extract_content(browser: BrowserContext): - page = await browser.get_current_page() - # use jina reader - url = page.url - - jina_url = f"https://r.jina.ai/{url}" - await page.goto(jina_url) - output_format = 'markdown' - content = MainContentExtractor.extract( # type: ignore - html=await page.content(), - output_format=output_format, - ) - # go back to org url - await page.go_back() - msg = f'Extracted page content:\n {content}\n' - logger.info(msg) - return ActionResult(extracted_content=msg) - - search_system_prompt = f""" - You are a **Deep Researcher**, an AI agent specializing in in-depth information gathering and research using a web browser with **automated execution capabilities**. Your expertise lies in formulating comprehensive research plans and executing them meticulously to fulfill complex user requests. You will analyze user instructions, devise a detailed research plan, and determine the necessary search queries to gather the required information. - - **Your Task:** - - Given a user's research topic, you will: - - 1. **Develop a Research Plan:** Outline the key aspects and subtopics that need to be investigated to thoroughly address the user's request. This plan should be a high-level overview of the research direction. - 2. **Generate Search Queries:** Based on your research plan, generate a list of specific search queries to be executed in a web browser. These queries should be designed to efficiently gather relevant information for each aspect of your plan. - - **Output Format:** - - Your output will be a JSON object with the following structure: - - ```json - {{ - "plan": "A concise, high-level research plan outlining the key areas to investigate.", - "queries": [ - "search query 1", - "search query 2", - //... up to a maximum of {max_query_num} search queries - ] - }} - ``` - - **Important:** - - * Limit your output to a **maximum of {max_query_num}** search queries. - * Make the search queries to help the automated agent find the needed information. Consider what keywords are most likely to lead to useful results. - * If you have gathered for all the information you want and no further search queries are required, output queries with an empty list: `[]` - * Make sure output search queries are different from the history queries. - - **Inputs:** - - 1. **User Instruction:** The original instruction given by the user. - 2. **Previous Queries:** History Queries. - 3. **Previous Search Results:** Textual data gathered from prior search queries. If there are no previous search results this string will be empty. - """ - search_messages = [SystemMessage(content=search_system_prompt)] - - record_system_prompt = """ - You are an expert information recorder. 
Your role is to process user instructions, current search results, and previously recorded information to extract, summarize, and record new, useful information that helps fulfill the user's request. Your output will be a JSON formatted list, where each element represents a piece of extracted information and follows the structure: `{"url": "source_url", "title": "source_title", "summary_content": "concise_summary", "thinking": "reasoning"}`. - -**Important Considerations:** - -1. **Minimize Information Loss:** While concise, prioritize retaining important details and nuances from the sources. Aim for a summary that captures the essence of the information without over-simplification. **Crucially, ensure to preserve key data and figures within the `summary_content`. This is essential for later stages, such as generating tables and reports.** - -2. **Avoid Redundancy:** Do not record information that is already present in the Previous Recorded Information. Check for semantic similarity, not just exact matches. However, if the same information is expressed differently in a new source and this variation adds valuable context or clarity, it should be included. - -3. **Source Information:** Extract and include the source title and URL for each piece of information summarized. This is crucial for verification and context. **The Current Search Results are provided in a specific format, where each item starts with "Title:", followed by the title, then "URL Source:", followed by the URL, and finally "Markdown Content:", followed by the content. Please extract the title and URL from this structure.** If a piece of information cannot be attributed to a specific source from the provided search results, use `"url": "unknown"` and `"title": "unknown"`. - -4. **Thinking and Report Structure:** For each extracted piece of information, add a `"thinking"` key. This field should contain your assessment of how this information could be used in a report, which section it might belong to (e.g., introduction, background, analysis, conclusion, specific subtopics), and any other relevant thoughts about its significance or connection to other information. - -**Output Format:** - -Provide your output as a JSON formatted list. Each item in the list must adhere to the following format: - -```json -[ - { - "url": "source_url_1", - "title": "source_title_1", - "summary_content": "Concise summary of content. Remember to include key data and figures here.", - "thinking": "This could be used in the introduction to set the context. It also relates to the section on the history of the topic." - }, - // ... more entries - { - "url": "unknown", - "title": "unknown", - "summary_content": "concise_summary_of_content_without_clear_source", - "thinking": "This might be useful background information, but I need to verify its accuracy. Could be used in the methodology section to explain how data was collected." - } -] -``` - -**Inputs:** - -1. **User Instruction:** The original instruction given by the user. This helps you determine what kind of information will be useful and how to structure your thinking. -2. **Previous Recorded Information:** Textual data gathered and recorded from previous searches and processing, represented as a single text string. -3. **Current Search Plan:** Research plan for current search. -4. **Current Search Query:** The current search query. -5. **Current Search Results:** Textual data gathered from the most recent search query. 
- """ - record_messages = [SystemMessage(content=record_system_prompt)] - - search_iteration = 0 - max_search_iterations = kwargs.get("max_search_iterations", 10) # Limit search iterations to prevent infinite loop - use_vision = kwargs.get("use_vision", False) - - history_query = [] - history_infos = [] - try: - while search_iteration < max_search_iterations: - search_iteration += 1 - logger.info(f"Start {search_iteration}th Search...") - history_query_ = json.dumps(history_query, indent=4) - history_infos_ = json.dumps(history_infos, indent=4) - query_prompt = f"This is search {search_iteration} of {max_search_iterations} maximum searches allowed.\n User Instruction:{task} \n Previous Queries:\n {history_query_} \n Previous Search Results:\n {history_infos_}\n" - search_messages.append(HumanMessage(content=query_prompt)) - ai_query_msg = llm.invoke(search_messages[:1] + search_messages[1:][-1:]) - search_messages.append(ai_query_msg) - if hasattr(ai_query_msg, "reasoning_content"): - logger.info("đŸ¤¯ Start Search Deep Thinking: ") - logger.info(ai_query_msg.reasoning_content) - logger.info("đŸ¤¯ End Search Deep Thinking") - ai_query_content = ai_query_msg.content.replace("```json", "").replace("```", "") - ai_query_content = repair_json(ai_query_content) - ai_query_content = json.loads(ai_query_content) - query_plan = ai_query_content["plan"] - logger.info(f"Current Iteration {search_iteration} Planing:") - logger.info(query_plan) - query_tasks = ai_query_content["queries"] - if not query_tasks: - break - else: - query_tasks = query_tasks[:max_query_num] - history_query.extend(query_tasks) - logger.info("Query tasks:") - logger.info(query_tasks) - - # 2. Perform Web Search and Auto exec - # Parallel BU agents - add_infos = "1. Please click on the most relevant link to get information and go deeper, instead of just staying on the search page. \n" \ - "2. When opening a PDF file, please remember to extract the content using extract_content instead of simply opening it for the user to view.\n" - if use_own_browser: - agent = CustomAgent( - task=query_tasks[0], - llm=llm, - add_infos=add_infos, - browser=browser, - browser_context=browser_context, - use_vision=use_vision, - system_prompt_class=CustomSystemPrompt, - agent_prompt_class=CustomAgentMessagePrompt, - max_actions_per_step=5, - controller=controller - ) - agent_result = await agent.run(max_steps=kwargs.get("max_steps", 10)) - query_results = [agent_result] - # Manually close all tab - session = await browser_context.get_session() - pages = session.context.pages - await browser_context.create_new_tab() - for page_id, page in enumerate(pages): - await page.close() - - else: - agents = [CustomAgent( - task=task, - llm=llm, - add_infos=add_infos, - browser=browser, - browser_context=browser_context, - use_vision=use_vision, - system_prompt_class=CustomSystemPrompt, - agent_prompt_class=CustomAgentMessagePrompt, - max_actions_per_step=5, - controller=controller, - ) for task in query_tasks] - query_results = await asyncio.gather( - *[agent.run(max_steps=kwargs.get("max_steps", 10)) for agent in agents]) - - if agent_state and agent_state.is_stop_requested(): - # Stop - break - # 3. 
Summarize Search Result - query_result_dir = os.path.join(save_dir, "query_results") - os.makedirs(query_result_dir, exist_ok=True) - for i in range(len(query_tasks)): - query_result = query_results[i].final_result() - if not query_result: - continue - querr_save_path = os.path.join(query_result_dir, f"{search_iteration}-{i}.md") - logger.info(f"save query: {query_tasks[i]} at {querr_save_path}") - with open(querr_save_path, "w", encoding="utf-8") as fw: - fw.write(f"Query: {query_tasks[i]}\n") - fw.write(query_result) - # split query result in case the content is too long - query_results_split = query_result.split("Extracted page content:") - for qi, query_result_ in enumerate(query_results_split): - if not query_result_: - continue - else: - # TODO: limit content lenght: 128k tokens, ~3 chars per token - query_result_ = query_result_[:128000 * 3] - history_infos_ = json.dumps(history_infos, indent=4) - record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {history_infos_}\n Current Search Iteration: {search_iteration}\n Current Search Plan:\n{query_plan}\n Current Search Query:\n {query_tasks[i]}\n Current Search Results: {query_result_}\n " - record_messages.append(HumanMessage(content=record_prompt)) - ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:]) - record_messages.append(ai_record_msg) - if hasattr(ai_record_msg, "reasoning_content"): - logger.info("đŸ¤¯ Start Record Deep Thinking: ") - logger.info(ai_record_msg.reasoning_content) - logger.info("đŸ¤¯ End Record Deep Thinking") - record_content = ai_record_msg.content - record_content = repair_json(record_content) - new_record_infos = json.loads(record_content) - history_infos.extend(new_record_infos) - if agent_state and agent_state.is_stop_requested(): - # Stop - break - - logger.info("\nFinish Searching, Start Generating Report...") - - # 5. Report Generation in Markdown (or JSON if you prefer) - return await generate_final_report(task, history_infos, save_dir, llm) - - except Exception as e: - logger.error(f"Deep research Error: {e}") - return await generate_final_report(task, history_infos, save_dir, llm, str(e)) - finally: - if browser: - await browser.close() - if browser_context: - await browser_context.close() - logger.info("Browser closed.") - -async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None): - """Generate report from collected information with error handling""" - try: - logger.info("\nAttempting to generate final report from collected data...") - - writer_system_prompt = """ - You are a **Deep Researcher** and a professional report writer tasked with creating polished, high-quality reports that fully meet the user's needs, based on the user's instructions and the relevant information provided. You will write the report using Markdown format, ensuring it is both informative and visually appealing. - -**Specific Instructions:** - -* **Structure for Impact:** The report must have a clear, logical, and impactful structure. Begin with a compelling introduction that immediately grabs the reader's attention. Develop well-structured body paragraphs that flow smoothly and logically, and conclude with a concise and memorable conclusion that summarizes key takeaways and leaves a lasting impression. -* **Engaging and Vivid Language:** Employ precise, vivid, and descriptive language to make the report captivating and enjoyable to read. Use stylistic techniques to enhance engagement. 
Tailor your tone, vocabulary, and writing style to perfectly suit the subject matter and the intended audience to maximize impact and readability. -* **Accuracy, Credibility, and Citations:** Ensure that all information presented is meticulously accurate, rigorously truthful, and robustly supported by the available data. **Cite sources exclusively using bracketed sequential numbers within the text (e.g., [1], [2], etc.). If no references are used, omit citations entirely.** These numbers must correspond to a numbered list of references at the end of the report. -* **Publication-Ready Formatting:** Adhere strictly to Markdown formatting for excellent readability and a clean, highly professional visual appearance. Pay close attention to formatting details like headings, lists, emphasis, and spacing to optimize the visual presentation and reader experience. The report should be ready for immediate publication upon completion, requiring minimal to no further editing for style or format. -* **Conciseness and Clarity (Unless Specified Otherwise):** When the user does not provide a specific length, prioritize concise and to-the-point writing, maximizing information density while maintaining clarity. -* **Data-Driven Comparisons with Tables:** **When appropriate and beneficial for enhancing clarity and impact, present data comparisons in well-structured Markdown tables. This is especially encouraged when dealing with numerical data or when a visual comparison can significantly improve the reader's understanding.** -* **Length Adherence:** When the user specifies a length constraint, meticulously stay within reasonable bounds of that specification, ensuring the content is appropriately scaled without sacrificing quality or completeness. -* **Comprehensive Instruction Following:** Pay meticulous attention to all details and nuances provided in the user instructions. Strive to fulfill every aspect of the user's request with the highest degree of accuracy and attention to detail, creating a report that not only meets but exceeds expectations for quality and professionalism. -* **Reference List Formatting:** The reference list at the end must be formatted as follows: - `[1] Title (URL, if available)` - **Each reference must be separated by a blank line to ensure proper spacing.** For example: - - ``` - [1] Title 1 (URL1, if available) - - [2] Title 2 (URL2, if available) - ``` - **Furthermore, ensure that the reference list is free of duplicates. Each unique source should be listed only once, regardless of how many times it is cited in the text.** -* **ABSOLUTE FINAL OUTPUT RESTRICTION:** **Your output must contain ONLY the finished, publication-ready Markdown report. Do not include ANY extraneous text, phrases, preambles, meta-commentary, or markdown code indicators (e.g., "```markdown```"). The report should begin directly with the title and introductory paragraph, and end directly after the conclusion and the reference list (if applicable).** **Your response will be deemed a failure if this instruction is not followed precisely.** - -**Inputs:** - -1. **User Instruction:** The original instruction given by the user. This helps you determine what kind of information will be useful and how to structure your thinking. -2. **Search Information:** Information gathered from the search queries. 
- """ - - history_infos_ = json.dumps(history_infos, indent=4) - record_json_path = os.path.join(save_dir, "record_infos.json") - logger.info(f"save All recorded information at {record_json_path}") - with open(record_json_path, "w") as fw: - json.dump(history_infos, fw, indent=4) - report_prompt = f"User Instruction:{task} \n Search Information:\n {history_infos_}" - report_messages = [SystemMessage(content=writer_system_prompt), - HumanMessage(content=report_prompt)] # New context for report generation - ai_report_msg = llm.invoke(report_messages) - if hasattr(ai_report_msg, "reasoning_content"): - logger.info("đŸ¤¯ Start Report Deep Thinking: ") - logger.info(ai_report_msg.reasoning_content) - logger.info("đŸ¤¯ End Report Deep Thinking") - report_content = ai_report_msg.content - report_content = re.sub(r"^```\s*markdown\s*|^\s*```|```\s*$", "", report_content, flags=re.MULTILINE) - report_content = report_content.strip() - - # Add error notification to the report - if error_msg: - report_content = f"## âš ī¸ Research Incomplete - Partial Results\n" \ - f"**The research process was interrupted by an error:** {error_msg}\n\n" \ - f"{report_content}" - - report_file_path = os.path.join(save_dir, "final_report.md") - with open(report_file_path, "w", encoding="utf-8") as f: - f.write(report_content) - logger.info(f"Save Report at: {report_file_path}") - return report_content, report_file_path - - except Exception as report_error: - logger.error(f"Failed to generate partial report: {report_error}") - return f"Error generating report: {str(report_error)}", None diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py index e6fa88f9..5daf83ca 100644 --- a/src/utils/default_config_settings.py +++ b/src/utils/default_config_settings.py @@ -12,8 +12,22 @@ def default_config(): "max_actions_per_step": 10, "use_vision": True, "tool_calling_method": "auto", - "llm_provider": "openai", - "llm_model_name": "gpt-4o", + "llm_provider": "bedrock", + "llm_model_name": "anthropic.claude-3-5-sonnet-20241022-v2:0", + "prerequisite": """ +import boto3 + +session = boto3.Session(region_name="us-west-2") +sagemaker_client = session.client("sagemaker") + +response = sagemaker_client.create_presigned_domain_url( + DomainId="d-1234567890", + UserProfileName="arkaprav-ssh-test" + ) + +PLACEHOLDER = response["AuthorizedUrl"] + +""", "llm_num_ctx": 32000, "llm_temperature": 1.0, "llm_base_url": "", diff --git a/tests/test_deep_research.py b/tests/test_deep_research.py deleted file mode 100644 index 762345d0..00000000 --- a/tests/test_deep_research.py +++ /dev/null @@ -1,30 +0,0 @@ -import asyncio -import os -from dotenv import load_dotenv - -load_dotenv() -import sys - -sys.path.append(".") - -async def test_deep_research(): - from src.utils.deep_research import deep_research - from src.utils import utils - - task = "write a report about DeepSeek-R1, get its pdf" - llm = utils.get_llm_model( - provider="gemini", - model_name="gemini-2.0-flash-thinking-exp-01-21", - temperature=1.0, - api_key=os.getenv("GOOGLE_API_KEY", "") - ) - - report_content, report_file_path = await deep_research(task=task, llm=llm, agent_state=None, - max_search_iterations=1, - max_query_num=3, - use_own_browser=False) - - - -if __name__ == "__main__": - asyncio.run(test_deep_research()) \ No newline at end of file diff --git a/webui.py b/webui.py index e770d99d..a07fe5b6 100644 --- a/webui.py +++ b/webui.py @@ -94,31 +94,6 @@ async def stop_agent(): gr.update(value="Stop", interactive=True), 
gr.update(interactive=True) ) - -async def stop_research_agent(): - """Request the agent to stop and update UI with enhanced feedback""" - global _global_agent_state, _global_browser_context, _global_browser - - try: - # Request stop - _global_agent_state.request_stop() - - # Update UI immediately - message = "Stop requested - the agent will halt at the next safe point" - logger.info(f"🛑 {message}") - - # Return UI updates - return ( # errors_output - gr.update(value="Stopping...", interactive=False), # stop_button - gr.update(interactive=False), # run_button - ) - except Exception as e: - error_msg = f"Error during stop: {str(e)}" - logger.error(error_msg) - return ( - gr.update(value="Stop", interactive=True), - gr.update(interactive=True) - ) async def run_browser_agent( agent_type, @@ -657,32 +632,6 @@ async def close_global_browser(): if _global_browser: await _global_browser.close() _global_browser = None - -async def run_deep_search(research_task, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless): - from src.utils.deep_research import deep_research - global _global_agent_state - - # Clear any previous stop request - _global_agent_state.clear_stop() - - llm = utils.get_llm_model( - provider=llm_provider, - model_name=llm_model_name, - num_ctx=llm_num_ctx, - temperature=llm_temperature, - base_url=llm_base_url, - api_key=llm_api_key, - ) - markdown_content, file_path = await deep_research(research_task, llm, _global_agent_state, - max_search_iterations=max_search_iteration_input, - max_query_num=max_query_per_iter_input, - use_vision=use_vision, - headless=headless, - use_own_browser=use_own_browser - ) - - return markdown_content, file_path, gr.update(value="Stop", interactive=True), gr.update(interactive=True) - def create_ui(config, theme_name="Ocean"): css = """ @@ -715,7 +664,40 @@ def create_ui(config, theme_name="Ocean"): ) with gr.Tabs() as tabs: - with gr.TabItem("âš™ī¸ Agent Settings", id=1): + with gr.TabItem("🤖 Run Agent", id=1): + prerequisite = gr.Textbox( + label="Prerequisite", + lines=10, + placeholder="Add any prerequisites...", + value=config['prerequisite'], + info="Optional prerequisites for the task", + ) + + task = gr.Textbox( + label="Task Description", + lines=4, + placeholder="Enter your task here...", + value=config['task'], + info="Describe what you want the agent to do", + ) + + add_infos = gr.Textbox( + label="Additional Information", + lines=3, + placeholder="Add any helpful context or instructions...", + info="Optional hints to help the LLM complete the task", + ) + + with gr.Row(): + run_button = gr.Button("â–ļī¸ Run Agent", variant="primary", scale=2) + stop_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) + + with gr.Row(): + browser_view = gr.HTML( + value="

Waiting for browser session...
", + label="Live Browser View", + ) + with gr.TabItem("âš™ī¸ Agent Settings", id=2): with gr.Group(): agent_type = gr.Radio( ["org", "custom"], @@ -756,7 +738,7 @@ def create_ui(config, theme_name="Ocean"): visible=False ) - with gr.TabItem("🔧 LLM Configuration", id=2): + with gr.TabItem("🔧 LLM Configuration", id=3): with gr.Group(): llm_provider = gr.Dropdown( choices=[provider for provider,model in utils.model_names.items()], @@ -813,7 +795,7 @@ def update_llm_num_ctx_visibility(llm_provider): outputs=llm_num_ctx ) - with gr.TabItem("🌐 Browser Settings", id=3): + with gr.TabItem("🌐 Browser Settings", id=4): with gr.Group(): with gr.Row(): use_own_browser = gr.Checkbox( @@ -878,44 +860,7 @@ def update_llm_num_ctx_visibility(llm_provider): interactive=True, ) - with gr.TabItem("🤖 Run Agent", id=4): - task = gr.Textbox( - label="Task Description", - lines=4, - placeholder="Enter your task here...", - value=config['task'], - info="Describe what you want the agent to do", - ) - add_infos = gr.Textbox( - label="Additional Information", - lines=3, - placeholder="Add any helpful context or instructions...", - info="Optional hints to help the LLM complete the task", - ) - - with gr.Row(): - run_button = gr.Button("â–ļī¸ Run Agent", variant="primary", scale=2) - stop_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) - - with gr.Row(): - browser_view = gr.HTML( - value="

Waiting for browser session...
", - label="Live Browser View", - ) - - with gr.TabItem("🧐 Deep Research", id=5): - research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.") - with gr.Row(): - max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° - max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° - with gr.Row(): - research_button = gr.Button("â–ļī¸ Run Deep Research", variant="primary", scale=2) - stop_research_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) - markdown_output_display = gr.Markdown(label="Research Report") - markdown_download = gr.File(label="Download Research Report") - - - with gr.TabItem("📊 Results", id=6): + with gr.TabItem("📊 Results", id=5): with gr.Group(): recording_display = gr.Video(label="Latest Recording") @@ -973,19 +918,6 @@ def update_llm_num_ctx_visibility(llm_provider): run_button # Run button ], ) - - # Run Deep Research - research_button.click( - fn=run_deep_search, - inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless], - outputs=[markdown_output_display, markdown_download, stop_research_button, research_button] - ) - # Bind the stop button click event after errors_output is defined - stop_research_button.click( - fn=stop_research_agent, - inputs=[], - outputs=[stop_research_button, research_button], - ) with gr.TabItem("đŸŽĨ Recordings", id=7): def list_recordings(save_recording_path): From f917cded7ce15e69d5be879547e58891876b2b62 Mon Sep 17 00:00:00 2001 From: Arkaprava De Date: Wed, 26 Mar 2025 14:10:55 -0700 Subject: [PATCH 3/3] Fixed placeholders --- src/agent/custom_agent.py | 14 +++++++++- src/utils/default_config_settings.py | 10 +++++-- webui.py | 41 ++++++++++++++++++++++------ 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index bfeb33ca..f68c0ce5 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -89,9 +89,13 @@ def __init__( tool_calling_method: Optional[str] = 'auto', page_extraction_llm: Optional[BaseChatModel] = None, planner_llm: Optional[BaseChatModel] = None, - planner_interval: int = 1, # Run planner every N steps + planner_interval: int = 1, # Run planner every N steps, + placeholders: Optional[str] = None, ): + # make placeholders available to the class + self.placeholders = placeholders + # Load sensitive data from environment variables env_sensitive_data = {} for key, value in os.environ.items(): @@ -239,6 +243,14 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu ai_content = ai_content.replace("```json", "").replace("```", "") ai_content = repair_json(ai_content) + + + # Replace placeholders in ai_content with values from self.placeholders + for key, value in self.placeholders.items(): + print(key, value) + ai_content = ai_content.replace(key, value) + + parsed_json = json.loads(ai_content) parsed: AgentOutput = self.AgentOutput(**parsed_json) diff --git 
a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py
index 5daf83ca..58b54da5 100644
--- a/src/utils/default_config_settings.py
+++ b/src/utils/default_config_settings.py
@@ -15,17 +15,21 @@ def default_config():
         "llm_provider": "bedrock",
         "llm_model_name": "anthropic.claude-3-5-sonnet-20241022-v2:0",
         "prerequisite": """
+
 import boto3
 
 session = boto3.Session(region_name="us-west-2")
 sagemaker_client = session.client("sagemaker")
 
 response = sagemaker_client.create_presigned_domain_url(
-    DomainId="d-1234567890",
+    DomainId="d-8aldpksok8tq",
     UserProfileName="arkaprav-ssh-test"
     )
 
-PLACEHOLDER = response["AuthorizedUrl"]
+PLACEHOLDERS = {}
+PLACEHOLDERS["PLACEHOLDER_URL"] = response["AuthorizedUrl"]
 
+
+
 """,
         "llm_num_ctx": 32000,
@@ -42,7 +46,7 @@ def default_config():
         "save_recording_path": "./tmp/record_videos",
         "save_trace_path": "./tmp/traces",
         "save_agent_history_path": "./tmp/agent_history",
-        "task": "go to google.com and type 'OpenAI' click search and give me the first url",
+        "task": "open PLACEHOLDER_URL and open the space named test1234, creating it if it does not exist",
     }
 
diff --git a/webui.py b/webui.py
index a07fe5b6..eb6d510b 100644
--- a/webui.py
+++ b/webui.py
@@ -118,7 +118,8 @@ async def run_browser_agent(
         max_steps,
         use_vision,
         max_actions_per_step,
-        tool_calling_method
+        tool_calling_method,
+        placeholders
 ):
     global _global_agent_state
     _global_agent_state.clear_stop()  # Clear any previous stop requests
@@ -186,7 +187,8 @@ async def run_browser_agent(
             max_steps=max_steps,
             use_vision=use_vision,
             max_actions_per_step=max_actions_per_step,
-            tool_calling_method=tool_calling_method
+            tool_calling_method=tool_calling_method,
+            placeholders=placeholders
         )
     else:
         raise ValueError(f"Invalid agent type: {agent_type}")
@@ -345,7 +347,8 @@ async def run_custom_agent(
         max_steps,
         use_vision,
         max_actions_per_step,
-        tool_calling_method
+        tool_calling_method,
+        placeholders
 ):
     try:
         global _global_browser, _global_browser_context, _global_agent_state, _global_agent
@@ -402,7 +405,8 @@ async def run_custom_agent(
             system_prompt_class=CustomSystemPrompt,
             agent_prompt_class=CustomAgentMessagePrompt,
             max_actions_per_step=max_actions_per_step,
-            tool_calling_method=tool_calling_method
+            tool_calling_method=tool_calling_method,
+            placeholders=placeholders
         )
 
         history = await _global_agent.run(max_steps=max_steps)
@@ -457,8 +461,27 @@ async def run_with_stream(
         max_steps,
         use_vision,
         max_actions_per_step,
-        tool_calling_method
+        tool_calling_method,
+        prerequisite
 ):
+
+    # Execute the prerequisite snippet as Python code so it can compute values
+    # (e.g. a presigned URL) and expose them through a PLACEHOLDERS dict
+    global_vars = {}
+    try:
+        if prerequisite.strip():
+            exec(prerequisite, globals(), global_vars)
+    except Exception as e:
+        raise RuntimeError(f"Error executing prerequisite: {str(e)}")
+
+    # Collect the PLACEHOLDERS map from the executed snippet's namespace,
+    # falling back to an empty map when the snippet defines none
+    placeholders = global_vars.get("PLACEHOLDERS", {})
+
+    logger.info(f"Resolved placeholders: {placeholders}")
+    logger.debug(f"Prerequisite variables: {list(global_vars)}")
+
+
     global _global_agent_state
     stream_vw = 80
     stream_vh = int(80 * window_h // window_w)
@@ -486,7 +509,8 @@ async def run_with_stream(
                 max_steps=max_steps,
                 use_vision=use_vision,
                 max_actions_per_step=max_actions_per_step,
-                tool_calling_method=tool_calling_method
+                tool_calling_method=tool_calling_method,
+                placeholders=placeholders
             )
             # Add HTML content at the start of the result array
             html_content = f"Using browser..."
@@ -903,7 +928,7 @@ def update_llm_num_ctx_visibility(llm_provider):
             agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
             use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
             save_recording_path, save_agent_history_path, save_trace_path,  # Include the new path
-            enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method
+            enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method, prerequisite
         ],
         outputs=[
             browser_view,  # Browser view