diff --git a/Dockerfile b/Dockerfile index 7b6d39fe..44d47511 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,11 +71,11 @@ ENV BROWSER_USE_LOGGING_LEVEL=info ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome ENV ANONYMIZED_TELEMETRY=false ENV DISPLAY=:99 -ENV RESOLUTION=1920x1080x24 +ENV RESOLUTION=960x540x24 ENV VNC_PASSWORD=vncpassword ENV CHROME_PERSISTENT_SESSION=true -ENV RESOLUTION_WIDTH=1920 -ENV RESOLUTION_HEIGHT=1080 +ENV RESOLUTION_WIDTH=960 +ENV RESOLUTION_HEIGHT=540 # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/custom_theme.py b/custom_theme.py new file mode 100644 index 00000000..80c25394 --- /dev/null +++ b/custom_theme.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from collections.abc import Iterable + +from gradio.themes.base import Base +from gradio.themes.utils import colors, fonts, sizes + + +class custom_theme(Base): + def __init__( + self, + *, + primary_hue: colors.Color | str = colors.blue, + secondary_hue: colors.Color | str = colors.sky, + neutral_hue: colors.Color | str = colors.gray, + spacing_size: sizes.Size | str = sizes.spacing_md, + radius_size: sizes.Size | str = sizes.radius_lg, + text_size: sizes.Size | str = sizes.text_md, + font: fonts.Font | str | Iterable[fonts.Font | str] = ( + fonts.GoogleFont("Montserrat"), + "ui-sans-serif", + "system-ui", + "sans-serif", + ), + font_mono: fonts.Font | str | Iterable[fonts.Font | str] = ( + fonts.GoogleFont("Inter"), + "ui-monospace", + "Consolas", + "monospace", + ), + ): + super().__init__( + primary_hue=primary_hue, + secondary_hue=secondary_hue, + neutral_hue=neutral_hue, + spacing_size=spacing_size, + radius_size=radius_size, + text_size=text_size, + font=font, + font_mono=font_mono, + ) + self.name = "custom_theme" + super().set( + button_border_width="0px", + checkbox_label_border_width="1px", + button_transform_hover="scale(1.02)", + button_transition="all 0.1s ease-in-out", + slider_color="*primary_400", + button_primary_background_fill="linear-gradient(120deg, *secondary_500 0%, *primary_300 60%, *primary_400 100%)", + button_primary_background_fill_hover="linear-gradient(120deg, *secondary_400 0%, *primary_300 60%, *primary_300 100%)", + button_primary_text_color="*button_secondary_text_color", + button_secondary_background_fill="linear-gradient(120deg, *neutral_300 0%, *neutral_100 60%, *neutral_200 100%)", + button_secondary_background_fill_hover="linear-gradient(120deg, *neutral_200 0%, *neutral_100 60%, *neutral_100 100%)", + checkbox_label_background_fill_selected="linear-gradient(120deg, *primary_400 0%, *primary_300 60%, *primary_400 100%)", + checkbox_label_border_color_selected="*primary_400", + checkbox_background_color_selected="*primary_400", + checkbox_label_text_color_selected="*button_secondary_text_color", + slider_color_dark="*primary_500", + button_primary_background_fill_dark="linear-gradient(120deg, *secondary_600 0%, *primary_500 60%, *primary_600 100%)", + button_primary_background_fill_hover_dark="linear-gradient(120deg, *secondary_500 0%, *primary_500 60%, *primary_500 100%)", + button_primary_text_color_dark="*button_secondary_text_color", + button_secondary_background_fill_dark="linear-gradient(120deg, *neutral_700 0%, *neutral_600 60%, *neutral_700 100%)", + button_secondary_background_fill_hover_dark="linear-gradient(120deg, *neutral_600 0%, *neutral_600 60%, *neutral_700 100%)", + checkbox_label_background_fill_selected_dark="linear-gradient(120deg, *primary_600 0%, *primary_500 60%, *primary_600 100%)", + 
checkbox_label_border_color_selected_dark="*primary_600", + checkbox_background_color_selected_dark="*primary_600", + checkbox_label_text_color_selected_dark="*button_secondary_text_color", + block_shadow="*shadow_drop_lg", + button_secondary_shadow_hover="*shadow_drop_lg", + button_primary_shadow_hover="0 1px 3px 0 *primary_200, 0 1px 2px -1px *primary_200", + button_secondary_shadow_dark="none", + button_primary_shadow_dark="none", + ) diff --git a/docker-setup.ps1 b/docker-setup.ps1 new file mode 100644 index 00000000..82e9ceae --- /dev/null +++ b/docker-setup.ps1 @@ -0,0 +1,2 @@ +docker build -t agent . +docker run -p 7788:7788 -p 6080:6080 -p 5901:5901 agent \ No newline at end of file diff --git a/logo.png b/logo.png new file mode 100644 index 00000000..99ebb6fc Binary files /dev/null and b/logo.png differ diff --git a/prompts/prompt.text b/prompts/prompt.text new file mode 100644 index 00000000..f55fc85d --- /dev/null +++ b/prompts/prompt.text @@ -0,0 +1,47 @@ +Step-by-Step Instructions + +1. Navigate to Athenahealth Preview Environment + - Open a web browser and go to: https://preview.athenahealth.com/ + +2. Log In + - Enter the credentials: + - Username: p-bkumar1 + - Password: Xcaliber@12345 + - Click the Login button. + +3. Select the Default Department + - If prompted, choose the default department from the list (e.g., "7 Hills Department"). + +4. Access the "Patients" Menu + - Locate the header at the top of the dashboard. + - Click on the "Patients" menu to open the dropdown. + +5. Access Document Search + - Option 1 (Primary Attempt) + - In the dropdown, look for "Documents > Document Search" and click it. + - If the primary attempt fails (error 404 or element not found): + - Refresh the page. + - Retry clicking "Document Search" (up to 3 times with 2-second intervals). + +6. Handle Iframes (Fallback Approach) + - Use the following sequence if Document Search is nested in iframes: + - Switch to the main iframe context: + - Locate and switch to iframe[name="frMain"]. + - Switch to the sub-iframe: + - Locate and switch to iframe[id="searchFrame"] or iframe[name="frMain"] > iframe (if nested). + - Fill the DOCUMENTID and click Search: + - Enter the value "116873" in the DOCUMENTID field. + - Click the "Search" button. + - Retry up to 3 times: + - Wait 2 seconds between each retry if elements are missing. + +7. Observe and Report + - After clicking "Document Search" or executing the iframe fallback, confirm that the document search results (the patient lab reports) are displayed. + +Common Issues and Solutions +- Element Not Found: Ensure the iframe is fully loaded (wait for 5–10 seconds). +- Button Not Clickable: Retry the click, forcing it if necessary. +- Network Errors: Verify your internet connection and retry the login process. + +Result +- After following the steps, patient lab reports will be displayed. The task is completed successfully. diff --git a/setup.ps1 b/setup.ps1 new file mode 100644 index 00000000..6a2d69fd --- /dev/null +++ b/setup.ps1 @@ -0,0 +1,18 @@ +deactivate + +Remove-Item -Recurse -Force .venv + +# Step 2: Set Up Python Environment +uv venv --python 3.11 + +# Activate the virtual environment +.\.venv\Scripts\Activate.ps1 + +# Step 3: Install Dependencies +uv pip install -r requirements.txt +playwright install + + +# Step 4: Run the web UI locally +python webui.py --ip 127.0.0.1 --port 7788 +Write-Output "Setup complete. Virtual environment activated." 
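The iframe fallback in prompts/prompt.text (switch to frMain, then the search sub-frame, fill DOCUMENTID, retry up to 3 times with 2-second waits) maps directly onto Playwright's frame locators. Below is a minimal sync-Playwright sketch of that flow; the frame names and the DOCUMENTID value come from the prompt, while the field and button selectors (input[name="DOCUMENTID"], button:has-text("Search")) are illustrative assumptions, not verified selectors for the Athenahealth page.

import time
from playwright.sync_api import sync_playwright, Error as PlaywrightError

def search_document(page, document_id: str = "116873", retries: int = 3) -> bool:
    """Fill DOCUMENTID inside the nested frMain/searchFrame iframes and click Search."""
    for attempt in range(1, retries + 1):
        try:
            # Enter the nested frame context: frMain -> searchFrame (names from the prompt)
            search_frame = (
                page.frame_locator('iframe[name="frMain"]')
                    .frame_locator('iframe#searchFrame')
            )
            # The selectors below are assumptions; adjust to the real document-search form
            search_frame.locator('input[name="DOCUMENTID"]').fill(document_id)
            search_frame.locator('button:has-text("Search")').click()
            return True
        except PlaywrightError:
            time.sleep(2)  # elements not ready yet: wait 2 seconds and retry, per the prompt
    return False

if __name__ == "__main__":
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        page = browser.new_page()
        page.goto("https://preview.athenahealth.com/")
        # ... log in and open Patients > Documents > Document Search first ...
        print("search succeeded:", search_document(page))
        browser.close()
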
diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index bfeb33ca..27379cc0 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -53,15 +53,15 @@ def __init__( browser: Browser | None = None, browser_context: BrowserContext | None = None, controller: Controller = Controller(), - use_vision: bool = True, + use_vision: bool = False, use_vision_for_planner: bool = False, save_conversation_path: Optional[str] = None, save_conversation_path_encoding: Optional[str] = 'utf-8', - max_failures: int = 3, + max_failures: int = 5, retry_delay: int = 10, system_prompt_class: Type[SystemPrompt] = SystemPrompt, agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt, - max_input_tokens: int = 128000, + max_input_tokens: int = 1280000, validate_output: bool = False, message_context: Optional[str] = None, generate_gif: bool | str = True, @@ -281,8 +281,8 @@ async def _run_planner(self) -> Optional[str]: planner_messages[-1] = HumanMessage(content=new_msg) # Get planner output - response = await self.planner_llm.ainvoke(planner_messages) - plan = response.content + response = await self.planner_llm.ainvoke(planner_messages) + plan = response.content last_state_message = planner_messages[-1] # remove image from last state message if isinstance(last_state_message.content, list): diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py index ab8c9a1e..6b0e9d7b 100644 --- a/src/agent/custom_prompts.py +++ b/src/agent/custom_prompts.py @@ -43,6 +43,12 @@ def important_rules(self) -> str: {"go_to_url": {"url": "https://example.com"}}, {"extract_page_content": {}} ] + - Iframe interaction: [ + {"switch_frame": {"frame_name": "GlobalNav"}}, + {"click_element": {"index": 1}}, + {"switch_frame": {"frame_name": "frameContent"}}, + {"click_element": {"index": 2}} + ] 3. ELEMENT INTERACTION: @@ -82,8 +88,39 @@ def important_rules(self) -> str: - Only provide the action sequence until you think the page will change. - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes... - only use multiple actions if it makes sense. - -9. Extraction: +9. IFrames: + - Identify iframes using their names or unique identifiers + - Switch to iframes before interacting with nested elements + - Use frame locators for element interaction within iframes + - Example action sequence for iframe interaction: + [ + {"switch_frame": {"frame_name": "GlobalNav"}}, + {"click_element": {"index": 1}}, + {"switch_frame": {"frame_name": "frameContent"}}, + {"click_element": {"index": 2}} + ] + - Always return to the main frame after iframe operations + - Handle nested iframes by chaining switch_frame actions +10. Action Sequencing for Iframes: + - Always start iframe interactions with switch_frame + - Perform all element interactions within the iframe context + - Use back_to_main_frame after completing iframe operations + - For nested iframes, chain switch_frame actions + - Example nested iframe sequence: + [ + {"switch_frame": {"frame_name": "outerFrame"}}, + {"switch_frame": {"frame_name": "innerFrame"}}, + {"click_element": {"index": 1}}, + {"back_to_main_frame": {}} + ] + +11. Visual Context for Iframes: + - Bounding boxes for iframe elements will have frame name labels + - Example: [GlobalNav] + - Use frame labels to identify element context + - Elements without frame labels are in the main page + +12. 
Extraction: - If your task is to find information or do research - call extract_content on the specific pages to get and store the information. """ diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py index e6fa88f9..5d7c6662 100644 --- a/src/utils/default_config_settings.py +++ b/src/utils/default_config_settings.py @@ -10,7 +10,7 @@ def default_config(): "agent_type": "custom", "max_steps": 100, "max_actions_per_step": 10, - "use_vision": True, + "use_vision": False, "tool_calling_method": "auto", "llm_provider": "openai", "llm_model_name": "gpt-4o", diff --git a/supervisord.conf b/supervisord.conf index 3410b912..a59a94e5 100644 --- a/supervisord.conf +++ b/supervisord.conf @@ -1,3 +1,100 @@ +# [supervisord] +# user=root +# nodaemon=true +# logfile=/dev/stdout +# logfile_maxbytes=0 +# loglevel=debug + +# [program:xvfb] +# command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset +# autorestart=true +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 +# priority=100 +# startsecs=3 +# stopsignal=TERM +# stopwaitsecs=10 + +# [program:vnc_setup] +# command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd" +# autorestart=false +# startsecs=0 +# priority=150 +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 + +# [program:x11vnc] +# command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -bg -rfbport 5901 -o /var/log/x11vnc.log" +# autorestart=true +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 +# priority=200 +# startretries=10 +# startsecs=10 +# stopsignal=TERM +# stopwaitsecs=10 +# depends_on=vnc_setup,xvfb + +# [program:x11vnc_log] +# command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log" +# autorestart=true +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 +# priority=250 +# stopsignal=TERM +# stopwaitsecs=5 +# depends_on=x11vnc + +# [program:novnc] +# command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc --http-header='Content-Security-Policy: frame-ancestors http://localhost:7788/'" +# autorestart=true +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 +# priority=300 +# startretries=5 +# startsecs=3 +# depends_on=x11vnc + +# [program:persistent_browser] +# environment=START_URL="data:text/html,
<html><body><h1>Browser Ready</h1></body></html>
" +# command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\"" +# autorestart=true +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 +# priority=350 +# startretries=5 +# startsecs=10 +# stopsignal=TERM +# stopwaitsecs=15 +# depends_on=novnc + +# [program:webui] +# command=python webui.py --ip 0.0.0.0 --port 7788 +# directory=/app +# autorestart=true +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 +# priority=400 +# startretries=3 +# startsecs=3 +# stopsignal=TERM +# stopwaitsecs=10 +# depends_on=persistent_browser + [supervisord] user=root nodaemon=true @@ -17,18 +114,18 @@ startsecs=3 stopsignal=TERM stopwaitsecs=10 -[program:vnc_setup] -command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd" -autorestart=false -startsecs=0 -priority=150 -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 +# [program:vnc_setup] +# command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd" +# autorestart=false +# startsecs=0 +# priority=150 +# stdout_logfile=/dev/stdout +# stdout_logfile_maxbytes=0 +# stderr_logfile=/dev/stderr +# stderr_logfile_maxbytes=0 [program:x11vnc] -command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log" +command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -nopw -forever -shared -bg -rfbport 5901 -o /var/log/x11vnc.log" autorestart=true stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 @@ -39,7 +136,7 @@ startretries=10 startsecs=10 stopsignal=TERM stopwaitsecs=10 -depends_on=vnc_setup,xvfb +depends_on=xvfb [program:x11vnc_log] command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log" @@ -93,4 +190,4 @@ startretries=3 startsecs=3 stopsignal=TERM stopwaitsecs=10 -depends_on=persistent_browser +depends_on=persistent_browser \ No newline at end of file diff --git a/webui.py b/webui.py index e770d99d..2adaf628 100644 --- a/webui.py +++ b/webui.py @@ -1,5 +1,8 @@ import pdb import logging +import requests + + from dotenv import load_dotenv @@ -32,6 +35,7 @@ from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext from src.controller.custom_controller import CustomController from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base +from custom_theme import custom_theme from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, save_current_config, update_ui_from_config from src.utils.utils import update_model_dropdown, get_latest_files, 
capture_screenshot @@ -68,6 +72,15 @@ def resolve_sensitive_env_variables(text): return result +def open_modal(): + return gr.update(visible=True) + +def close_modal(): + return gr.update(visible=False) + +def show_iframe_and_save(): + return gr.update(visible=True), gr.update(visible=True) + async def stop_agent(): """Request the agent to stop and update UI with enhanced feedback""" global _global_agent_state, _global_browser_context, _global_browser, _global_agent @@ -644,7 +657,8 @@ async def run_with_stream( "Origin": Origin(), "Citrus": Citrus(), "Ocean": Ocean(), - "Base": Base() + "Base": Base(), + "custom_theme": custom_theme() } async def close_global_browser(): @@ -682,9 +696,50 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p ) return markdown_content, file_path, gr.update(value="Stop", interactive=True), gr.update(interactive=True) +import requests + + +def send_post_request(title, task): + try: + print(f"🔍 title: {title}, task: {task}") + + base_url = os.getenv("BASE_URL") + if not base_url: + raise ValueError("BASE_URL environment variable is not set") + + url = f"{base_url}/agent/operations" + payload = { + "title": title, + "prompt": task + } + headers = {"Content-Type": "application/json"} + + print(f"đŸ“Ļ Payload: {payload}") + + response = requests.post(url, json=payload, headers=headers) + + print(f"🛑 Response Status Code: {response.status_code}") + print(f"📩 Response Content: {response.text}") + + response.raise_for_status() # Raises an HTTPError for bad responses (4xx and 5xx) + + return "Title and Task saved successfully!" + + except requests.exceptions.RequestException as e: + print(f" Request Error: {e}") + return f"Error: {e}" + except ValueError as ve: + print(f" Value Error: {ve}") + return str(ve) + + except Exception as ex: + print(f"âš ī¸ Unexpected Error: {ex}") + return f"Unexpected Error: {ex}" + + -def create_ui(config, theme_name="Ocean"): +def create_ui(config, theme_name="custom_theme"): css = """ .gradio-container { max-width: 1200px !important; @@ -700,367 +755,880 @@ def create_ui(config, theme_name="Ocean"): padding: 15px; border-radius: 10px; } + footer { + display: none !important; /* Hide the footer */ + } + #built-with-gradio, #settings { + display: none !important; + } """ + custom_favicon = """ + + """ + + + with gr.Blocks( - title="Browser Use WebUI", theme=theme_map[theme_name], css=css + title="EHR Operator", theme=theme_map[theme_name], css="body { display: flex; justify-content: center; } #main-container { max-width: 1200px; width: 100%; } footer { display: none !important; }" ) as demo: - with gr.Row(): - gr.Markdown( - """ - # 🌐 Browser Use WebUI - ### Control your browser with AI assistance - """, - elem_classes=["header-text"], - ) + gr.HTML(custom_favicon) + + # with gr.Row(): + # gr.Markdown( + # """ + #
🌐 EHR Operator + # Control your EHR via prompts
+ # """, + # elem_classes=["header-text"], + # ) - with gr.Tabs() as tabs: - with gr.TabItem("âš™ī¸ Agent Settings", id=1): - with gr.Group(): - agent_type = gr.Radio( - ["org", "custom"], - label="Agent Type", - value=config['agent_type'], - info="Select the type of agent to use", - ) - with gr.Column(): - max_steps = gr.Slider( - minimum=1, - maximum=200, - value=config['max_steps'], - step=1, - label="Max Run Steps", - info="Maximum number of steps the agent will take", - ) - max_actions_per_step = gr.Slider( - minimum=1, - maximum=20, - value=config['max_actions_per_step'], - step=1, - label="Max Actions per Step", - info="Maximum number of actions the agent will take per step", - ) - with gr.Column(): - use_vision = gr.Checkbox( - label="Use Vision", - value=config['use_vision'], - info="Enable visual processing capabilities", - ) - tool_calling_method = gr.Dropdown( - label="Tool Calling Method", - value=config['tool_calling_method'], - interactive=True, - allow_custom_value=True, # Allow users to input custom model names - choices=["auto", "json_schema", "function_calling"], - info="Tool Calls Funtion Name", - visible=False + + with gr.Blocks(elem_id="main-container"): + # Main Row (Contains left-column and right-column iframe) + with gr.Row(equal_height=True): + with gr.Column(scale=1, min_width=480, elem_id="left-column"): + with gr.Group(): + title = gr.Textbox( + label="Operation Title", + lines=1, + placeholder="Enter your title here...", + value="Sample Title", ) - with gr.TabItem("🔧 LLM Configuration", id=2): - with gr.Group(): - llm_provider = gr.Dropdown( - choices=[provider for provider,model in utils.model_names.items()], - label="LLM Provider", - value=config['llm_provider'], - info="Select your preferred language model provider" - ) - llm_model_name = gr.Dropdown( - label="Model Name", - choices=utils.model_names['openai'], - value=config['llm_model_name'], - interactive=True, - allow_custom_value=True, # Allow users to input custom model names - info="Select a model from the dropdown or type a custom model name" - ) - llm_num_ctx = gr.Slider( - minimum=2**8, - maximum=2**16, - value=config['llm_num_ctx'], - step=1, - label="Max Context Length", - info="Controls max context length model needs to handle (less = faster)", - visible=config['llm_provider'] == "ollama" - ) - llm_temperature = gr.Slider( - minimum=0.0, - maximum=2.0, - value=config['llm_temperature'], - step=0.1, - label="Temperature", - info="Controls randomness in model outputs" - ) - with gr.Row(): - llm_base_url = gr.Textbox( - label="Base URL", - value=config['llm_base_url'], - info="API endpoint URL (if required)" - ) - llm_api_key = gr.Textbox( - label="API Key", - type="password", - value=config['llm_api_key'], - info="Your API key (leave blank to use .env)" + task = gr.Textbox( + label="Task Description", + lines=10, + placeholder="Enter your task here...", + value="Your Task Here", + info="Describe what you want the agent to do", ) - # Change event to update context length slider - def update_llm_num_ctx_visibility(llm_provider): - return gr.update(visible=llm_provider == "ollama") - - # Bind the change event of llm_provider to update the visibility of context length slider - llm_provider.change( - fn=update_llm_num_ctx_visibility, - inputs=llm_provider, - outputs=llm_num_ctx - ) - - with gr.TabItem("🌐 Browser Settings", id=3): - with gr.Group(): - with gr.Row(): - use_own_browser = gr.Checkbox( - label="Use Own Browser", - value=config['use_own_browser'], - info="Use your existing browser 
instance", - ) - keep_browser_open = gr.Checkbox( - label="Keep Browser Open", - value=config['keep_browser_open'], - info="Keep Browser Open between Tasks", - ) - headless = gr.Checkbox( - label="Headless Mode", - value=config['headless'], - info="Run browser without GUI", - ) - disable_security = gr.Checkbox( - label="Disable Security", - value=config['disable_security'], - info="Disable browser security features", - ) - enable_recording = gr.Checkbox( - label="Enable Recording", - value=config['enable_recording'], - info="Enable saving browser recordings", + add_infos = gr.Textbox( + label="Additional Information", + lines=4, + placeholder="Add any helpful context or instructions...", + info="Optional hints to help the LLM complete the task", ) with gr.Row(): - window_w = gr.Number( - label="Window Width", - value=config['window_w'], - info="Browser window width", - ) - window_h = gr.Number( - label="Window Height", - value=config['window_h'], - info="Browser window height", - ) - - save_recording_path = gr.Textbox( - label="Recording Path", - placeholder="e.g. ./tmp/record_videos", - value=config['save_recording_path'], - info="Path to save browser recordings", - interactive=True, # Allow editing only if recording is enabled + run_button = gr.Button("Run Agent", variant="primary", scale=1) + stop_button = gr.Button("Stop", variant="stop", scale=1) + save_button = gr.Button("Save", variant="secondary", scale=1, visible=False) # Initially Hidden + # Initially hidden iframe column inside the SAME Row + with gr.Column(scale=3, min_width=720, elem_id="right-column", visible=False) as iframe_row: + gr.HTML( + """ +
+ +
+ """ ) - save_trace_path = gr.Textbox( - label="Trace Path", - placeholder="e.g. ./tmp/traces", - value=config['save_trace_path'], - info="Path to save Agent traces", - interactive=True, - ) + # Button click will now reveal the iframe in the SAME row + run_button.click(show_iframe_and_save, outputs=[iframe_row, save_button]) + + # Save button click triggers POST request + save_button.click(send_post_request, inputs=[title, task], outputs=None) + + + + gr.HTML( + """ + + """ + ) - save_agent_history_path = gr.Textbox( - label="Agent History Save Path", - placeholder="e.g., ./tmp/agent_history", - value=config['save_agent_history_path'], - info="Specify the directory where agent history should be saved.", - interactive=True, - ) + with gr.Row(elem_id="settings-button-container"): + open_modal_button = gr.Button("âš™ī¸", variant="secondary", elem_id="settings-button",visible =False) - with gr.TabItem("🤖 Run Agent", id=4): - task = gr.Textbox( - label="Task Description", - lines=4, - placeholder="Enter your task here...", - value=config['task'], - info="Describe what you want the agent to do", - ) - add_infos = gr.Textbox( - label="Additional Information", - lines=3, - placeholder="Add any helpful context or instructions...", - info="Optional hints to help the LLM complete the task", - ) - with gr.Row(): - run_button = gr.Button("â–ļī¸ Run Agent", variant="primary", scale=2) - stop_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) + # Modal Container (Initially Hidden) + with gr.Group(visible=False) as modal: + with gr.Blocks(css=".tab-container { min-width: 800px; }"): + with gr.Tabs() as tabs: - with gr.Row(): - browser_view = gr.HTML( - value="
Waiting for browser session...
", - label="Live Browser View", - ) - - with gr.TabItem("🧐 Deep Research", id=5): - research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.") - with gr.Row(): - max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° - max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° - with gr.Row(): - research_button = gr.Button("â–ļī¸ Run Deep Research", variant="primary", scale=2) - stop_research_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) - markdown_output_display = gr.Markdown(label="Research Report") - markdown_download = gr.File(label="Download Research Report") - + with gr.TabItem("🌐 Browser Settings", id=1): + with gr.Group(): + with gr.Row(): + use_own_browser = gr.Checkbox( + label="Use Own Browser", + value=config['use_own_browser'], + info="Use your existing browser instance", + ) + keep_browser_open = gr.Checkbox( + label="Keep Browser Open", + value=config['keep_browser_open'], + info="Keep Browser Open between Tasks", + ) + headless = gr.Checkbox( + label="Headless Mode", + value=config['headless'], + info="Run browser without GUI", + ) + disable_security = gr.Checkbox( + label="Disable Security", + value=config['disable_security'], + info="Disable browser security features", + ) + enable_recording = gr.Checkbox( + label="Enable Recording", + value=config['enable_recording'], + info="Enable saving browser recordings", + ) + + with gr.Row(): + window_w = gr.Number( + label="Window Width", + value=config['window_w'], + info="Browser window width", + ) + window_h = gr.Number( + label="Window Height", + value=config['window_h'], + info="Browser window height", + ) + + save_recording_path = gr.Textbox( + label="Recording Path", + placeholder="e.g. ./tmp/record_videos", + value=config['save_recording_path'], + info="Path to save browser recordings", + interactive=True, # Allow editing only if recording is enabled + ) - with gr.TabItem("📊 Results", id=6): - with gr.Group(): + save_trace_path = gr.Textbox( + label="Trace Path", + placeholder="e.g. 
./tmp/traces", + value=config['save_trace_path'], + info="Path to save Agent traces", + interactive=True, + ) - recording_display = gr.Video(label="Latest Recording") + save_agent_history_path = gr.Textbox( + label="Agent History Save Path", + placeholder="e.g., ./tmp/agent_history", + value=config['save_agent_history_path'], + info="Specify the directory where agent history should be saved.", + interactive=True, + ) - gr.Markdown("### Results") - with gr.Row(): - with gr.Column(): - final_result_output = gr.Textbox( - label="Final Result", lines=3, show_label=True + with gr.TabItem("âš™ī¸ Agent Settings", id=2): + with gr.Group(): + agent_type = gr.Radio( + ["org", "custom"], + label="Agent Type", + value=config['agent_type'], + info="Select the type of agent to use", ) - with gr.Column(): - errors_output = gr.Textbox( - label="Errors", lines=3, show_label=True + with gr.Column(): + max_steps = gr.Slider( + minimum=1, + maximum=200, + value=config['max_steps'], + step=1, + label="Max Run Steps", + info="Maximum number of steps the agent will take", + ) + max_actions_per_step = gr.Slider( + minimum=1, + maximum=20, + value=config['max_actions_per_step'], + step=1, + label="Max Actions per Step", + info="Maximum number of actions the agent will take per step", + ) + with gr.Column(): + use_vision = gr.Checkbox( + label="Use Vision", + value=config['use_vision'], + info="Enable visual processing capabilities", + ) + tool_calling_method = gr.Dropdown( + label="Tool Calling Method", + value=config['tool_calling_method'], + interactive=True, + allow_custom_value=True, # Allow users to input custom model names + choices=["auto", "json_schema", "function_calling"], + info="Tool Calls Funtion Name", + visible=False + ) + + with gr.TabItem("🔧 LLM Configuration", id=3): + with gr.Group(): + llm_provider = gr.Dropdown( + choices=[provider for provider,model in utils.model_names.items()], + label="LLM Provider", + value=config['llm_provider'], + info="Select your preferred language model provider" ) - with gr.Row(): - with gr.Column(): - model_actions_output = gr.Textbox( - label="Model Actions", lines=3, show_label=True + llm_model_name = gr.Dropdown( + label="Model Name", + choices=utils.model_names['openai'], + value=config['llm_model_name'], + interactive=True, + allow_custom_value=True, # Allow users to input custom model names + info="Select a model from the dropdown or type a custom model name" + ) + llm_num_ctx = gr.Slider( + minimum=2**8, + maximum=2**16, + value=config['llm_num_ctx'], + step=1, + label="Max Context Length", + info="Controls max context length model needs to handle (less = faster)", + visible=config['llm_provider'] == "ollama" ) - with gr.Column(): - model_thoughts_output = gr.Textbox( - label="Model Thoughts", lines=3, show_label=True + llm_temperature = gr.Slider( + minimum=0.0, + maximum=2.0, + value=config['llm_temperature'], + step=0.1, + label="Temperature", + info="Controls randomness in model outputs" ) + with gr.Row(): + llm_base_url = gr.Textbox( + label="Base URL", + value=config['llm_base_url'], + info="API endpoint URL (if required)" + ) + llm_api_key = gr.Textbox( + label="API Key", + type="password", + value=config['llm_api_key'], + info="Your API key (leave blank to use .env)" + ) + + # Change event to update context length slider + def update_llm_num_ctx_visibility(llm_provider): + return gr.update(visible=llm_provider == "ollama") + + # Bind the change event of llm_provider to update the visibility of context length slider + llm_provider.change( + 
fn=update_llm_num_ctx_visibility, + inputs=llm_provider, + outputs=llm_num_ctx + ) - trace_file = gr.File(label="Trace File") + + + # with gr.TabItem("🤖 Run Agent", id=4): + # task = gr.Textbox( + # label="Task Description", + # lines=4, + # placeholder="Enter your task here...", + # value=config['task'], + # info="Describe what you want the agent to do", + # ) + # add_infos = gr.Textbox( + # label="Additional Information", + # lines=3, + # placeholder="Add any helpful context or instructions...", + # info="Optional hints to help the LLM complete the task", + # ) + + # with gr.Row(): + # run_button = gr.Button("â–ļī¸ Run Agent", variant="primary", scale=2) + # stop_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) + + # # with gr.Row(): + # # browser_view = gr.HTML( + # # value="
Waiting for browser session...
", + # # label="Live Browser View", + # # ) + + # with gr.Row(): + # gr.HTML( + # """ + # + # """ + # ) + + with gr.TabItem("🧐 Deep Research", id=5): + research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.") + with gr.Row(): + max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° + max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° + with gr.Row(): + research_button = gr.Button("â–ļī¸ Run Deep Research", variant="primary", scale=2) + stop_research_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) + markdown_output_display = gr.Markdown(label="Research Report") + markdown_download = gr.File(label="Download Research Report") + + + with gr.TabItem("📊 Results", id=6): + with gr.Group(): + + recording_display = gr.Video(label="Latest Recording") + + gr.Markdown("### Results") + with gr.Row(): + with gr.Column(): + final_result_output = gr.Textbox( + label="Final Result", lines=3, show_label=True + ) + with gr.Column(): + errors_output = gr.Textbox( + label="Errors", lines=3, show_label=True + ) + with gr.Row(): + with gr.Column(): + model_actions_output = gr.Textbox( + label="Model Actions", lines=3, show_label=True + ) + with gr.Column(): + model_thoughts_output = gr.Textbox( + label="Model Thoughts", lines=3, show_label=True + ) + + trace_file = gr.File(label="Trace File") + + agent_history_file = gr.File(label="Agent History") + + # # Bind the stop button click event after errors_output is defined + # stop_button.click( + # fn=stop_agent, + # inputs=[], + # outputs=[errors_output, stop_button, run_button], + # ) + + # # Run button click handler + # run_button.click( + # fn=run_with_stream, + # inputs=[ + # agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + # use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, + # save_recording_path, save_agent_history_path, save_trace_path, # Include the new path + # enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method + # ], + # outputs=[ + # # browser_view, # Browser view + # final_result_output, # Final result + # errors_output, # Errors + # model_actions_output, # Model actions + # model_thoughts_output, # Model thoughts + # recording_display, # Latest recording + # trace_file, # Trace file + # agent_history_file, # Agent history file + # stop_button, # Stop button + # run_button # Run button + # ], + # ) + + # Run Deep Research + research_button.click( + fn=run_deep_search, + inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless], + outputs=[markdown_output_display, markdown_download, stop_research_button, research_button] + ) + # Bind the stop button click event after errors_output is defined + stop_research_button.click( + fn=stop_research_agent, + inputs=[], + outputs=[stop_research_button, research_button], + ) - agent_history_file = gr.File(label="Agent History") + with 
gr.TabItem("đŸŽĨ Recordings", id=7): + def list_recordings(save_recording_path): + if not os.path.exists(save_recording_path): + return [] - # Bind the stop button click event after errors_output is defined - stop_button.click( - fn=stop_agent, - inputs=[], - outputs=[errors_output, stop_button, run_button], - ) + # Get all video files + recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]")) - # Run button click handler - run_button.click( - fn=run_with_stream, - inputs=[ - agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, - save_recording_path, save_agent_history_path, save_trace_path, # Include the new path - enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method - ], - outputs=[ - browser_view, # Browser view - final_result_output, # Final result - errors_output, # Errors - model_actions_output, # Model actions - model_thoughts_output, # Model thoughts - recording_display, # Latest recording - trace_file, # Trace file - agent_history_file, # Agent history file - stop_button, # Stop button - run_button # Run button - ], - ) - - # Run Deep Research - research_button.click( - fn=run_deep_search, - inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless], - outputs=[markdown_output_display, markdown_download, stop_research_button, research_button] - ) - # Bind the stop button click event after errors_output is defined - stop_research_button.click( - fn=stop_research_agent, - inputs=[], - outputs=[stop_research_button, research_button], - ) + # Sort recordings by creation time (oldest first) + recordings.sort(key=os.path.getctime) - with gr.TabItem("đŸŽĨ Recordings", id=7): - def list_recordings(save_recording_path): - if not os.path.exists(save_recording_path): - return [] + # Add numbering to the recordings + numbered_recordings = [] + for idx, recording in enumerate(recordings, start=1): + filename = os.path.basename(recording) + numbered_recordings.append((recording, f"{idx}. {filename}")) - # Get all video files - recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]")) + return numbered_recordings - # Sort recordings by creation time (oldest first) - recordings.sort(key=os.path.getctime) + recordings_gallery = gr.Gallery( + label="Recordings", + value=list_recordings(config['save_recording_path']), + columns=3, + height="auto", + object_fit="contain" + ) - # Add numbering to the recordings - numbered_recordings = [] - for idx, recording in enumerate(recordings, start=1): - filename = os.path.basename(recording) - numbered_recordings.append((recording, f"{idx}. 
{filename}")) + refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary") + refresh_button.click( + fn=list_recordings, + inputs=save_recording_path, + outputs=recordings_gallery + ) + + with gr.TabItem("📁 Configuration", id=8): + with gr.Group(): + config_file_input = gr.File( + label="Load Config File", + file_types=[".pkl"], + interactive=True + ) - return numbered_recordings + load_config_button = gr.Button("Load Existing Config From File", variant="primary") + save_config_button = gr.Button("Save Current Config", variant="primary") - recordings_gallery = gr.Gallery( - label="Recordings", - value=list_recordings(config['save_recording_path']), - columns=3, - height="auto", - object_fit="contain" - ) + config_status = gr.Textbox( + label="Status", + lines=2, + interactive=False + ) - refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary") - refresh_button.click( - fn=list_recordings, - inputs=save_recording_path, - outputs=recordings_gallery - ) - - with gr.TabItem("📁 Configuration", id=8): - with gr.Group(): - config_file_input = gr.File( - label="Load Config File", - file_types=[".pkl"], - interactive=True - ) + load_config_button.click( + fn=update_ui_from_config, + inputs=[config_file_input], + outputs=[ + agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, + llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + use_own_browser, keep_browser_open, headless, disable_security, enable_recording, + window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path, + config_status + ] + ) - load_config_button = gr.Button("Load Existing Config From File", variant="primary") - save_config_button = gr.Button("Save Current Config", variant="primary") + save_config_button.click( + fn=save_current_config, + inputs=[ + agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, + llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + use_own_browser, keep_browser_open, headless, disable_security, + enable_recording, window_w, window_h, save_recording_path, save_trace_path, + save_agent_history_path, + ], + outputs=[config_status] + ) - config_status = gr.Textbox( - label="Status", - lines=2, - interactive=False - ) - load_config_button.click( - fn=update_ui_from_config, - inputs=[config_file_input], - outputs=[ - agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, - llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, enable_recording, - window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path, - task, config_status - ] - ) + # Close Button + close_modal_button = gr.Button("❌ Close", variant="stop") - save_config_button.click( - fn=save_current_config, - inputs=[ - agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, - llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, - enable_recording, window_w, window_h, save_recording_path, save_trace_path, - save_agent_history_path, task, - ], - outputs=[config_status] - ) + # Bind Events + open_modal_button.click(fn=lambda: gr.update(visible=True), inputs=[], outputs=modal) + + close_modal_button.click(fn=close_modal, inputs=[], outputs=modal) + + + + + + # Run button click handler + run_button.click( + fn=run_with_stream, + 
inputs=[ + agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, + save_recording_path, save_agent_history_path, save_trace_path, # Include the new path + enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method + ], + outputs=[ + final_result_output, errors_output, model_actions_output, model_thoughts_output, + recording_display, trace_file, agent_history_file, stop_button, run_button + ], + ) + + stop_button.click( + fn=stop_agent, + inputs=[], + outputs=[errors_output, stop_button, run_button], + ) + + # with gr.Tabs() as tabs: + # with gr.TabItem("âš™ī¸ Agent Settings", id=1): + # with gr.Group(): + # agent_type = gr.Radio( + # ["org", "custom"], + # label="Agent Type", + # value=config['agent_type'], + # info="Select the type of agent to use", + # ) + # with gr.Column(): + # max_steps = gr.Slider( + # minimum=1, + # maximum=200, + # value=config['max_steps'], + # step=1, + # label="Max Run Steps", + # info="Maximum number of steps the agent will take", + # ) + # max_actions_per_step = gr.Slider( + # minimum=1, + # maximum=20, + # value=config['max_actions_per_step'], + # step=1, + # label="Max Actions per Step", + # info="Maximum number of actions the agent will take per step", + # ) + # with gr.Column(): + # use_vision = gr.Checkbox( + # label="Use Vision", + # value=config['use_vision'], + # info="Enable visual processing capabilities", + # ) + # tool_calling_method = gr.Dropdown( + # label="Tool Calling Method", + # value=config['tool_calling_method'], + # interactive=True, + # allow_custom_value=True, # Allow users to input custom model names + # choices=["auto", "json_schema", "function_calling"], + # info="Tool Calls Funtion Name", + # visible=False + # ) + + # with gr.TabItem("🔧 LLM Configuration", id=2): + # with gr.Group(): + # llm_provider = gr.Dropdown( + # choices=[provider for provider,model in utils.model_names.items()], + # label="LLM Provider", + # value=config['llm_provider'], + # info="Select your preferred language model provider" + # ) + # llm_model_name = gr.Dropdown( + # label="Model Name", + # choices=utils.model_names['openai'], + # value=config['llm_model_name'], + # interactive=True, + # allow_custom_value=True, # Allow users to input custom model names + # info="Select a model from the dropdown or type a custom model name" + # ) + # llm_num_ctx = gr.Slider( + # minimum=2**8, + # maximum=2**16, + # value=config['llm_num_ctx'], + # step=1, + # label="Max Context Length", + # info="Controls max context length model needs to handle (less = faster)", + # visible=config['llm_provider'] == "ollama" + # ) + # llm_temperature = gr.Slider( + # minimum=0.0, + # maximum=2.0, + # value=config['llm_temperature'], + # step=0.1, + # label="Temperature", + # info="Controls randomness in model outputs" + # ) + # with gr.Row(): + # llm_base_url = gr.Textbox( + # label="Base URL", + # value=config['llm_base_url'], + # info="API endpoint URL (if required)" + # ) + # llm_api_key = gr.Textbox( + # label="API Key", + # type="password", + # value=config['llm_api_key'], + # info="Your API key (leave blank to use .env)" + # ) + + # # Change event to update context length slider + # def update_llm_num_ctx_visibility(llm_provider): + # return gr.update(visible=llm_provider == "ollama") + + # # Bind the change event of llm_provider to update the visibility of context length slider + # llm_provider.change( + # 
fn=update_llm_num_ctx_visibility, + # inputs=llm_provider, + # outputs=llm_num_ctx + # ) + + # with gr.TabItem("🌐 Browser Settings", id=3): + # with gr.Group(): + # with gr.Row(): + # use_own_browser = gr.Checkbox( + # label="Use Own Browser", + # value=config['use_own_browser'], + # info="Use your existing browser instance", + # ) + # keep_browser_open = gr.Checkbox( + # label="Keep Browser Open", + # value=config['keep_browser_open'], + # info="Keep Browser Open between Tasks", + # ) + # headless = gr.Checkbox( + # label="Headless Mode", + # value=config['headless'], + # info="Run browser without GUI", + # ) + # disable_security = gr.Checkbox( + # label="Disable Security", + # value=config['disable_security'], + # info="Disable browser security features", + # ) + # enable_recording = gr.Checkbox( + # label="Enable Recording", + # value=config['enable_recording'], + # info="Enable saving browser recordings", + # ) + + # with gr.Row(): + # window_w = gr.Number( + # label="Window Width", + # value=config['window_w'], + # info="Browser window width", + # ) + # window_h = gr.Number( + # label="Window Height", + # value=config['window_h'], + # info="Browser window height", + # ) + + # save_recording_path = gr.Textbox( + # label="Recording Path", + # placeholder="e.g. ./tmp/record_videos", + # value=config['save_recording_path'], + # info="Path to save browser recordings", + # interactive=True, # Allow editing only if recording is enabled + # ) + + # save_trace_path = gr.Textbox( + # label="Trace Path", + # placeholder="e.g. ./tmp/traces", + # value=config['save_trace_path'], + # info="Path to save Agent traces", + # interactive=True, + # ) + + # save_agent_history_path = gr.Textbox( + # label="Agent History Save Path", + # placeholder="e.g., ./tmp/agent_history", + # value=config['save_agent_history_path'], + # info="Specify the directory where agent history should be saved.", + # interactive=True, + # ) + + # with gr.TabItem("🤖 Run Agent", id=4): + # task = gr.Textbox( + # label="Task Description", + # lines=4, + # placeholder="Enter your task here...", + # value=config['task'], + # info="Describe what you want the agent to do", + # ) + # add_infos = gr.Textbox( + # label="Additional Information", + # lines=3, + # placeholder="Add any helpful context or instructions...", + # info="Optional hints to help the LLM complete the task", + # ) + + # with gr.Row(): + # run_button = gr.Button("â–ļī¸ Run Agent", variant="primary", scale=2) + # stop_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) + + # # with gr.Row(): + # # browser_view = gr.HTML( + # # value="
Waiting for browser session...
", + # # label="Live Browser View", + # # ) + + # with gr.Row(): + # gr.HTML( + # """ + # + # """ + # ) + + # with gr.TabItem("🧐 Deep Research", id=5): + # research_task_input = gr.Textbox(label="Research Task", lines=5, value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.") + # with gr.Row(): + # max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° + # max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, precision=0) # precision=0 įĄŽäŋæ˜¯æ•´æ•° + # with gr.Row(): + # research_button = gr.Button("â–ļī¸ Run Deep Research", variant="primary", scale=2) + # stop_research_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) + # markdown_output_display = gr.Markdown(label="Research Report") + # markdown_download = gr.File(label="Download Research Report") + + + # with gr.TabItem("📊 Results", id=6): + # with gr.Group(): + + # recording_display = gr.Video(label="Latest Recording") + + # gr.Markdown("### Results") + # with gr.Row(): + # with gr.Column(): + # final_result_output = gr.Textbox( + # label="Final Result", lines=3, show_label=True + # ) + # with gr.Column(): + # errors_output = gr.Textbox( + # label="Errors", lines=3, show_label=True + # ) + # with gr.Row(): + # with gr.Column(): + # model_actions_output = gr.Textbox( + # label="Model Actions", lines=3, show_label=True + # ) + # with gr.Column(): + # model_thoughts_output = gr.Textbox( + # label="Model Thoughts", lines=3, show_label=True + # ) + + # trace_file = gr.File(label="Trace File") + + # agent_history_file = gr.File(label="Agent History") + + # # Bind the stop button click event after errors_output is defined + # stop_button.click( + # fn=stop_agent, + # inputs=[], + # outputs=[errors_output, stop_button, run_button], + # ) + + # # Run button click handler + # run_button.click( + # fn=run_with_stream, + # inputs=[ + # agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + # use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, + # save_recording_path, save_agent_history_path, save_trace_path, # Include the new path + # enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method + # ], + # outputs=[ + # # browser_view, # Browser view + # final_result_output, # Final result + # errors_output, # Errors + # model_actions_output, # Model actions + # model_thoughts_output, # Model thoughts + # recording_display, # Latest recording + # trace_file, # Trace file + # agent_history_file, # Agent history file + # stop_button, # Stop button + # run_button # Run button + # ], + # ) + + # # Run Deep Research + # research_button.click( + # fn=run_deep_search, + # inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless], + # outputs=[markdown_output_display, markdown_download, stop_research_button, research_button] + # ) + # # Bind the stop button click event after errors_output is defined + # stop_research_button.click( + # fn=stop_research_agent, + # inputs=[], + # 
outputs=[stop_research_button, research_button], + # ) + + # with gr.TabItem("đŸŽĨ Recordings", id=7): + # def list_recordings(save_recording_path): + # if not os.path.exists(save_recording_path): + # return [] + + # # Get all video files + # recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]")) + + # # Sort recordings by creation time (oldest first) + # recordings.sort(key=os.path.getctime) + + # # Add numbering to the recordings + # numbered_recordings = [] + # for idx, recording in enumerate(recordings, start=1): + # filename = os.path.basename(recording) + # numbered_recordings.append((recording, f"{idx}. {filename}")) + + # return numbered_recordings + + # recordings_gallery = gr.Gallery( + # label="Recordings", + # value=list_recordings(config['save_recording_path']), + # columns=3, + # height="auto", + # object_fit="contain" + # ) + + # refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary") + # refresh_button.click( + # fn=list_recordings, + # inputs=save_recording_path, + # outputs=recordings_gallery + # ) + + # with gr.TabItem("📁 Configuration", id=8): + # with gr.Group(): + # config_file_input = gr.File( + # label="Load Config File", + # file_types=[".pkl"], + # interactive=True + # ) + + # load_config_button = gr.Button("Load Existing Config From File", variant="primary") + # save_config_button = gr.Button("Save Current Config", variant="primary") + + # config_status = gr.Textbox( + # label="Status", + # lines=2, + # interactive=False + # ) + + # load_config_button.click( + # fn=update_ui_from_config, + # inputs=[config_file_input], + # outputs=[ + # agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, + # llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + # use_own_browser, keep_browser_open, headless, disable_security, enable_recording, + # window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path, + # task, config_status + # ] + # ) + + # save_config_button.click( + # fn=save_current_config, + # inputs=[ + # agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, + # llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, + # use_own_browser, keep_browser_open, headless, disable_security, + # enable_recording, window_w, window_h, save_recording_path, save_trace_path, + # save_agent_history_path, task, + # ], + # outputs=[config_status] + # ) # Attach the callback to the LLM provider dropdown @@ -1086,14 +1654,15 @@ def main(): parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent") parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") - parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI") + parser.add_argument("--theme", type=str, default="custom_theme", choices=theme_map.keys(), help="Theme to use for the UI") parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode") args = parser.parse_args() config_dict = default_config() demo = create_ui(config_dict, theme_name=args.theme) - demo.launch(server_name=args.ip, server_port=args.port) + demo.queue(False) + demo.launch(server_name=args.ip, server_port=args.port,favicon_path="logo.png",show_api=False,share=True) if __name__ == '__main__': main()
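The new iframe rules in custom_prompts.py instruct the model to emit switch_frame and back_to_main_frame actions, but this diff does not show those actions being registered on the controller (src/controller/custom_controller.py is untouched here). Below is a minimal sketch of how such actions could be registered, assuming browser-use's Controller.action decorator, ActionResult, and BrowserContext.get_current_page(); decorator signatures and import paths vary between browser-use versions, so treat this as an outline rather than the project's actual implementation.

# Hypothetical registration of the frame actions referenced by the new prompt rules.
# Assumes browser-use's Controller/ActionResult APIs; adjust imports and the
# decorator signature to the installed browser-use version.
from browser_use.agent.views import ActionResult
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller

controller = Controller()
_active_frame = {"frame": None}  # module-level scratch slot used only by this sketch

@controller.action("Switch to an iframe by its name attribute")
async def switch_frame(frame_name: str, browser: BrowserContext) -> ActionResult:
    page = await browser.get_current_page()
    frame = page.frame(name=frame_name)  # Playwright: look up a frame by its name
    if frame is None:
        return ActionResult(error=f"Frame '{frame_name}' not found")
    _active_frame["frame"] = frame  # later element lookups would target this frame
    return ActionResult(extracted_content=f"Switched to frame '{frame_name}'", include_in_memory=True)

@controller.action("Return to the main page after iframe operations")
async def back_to_main_frame(browser: BrowserContext) -> ActionResult:
    _active_frame["frame"] = None
    return ActionResult(extracted_content="Returned to the main frame", include_in_memory=True)
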