Skip to content

Pratham/UI #368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ ENV BROWSER_USE_LOGGING_LEVEL=info
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
ENV ANONYMIZED_TELEMETRY=false
ENV DISPLAY=:99
ENV RESOLUTION=1920x1080x24
ENV RESOLUTION=960x540x24
ENV VNC_PASSWORD=vncpassword
ENV CHROME_PERSISTENT_SESSION=true
ENV RESOLUTION_WIDTH=1920
ENV RESOLUTION_HEIGHT=1080
ENV RESOLUTION_WIDTH=960
ENV RESOLUTION_HEIGHT=540

# Set up supervisor configuration
RUN mkdir -p /var/log/supervisor
Expand Down
73 changes: 73 additions & 0 deletions custom_theme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from __future__ import annotations

from collections.abc import Iterable

from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes


class custom_theme(Base):
    """Gradio theme that layers gradient buttons and checkbox labels on ``Base``.

    The hue, size and font arguments are forwarded to ``Base.__init__``
    unchanged. After that the theme is named ``"custom_theme"`` and a fixed
    set of button, checkbox, slider and shadow variables is overridden for
    both light and dark mode.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.blue,
        secondary_hue: colors.Color | str = colors.sky,
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_lg,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Montserrat"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Inter"),
            "ui-monospace",
            "Consolas",
            "monospace",
        ),
    ):
        """Build the theme; every argument is keyword-only and passed to ``Base``."""
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        self.name = "custom_theme"

        # Borders and hover animation shared by all buttons.
        geometry = {
            "button_border_width": "0px",
            "checkbox_label_border_width": "1px",
            "button_transform_hover": "scale(1.02)",
            "button_transition": "all 0.1s ease-in-out",
        }

        # Light-mode colours: primary/secondary gradients on buttons and
        # selected checkbox labels.
        light_mode = {
            "slider_color": "*primary_400",
            "button_primary_background_fill": "linear-gradient(120deg, *secondary_500 0%, *primary_300 60%, *primary_400 100%)",
            "button_primary_background_fill_hover": "linear-gradient(120deg, *secondary_400 0%, *primary_300 60%, *primary_300 100%)",
            "button_primary_text_color": "*button_secondary_text_color",
            "button_secondary_background_fill": "linear-gradient(120deg, *neutral_300 0%, *neutral_100 60%, *neutral_200 100%)",
            "button_secondary_background_fill_hover": "linear-gradient(120deg, *neutral_200 0%, *neutral_100 60%, *neutral_100 100%)",
            "checkbox_label_background_fill_selected": "linear-gradient(120deg, *primary_400 0%, *primary_300 60%, *primary_400 100%)",
            "checkbox_label_border_color_selected": "*primary_400",
            "checkbox_background_color_selected": "*primary_400",
            "checkbox_label_text_color_selected": "*button_secondary_text_color",
        }

        # Dark-mode counterparts of the variables above.
        dark_mode = {
            "slider_color_dark": "*primary_500",
            "button_primary_background_fill_dark": "linear-gradient(120deg, *secondary_600 0%, *primary_500 60%, *primary_600 100%)",
            "button_primary_background_fill_hover_dark": "linear-gradient(120deg, *secondary_500 0%, *primary_500 60%, *primary_500 100%)",
            "button_primary_text_color_dark": "*button_secondary_text_color",
            "button_secondary_background_fill_dark": "linear-gradient(120deg, *neutral_700 0%, *neutral_600 60%, *neutral_700 100%)",
            "button_secondary_background_fill_hover_dark": "linear-gradient(120deg, *neutral_600 0%, *neutral_600 60%, *neutral_700 100%)",
            "checkbox_label_background_fill_selected_dark": "linear-gradient(120deg, *primary_600 0%, *primary_500 60%, *primary_600 100%)",
            "checkbox_label_border_color_selected_dark": "*primary_600",
            "checkbox_background_color_selected_dark": "*primary_600",
            "checkbox_label_text_color_selected_dark": "*button_secondary_text_color",
        }

        # Shadows: drop shadow on blocks and hovered buttons, none in dark mode.
        shadows = {
            "block_shadow": "*shadow_drop_lg",
            "button_secondary_shadow_hover": "*shadow_drop_lg",
            "button_primary_shadow_hover": "0 1px 3px 0 *primary_200, 0 1px 2px -1px *primary_200",
            "button_secondary_shadow_dark": "none",
            "button_primary_shadow_dark": "none",
        }

        # Dict unpacking keeps the keyword order of the original single call.
        super().set(**geometry, **light_mode, **dark_mode, **shadows)
2 changes: 2 additions & 0 deletions docker-setup.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the image from the Dockerfile in the current directory and tag it "agent".
docker build -t agent .

# Abort if the build failed; otherwise `docker run` would start a stale
# "agent" image from an earlier build (or fail with a confusing error).
if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }

# Start the container, publishing the web UI (7788), noVNC (6080) and VNC (5901) ports.
docker run -p 7788:7788 -p 6080:6080 -p 5901:5901 agent
Binary file added logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 47 additions & 0 deletions prompts/prompt.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
Step-by-Step Instructions

1. Navigate to Athenahealth Preview Environment
- Open a web browser and go to: https://preview.athenahealth.com/

2. Log In
- Enter the credentials:
- Username: p-bkumar1
- Password: Xcaliber@12345
- Click the Login button.

3. Select the Default Department
- If prompted, choose the default department from the list (e.g., "7 Hills Department").

4. Access the "Patients" Menu
- Locate the header at the top of the dashboard.
- Click on the "Patients" menu to open the dropdown.

5. Access Document Search
- Option 1 (Primary Attempt)
- In the dropdown, look for "Documents > Document Search" and click it.
- If the primary attempt fails (error 404 or element not found):
- Refresh the page.
- Retry clicking "Document Search" (up to 3 times with 2-second intervals).

6. Handle Iframes (Fallback Approach)
- Use the following sequence if Document Search is nested in iframes:
- Switch to the main iframe context:
- Locate and switch to iframe[name="frMain"].
- Switch to the sub-iframe:
- Locate and switch to iframe[id="searchFrame"] or iframe[name="frMain"] > iframe (if nested).
- Fill the DOCUMENTID and click Search:
- Enter the value "116873" in the DOCUMENTID field.
- Click the "Search" button.
- Retry up to 3 times:
- Wait 2 seconds between each retry if elements are missing.

7. Observe and Report
- After clicking "Document Search" or executing the iframe fallback, observe the resulting page and report whether the search results are displayed.

Common Issues and Solutions
- Element Not Found: Ensure the iframe is fully loaded (wait for 5–10 seconds).
- Button Not Clickable: Try force-clicking the button again.
- Network Errors: Verify your internet connection and retry the login process.

Result
- After following the steps, patient lab reports will be displayed. The task is completed successfully.
18 changes: 18 additions & 0 deletions setup.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Rebuild the local virtual environment from scratch, install dependencies,
# and launch the web UI on 127.0.0.1:7788.

# Step 1: Clean Up Any Previous Environment
# `deactivate` is only defined while a virtual environment is active in this
# session, so call it only when it exists.
if (Get-Command deactivate -ErrorAction SilentlyContinue) {
    deactivate
}

# On a fresh checkout there is no .venv yet; skip removal instead of erroring.
if (Test-Path .venv) {
    Remove-Item -Recurse -Force .venv
}

# Step 2: Set Up Python Environment
uv venv --python 3.11

# Activate the virtual environment
.\.venv\Scripts\Activate.ps1

# Step 3: Install Dependencies
uv pip install -r requirements.txt
playwright install

# Report completion before starting the server: the next command blocks
# until the web UI exits, so a message placed after it would appear too late.
Write-Output "Setup complete. Virtual environment activated."

# Step 4: Run the web UI locally
python webui.py --ip 127.0.0.1 --port 7788
10 changes: 5 additions & 5 deletions src/agent/custom_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ def __init__(
browser: Browser | None = None,
browser_context: BrowserContext | None = None,
controller: Controller = Controller(),
use_vision: bool = True,
use_vision: bool = False,
use_vision_for_planner: bool = False,
save_conversation_path: Optional[str] = None,
save_conversation_path_encoding: Optional[str] = 'utf-8',
max_failures: int = 3,
max_failures: int = 5,
retry_delay: int = 10,
system_prompt_class: Type[SystemPrompt] = SystemPrompt,
agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt,
max_input_tokens: int = 128000,
max_input_tokens: int = 1280000,
validate_output: bool = False,
message_context: Optional[str] = None,
generate_gif: bool | str = True,
Expand Down Expand Up @@ -281,8 +281,8 @@ async def _run_planner(self) -> Optional[str]:
planner_messages[-1] = HumanMessage(content=new_msg)

# Get planner output
response = await self.planner_llm.ainvoke(planner_messages)
plan = response.content
response = await self.ainvoke(planner_messages)
plan = response.contentplanner_llm
last_state_message = planner_messages[-1]
# remove image from last state message
if isinstance(last_state_message.content, list):
Expand Down
41 changes: 39 additions & 2 deletions src/agent/custom_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def important_rules(self) -> str:
{"go_to_url": {"url": "https://example.com"}},
{"extract_page_content": {}}
]
- Iframe interaction: [
{"switch_frame": {"frame_name": "GlobalNav"}},
{"click_element": {"index": 1}},
{"switch_frame": {"frame_name": "frameContent"}},
{"click_element": {"index": 2}}
]


3. ELEMENT INTERACTION:
Expand Down Expand Up @@ -82,8 +88,39 @@ def important_rules(self) -> str:
- Only provide the action sequence until you think the page will change.
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
- only use multiple actions if it makes sense.

9. Extraction:
9. IFrames:
- Identify iframes using their names or unique identifiers
- Switch to iframes before interacting with nested elements
- Use frame locators for element interaction within iframes
- Example action sequence for iframe interaction:
[
{"switch_frame": {"frame_name": "GlobalNav"}},
{"click_element": {"index": 1}},
{"switch_frame": {"frame_name": "frameContent"}},
{"click_element": {"index": 2}}
]
- Always return to the main frame after iframe operations
- Handle nested iframes by chaining switch_frame actions
10. Action Sequencing for Iframes:
- Always start iframe interactions with switch_frame
- Perform all element interactions within the iframe context
- Use back_to_main_frame after completing iframe operations
- For nested iframes, chain switch_frame actions
- Example nested iframe sequence:
[
{"switch_frame": {"frame_name": "outerFrame"}},
{"switch_frame": {"frame_name": "innerFrame"}},
{"click_element": {"index": 1}},
{"back_to_main_frame": {}}
]

11. Visual Context for Iframes:
- Bounding boxes for iframe elements will have frame name labels
- Example: [GlobalNav] <button>Patients</button>
- Use frame labels to identify element context
- Elements without frame labels are in the main page

12. Extraction:
- If your task is to find information or do research - call extract_content on the specific pages to get and store the information.

"""
Expand Down
2 changes: 1 addition & 1 deletion src/utils/default_config_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def default_config():
"agent_type": "custom",
"max_steps": 100,
"max_actions_per_step": 10,
"use_vision": True,
"use_vision": False,
"tool_calling_method": "auto",
"llm_provider": "openai",
"llm_model_name": "gpt-4o",
Expand Down
121 changes: 109 additions & 12 deletions supervisord.conf
Original file line number Diff line number Diff line change
@@ -1,3 +1,100 @@
# [supervisord]
# user=root
# nodaemon=true
# logfile=/dev/stdout
# logfile_maxbytes=0
# loglevel=debug

# [program:xvfb]
# command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
# autorestart=true
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0
# priority=100
# startsecs=3
# stopsignal=TERM
# stopwaitsecs=10

# [program:vnc_setup]
# command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
# autorestart=false
# startsecs=0
# priority=150
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0

# [program:x11vnc]
# command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -bg -rfbport 5901 -o /var/log/x11vnc.log"
# autorestart=true
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0
# priority=200
# startretries=10
# startsecs=10
# stopsignal=TERM
# stopwaitsecs=10
# depends_on=vnc_setup,xvfb

# [program:x11vnc_log]
# command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
# autorestart=true
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0
# priority=250
# stopsignal=TERM
# stopwaitsecs=5
# depends_on=x11vnc

# [program:novnc]
# command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc --http-header='Content-Security-Policy: frame-ancestors http://localhost:7788/'"
# autorestart=true
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0
# priority=300
# startretries=5
# startsecs=3
# depends_on=x11vnc

# [program:persistent_browser]
# environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
# command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
# autorestart=true
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0
# priority=350
# startretries=5
# startsecs=10
# stopsignal=TERM
# stopwaitsecs=15
# depends_on=novnc

# [program:webui]
# command=python webui.py --ip 0.0.0.0 --port 7788
# directory=/app
# autorestart=true
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0
# priority=400
# startretries=3
# startsecs=3
# stopsignal=TERM
# stopwaitsecs=10
# depends_on=persistent_browser

[supervisord]
user=root
nodaemon=true
Expand All @@ -17,18 +114,18 @@ startsecs=3
stopsignal=TERM
stopwaitsecs=10

[program:vnc_setup]
command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
autorestart=false
startsecs=0
priority=150
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
# [program:vnc_setup]
# command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
# autorestart=false
# startsecs=0
# priority=150
# stdout_logfile=/dev/stdout
# stdout_logfile_maxbytes=0
# stderr_logfile=/dev/stderr
# stderr_logfile_maxbytes=0

[program:x11vnc]
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
# x11vnc must stay in the foreground: supervisord tracks the process it
# spawned, so `-bg` (daemonize) would make this wrapper exit immediately and
# trigger restart loops on a port that is already bound.
# NOTE(review): `-nopw` serves the display without a VNC password — confirm
# this is intended before publishing port 5901.
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -nopw -forever -shared -rfbport 5901 -o /var/log/x11vnc.log"
autorestart=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
Expand All @@ -39,7 +136,7 @@ startretries=10
startsecs=10
stopsignal=TERM
stopwaitsecs=10
depends_on=vnc_setup,xvfb
depends_on=xvfb

[program:x11vnc_log]
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
Expand Down Expand Up @@ -93,4 +190,4 @@ startretries=3
startsecs=3
stopsignal=TERM
stopwaitsecs=10
depends_on=persistent_browser
depends_on=persistent_browser
Loading