
Commit d70db73

fix multiple tab
1 parent 2df50b3 commit d70db73

5 files changed: +45 -25 lines


src/agent/custom_message_manager.py

+4 -2

@@ -74,7 +74,8 @@ def cut_messages(self):
         min_message_len = 2 if self.context_content is not None else 1
 
         while diff > 0 and len(self.state.history.messages) > min_message_len:
-            self.state.history.remove_message(min_message_len)  # always remove the oldest message
+            msg = self.state.history.messages.pop(min_message_len)
+            self.state.history.current_tokens -= msg.metadata.tokens
             diff = self.state.history.current_tokens - self.settings.max_input_tokens
 
     def add_state_message(
@@ -104,6 +105,7 @@ def _remove_state_message_by_index(self, remove_ind=-1) -> None:
             if isinstance(self.state.history.messages[i].message, HumanMessage):
                 remove_cnt += 1
                 if remove_cnt == abs(remove_ind):
-                    self.state.history.messages.pop(i)
+                    msg = self.state.history.messages.pop(i)
+                    self.state.history.current_tokens -= msg.metadata.tokens
                     break
             i -= 1
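
The message-manager change keeps the running token count in sync with the history: every removed message now also subtracts its metadata.tokens from current_tokens, so the trimming loop in cut_messages compares against an accurate budget. A minimal sketch of that invariant, using hypothetical stand-in classes (_Metadata, _ManagedMessage, _History are illustrative, not the project's actual types):

from dataclasses import dataclass, field
from typing import List

@dataclass
class _Metadata:
    tokens: int

@dataclass
class _ManagedMessage:
    content: str
    metadata: _Metadata

@dataclass
class _History:
    messages: List[_ManagedMessage] = field(default_factory=list)
    current_tokens: int = 0

    def add(self, msg: _ManagedMessage) -> None:
        self.messages.append(msg)
        self.current_tokens += msg.metadata.tokens

    def remove_at(self, index: int) -> None:
        # Popping without adjusting the counter leaves current_tokens stale,
        # so the trimming loop would keep comparing against a wrong total.
        msg = self.messages.pop(index)
        self.current_tokens -= msg.metadata.tokens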

src/agent/custom_prompts.py

+12 -0

@@ -21,6 +21,18 @@ def _load_prompt_template(self) -> None:
         except Exception as e:
             raise RuntimeError(f'Failed to load system prompt template: {e}')
 
+    def get_system_message(self) -> SystemMessage:
+        """
+        Get the system prompt for the agent.
+
+        Returns:
+            SystemMessage: Formatted system prompt
+        """
+        prompt = self.prompt_template.format(max_actions=self.max_actions_per_step,
+                                             available_actions=self.default_action_description)
+
+        return SystemMessage(content=prompt)
+
 
 class CustomAgentMessagePrompt(AgentMessagePrompt):
     def __init__(
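
A quick sketch of how the new get_system_message is expected to be used, assuming SystemMessage comes from langchain_core.messages and the template carries the {max_actions}/{available_actions} placeholders added to custom_system_prompt.md below; _PromptStub and its attribute values are illustrative, not the project's actual class:

from langchain_core.messages import SystemMessage

class _PromptStub:
    # Stand-in exposing only the attributes get_system_message reads.
    prompt_template = "Use maximum {max_actions} actions per sequence.\n\nAvailable Actions:\n{available_actions}"
    max_actions_per_step = 10
    default_action_description = "go_to_url, click_element, input_text, extract_content"

    def get_system_message(self) -> SystemMessage:
        prompt = self.prompt_template.format(max_actions=self.max_actions_per_step,
                                             available_actions=self.default_action_description)
        return SystemMessage(content=prompt)

print(_PromptStub().get_system_message().content)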

src/agent/custom_system_prompt.md

+6 -2

@@ -30,7 +30,7 @@ Example:
   ]
 }}
 
-2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
+2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {max_actions} actions per sequence.
 Common action sequences:
 - Form filling: [{{"input_text": {{"index": 1, "text": "username"}}}}, {{"input_text": {{"index": 2, "text": "password"}}}}, {{"click_element": {{"index": 3}}}}]
 - Navigation and extraction: [{{"go_to_url": {{"url": "https://example.com"}}}}, {{"extract_content": {{"goal": "extract the names"}}}}]
@@ -39,6 +39,7 @@ Common action sequences:
 - Only provide the action sequence until an action which changes the page state significantly.
 - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
 - only use multiple actions if it makes sense.
+- Only chose from below available actions.
 
 3. ELEMENT INTERACTION:
 - Only use indexes of the interactive elements
@@ -73,4 +74,7 @@ Common action sequences:
 
 9. Extraction:
 - If your task is to find information - call extract_content on the specific pages to get and store the information.
-Your responses must be always JSON with the specified format.
+Your responses must be always JSON with the specified format.
+
+Available Actions:
+{available_actions}
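
Why the brace change in the ACTIONS rule matters, in a minimal sketch (assuming the template is rendered with str.format, as get_system_message above does): doubled braces survive formatting as literal braces, so the old {{max_actions}} would have reached the model verbatim instead of being replaced by the configured limit.

template = 'Use maximum {max_actions} actions. Example item: {{"click_element": {{"index": 3}}}}'
print(template.format(max_actions=10))
# Use maximum 10 actions. Example item: {"click_element": {"index": 3}}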

tests/test_browser_use.py

+19 -19

@@ -118,26 +118,26 @@ async def test_browser_use_custom():
     #     api_key=os.getenv("OPENAI_API_KEY", ""),
     # )
 
+    llm = utils.get_llm_model(
+        provider="azure_openai",
+        model_name="gpt-4o",
+        temperature=0.5,
+        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
+        api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
+    )
+
     # llm = utils.get_llm_model(
-    #     provider="azure_openai",
-    #     model_name="gpt-4o",
+    #     provider="google",
+    #     model_name="gemini-2.0-flash",
     #     temperature=0.6,
-    #     base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
-    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
+    #     api_key=os.getenv("GOOGLE_API_KEY", "")
     # )
 
-    llm = utils.get_llm_model(
-        provider="google",
-        model_name="gemini-2.0-flash",
-        temperature=0.6,
-        api_key=os.getenv("GOOGLE_API_KEY", "")
-    )
-
-    llm = utils.get_llm_model(
-        provider="deepseek",
-        model_name="deepseek-reasoner",
-        temperature=0.8
-    )
+    # llm = utils.get_llm_model(
+    #     provider="deepseek",
+    #     model_name="deepseek-reasoner",
+    #     temperature=0.8
+    # )
 
     # llm = utils.get_llm_model(
     #     provider="deepseek",
@@ -156,9 +156,9 @@ async def test_browser_use_custom():
     controller = CustomController()
     use_own_browser = True
     disable_security = True
-    use_vision = False  # Set to False when using DeepSeek
+    use_vision = True  # Set to False when using DeepSeek
 
-    max_actions_per_step = 1
+    max_actions_per_step = 10
     playwright = None
     browser = None
     browser_context = None
@@ -193,7 +193,7 @@ async def test_browser_use_custom():
         )
     )
     agent = CustomAgent(
-        task="Give me stock price of Nvidia",
+        task="open youtube in tab 1 , open google email in tab 2, open facebook in tab 3",
        add_infos="",  # some hints for llm to complete the task
        llm=llm,
        browser=browser,
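
The test changes work together: the task now asks for three separate tabs, and max_actions_per_step is raised from 1 to 10 so the agent can emit the whole tab-opening sequence in one step. A hypothetical action sequence in the JSON shape the system prompt describes (the open_tab action name is assumed from browser-use's default controller and is not shown in this diff):

expected_actions = [
    {"go_to_url": {"url": "https://www.youtube.com"}},  # tab 1: YouTube
    {"open_tab": {"url": "https://mail.google.com"}},   # tab 2: Gmail
    {"open_tab": {"url": "https://www.facebook.com"}},  # tab 3: Facebook
]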

webui.py

+4 -2

@@ -332,7 +332,7 @@ async def run_org_agent(
     try:
         global _global_browser, _global_browser_context, _global_agent
 
-        extra_chromium_args = [f"--window-size={window_w},{window_h}"]
+        extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"]
         cdp_url = chrome_cdp
 
         if use_own_browser:
@@ -362,6 +362,7 @@ async def run_org_agent(
                 config=BrowserContextConfig(
                     trace_path=save_trace_path if save_trace_path else None,
                     save_recording_path=save_recording_path if save_recording_path else None,
+                    save_downloads_path="./tmp/downloads",
                     no_viewport=False,
                     browser_window_size=BrowserContextWindowSize(
                         width=window_w, height=window_h
@@ -435,7 +436,7 @@ async def run_custom_agent(
     try:
         global _global_browser, _global_browser_context, _global_agent
 
-        extra_chromium_args = [f"--window-size={window_w},{window_h}"]
+        extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"]
         cdp_url = chrome_cdp
         if use_own_browser:
             cdp_url = os.getenv("CHROME_CDP", chrome_cdp)
@@ -470,6 +471,7 @@ async def run_custom_agent(
                     trace_path=save_trace_path if save_trace_path else None,
                     save_recording_path=save_recording_path if save_recording_path else None,
                     no_viewport=False,
+                    save_downloads_path="./tmp/downloads",
                     browser_window_size=BrowserContextWindowSize(
                         width=window_w, height=window_h
                     ),
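
Both code paths now pass save_downloads_path="./tmp/downloads" to BrowserContextConfig. A minimal sketch (not part of the commit) of preparing that directory before the context is created, using the same relative path:

import os

# Create the download target ahead of time so saved files have somewhere to land.
os.makedirs("./tmp/downloads", exist_ok=True)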
