24
24
get_label_coordinates ,
25
25
)
26
26
from operate .utils .ocr import get_text_coordinates , get_text_element
27
- from operate .utils .screenshot import capture_screen_with_cursor
27
+ from operate .utils .screenshot import capture_screen_with_cursor , compress_screenshot
28
28
from operate .utils .style import ANSI_BRIGHT_MAGENTA , ANSI_GREEN , ANSI_RED , ANSI_RESET
29
29
30
30
# Load configuration
@@ -153,9 +153,13 @@ async def call_qwen_vl_with_ocr(messages, objective, model):
153
153
if not os .path .exists (screenshots_dir ):
154
154
os .makedirs (screenshots_dir )
155
155
156
- screenshot_filename = os .path .join (screenshots_dir , "screenshot.png" )
157
156
# Call the function to capture the screen with the cursor
158
- capture_screen_with_cursor (screenshot_filename )
157
+ raw_screenshot_filename = os .path .join (screenshots_dir , "raw_screenshot.png" )
158
+ capture_screen_with_cursor (raw_screenshot_filename )
159
+
160
+ # Compress the screenshot image to reduce its file size
161
+ screenshot_filename = os .path .join (screenshots_dir , "screenshot.jpeg" )
162
+ compress_screenshot (raw_screenshot_filename , screenshot_filename )
159
163
160
164
with open (screenshot_filename , "rb" ) as img_file :
161
165
img_base64 = base64 .b64encode (img_file .read ()).decode ("utf-8" )
@@ -179,7 +183,7 @@ async def call_qwen_vl_with_ocr(messages, objective, model):
179
183
messages .append (vision_message )
180
184
181
185
response = client .chat .completions .create (
182
- model = "qwen2.5-vl-7b -instruct" ,
186
+ model = "qwen2.5-vl-72b -instruct" ,
183
187
messages = messages ,
184
188
)
185
189
0 commit comments