Skip to content

Commit 4c471cc

Browse files
author
何涛
committed
feat: compress screenshot image to make it smaller
1 parent 395f50d commit 4c471cc

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

operate/models/apis.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
get_label_coordinates,
2525
)
2626
from operate.utils.ocr import get_text_coordinates, get_text_element
27-
from operate.utils.screenshot import capture_screen_with_cursor
27+
from operate.utils.screenshot import capture_screen_with_cursor, compress_screenshot
2828
from operate.utils.style import ANSI_BRIGHT_MAGENTA, ANSI_GREEN, ANSI_RED, ANSI_RESET
2929

3030
# Load configuration
@@ -153,9 +153,13 @@ async def call_qwen_vl_with_ocr(messages, objective, model):
153153
if not os.path.exists(screenshots_dir):
154154
os.makedirs(screenshots_dir)
155155

156-
screenshot_filename = os.path.join(screenshots_dir, "screenshot.png")
157156
# Call the function to capture the screen with the cursor
158-
capture_screen_with_cursor(screenshot_filename)
157+
raw_screenshot_filename = os.path.join(screenshots_dir, "raw_screenshot.png")
158+
capture_screen_with_cursor(raw_screenshot_filename)
159+
160+
# Compress the screenshot to reduce its file size
161+
screenshot_filename = os.path.join(screenshots_dir, "screenshot.jpeg")
162+
compress_screenshot(raw_screenshot_filename, screenshot_filename)
159163

160164
with open(screenshot_filename, "rb") as img_file:
161165
img_base64 = base64.b64encode(img_file.read()).decode("utf-8")
@@ -179,7 +183,7 @@ async def call_qwen_vl_with_ocr(messages, objective, model):
179183
messages.append(vision_message)
180184

181185
response = client.chat.completions.create(
182-
model="qwen2.5-vl-7b-instruct",
186+
model="qwen2.5-vl-72b-instruct",
183187
messages=messages,
184188
)
185189

operate/utils/screenshot.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,18 @@ def capture_screen_with_cursor(file_path):
2525
subprocess.run(["screencapture", "-C", file_path])
2626
else:
2727
print(f"The platform you're using ({user_platform}) is not currently supported")
28+
29+
30+
def compress_screenshot(raw_screenshot_filename, screenshot_filename):
    """Re-encode a screenshot as a JPEG (quality 85) to reduce its size.

    JPEG has no alpha channel, so images carrying transparency are first
    flattened onto an opaque white background; all other images are simply
    converted to RGB.

    Args:
        raw_screenshot_filename: Path of the source image to read.
        screenshot_filename: Path the JPEG output is written to.
    """
    with Image.open(raw_screenshot_filename) as img:
        # Transparency can come from an explicit alpha band (RGBA, LA) or
        # from a palette image that declares a transparent entry.
        has_alpha = img.mode in ('RGBA', 'LA') or (
            img.mode == 'P' and 'transparency' in img.info
        )
        if has_alpha:
            # Normalise to RGBA first: 'LA' has only two bands and 'P' only
            # one, so indexing band 3 on the raw image would raise IndexError.
            rgba = img.convert('RGBA')
            # Create a white background image
            background = Image.new('RGB', rgba.size, (255, 255, 255))
            # Paste the image onto the background, using the alpha band
            # (always the last band of an RGBA image) as the mask
            background.paste(rgba, mask=rgba.split()[-1])
            # Save the result as JPEG
            background.save(screenshot_filename, 'JPEG', quality=85)  # Adjust quality as needed
        else:
            # If no alpha channel, simply convert and save
            img.convert('RGB').save(screenshot_filename, 'JPEG', quality=85)

0 commit comments

Comments
 (0)