Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions agent/server.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import contextlib
import json
import os
import platform
Expand Down Expand Up @@ -62,6 +63,14 @@ def docker_login(self, registry):
password = registry["password"]
return self.execute(f"docker login -u {username} -p {password} {url}")

def docker_inspect_manifest(self, image_tag: str):
try:
return self.execute(f"docker manifest inspect {image_tag}")
except AgentException as e:
if "no such manifest" in e.data.get("output", ""):
raise Exception(f"Image {image_tag} not found in registry") from e
raise

def establish_connection_with_registry(self, max_retries: int, registry: dict[str, str]):
"""Given the attempt count try and establish connection with the registry else Raise"""
for attempt in range(max_retries):
Expand Down Expand Up @@ -383,8 +392,11 @@ def archive_bench(self, name):
bench_directory = os.path.join(self.benches_directory, name)
if not os.path.exists(bench_directory):
return

image_tag = None
try:
bench = Bench(name, self)
image_tag = bench.docker_image
except json.JSONDecodeError:
self.disable_production_on_bench(name)
except FileNotFoundError as e:
Expand All @@ -397,6 +409,16 @@ def archive_bench(self, name):

self.container_exists(name)
self.move_bench_to_archived_directory(name)
if image_tag:
self.remove_docker_image(image_tag)

def remove_docker_image(self, image_tag: str):
# Check if the image is present in registry before trying to remove locally
with contextlib.suppress(Exception):
manifest = self.docker_inspect_manifest(image_tag)
if not manifest:
return
Comment on lines +418 to +420
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 if not manifest: return is unreachable dead code

docker_inspect_manifest never returns a falsy value on success — if docker manifest inspect exits cleanly it emits a non-empty JSON blob; if the image is absent it raises Exception("Image ... not found in registry"); for all other failures it re-raises AgentException. None of those paths produce a falsy return, so the early-return guard will never fire. The guard can be removed to avoid confusion about why the subsequent docker rmi might be skipped.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

self.execute(f"docker rmi {image_tag} --force")
Comment on lines +417 to +421
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Silent exception swallows both "not in registry" and real errors

contextlib.suppress(Exception) catches every exception thrown inside the block — including transient network failures, Docker daemon errors, and authentication problems. When docker_inspect_manifest raises because the registry is temporarily unreachable (the AgentException re-raise path), the image is silently left on disk with no log entry and no way to tell the difference from the intentional "image not in registry, skip removal" case. Adding at least a logger.warning before suppressing would make failures observable without changing the best-effort semantics.


@job("Cleanup Unused Files", priority="low")
def cleanup_unused_files(self, force: bool = False):
Expand Down
Loading