Skip to content

Commit

Permalink
Check if krunkit process is running
Browse files Browse the repository at this point in the history
The podman machine list technique isn't consistent across podman
machine versions.

Signed-off-by: Eric Curtin <[email protected]>
  • Loading branch information
ericcurtin committed Feb 7, 2025
1 parent 9d02f7d commit 7b2fe00
Showing 1 changed file with 73 additions and 66 deletions.
139 changes: 73 additions & 66 deletions ramalama/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,33 +32,37 @@ def container_manager():
return engine

if available("podman"):
if sys.platform != "darwin":
if sys.platform != "darwin" or is_krunkit_running() or is_podman_machine_running():
return "podman"

podman_machine_list = ["podman", "machine", "list"]
conman_args = ["podman", "machine", "list", "--format", "{{ .VMType }}"]
try:
output = run_cmd(podman_machine_list).stdout.decode("utf-8").strip()
if "running" not in output:
return None

output = run_cmd(conman_args).stdout.decode("utf-8").strip()
if output == "krunkit" or output == "libkrun":
return "podman"
else:
return None

except subprocess.CalledProcessError:
pass

return "podman"
return None

if available("docker"):
return "docker"

return None


def is_krunkit_running():
    """Report whether ``pgrep krunkit`` finds a matching process.

    Runs ``pgrep krunkit`` with stdout and stderr discarded and looks only at
    the exit status.

    Returns:
        bool: True when ``pgrep`` exits with status 0; False for any other
        exit status, or when ``pgrep`` itself cannot be launched.
    """
    try:
        result = subprocess.run(["pgrep", "krunkit"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError:
        # pgrep is missing or not executable: we cannot detect the process,
        # so report "not running" instead of crashing the caller.
        return False

    return result.returncode == 0


def is_podman_machine_running():
    """Check for a running podman machine whose VM type is krunkit/libkrun.

    First runs ``podman machine list`` and looks for the word "running" in
    its output. Only then runs the same command with
    ``--format "{{ .VMType }}"`` and compares the whole (stripped) output
    against the accepted VM types.

    Returns:
        bool: True when the VM type output is exactly "krunkit" or
        "libkrun"; False otherwise, including when either command raises
        subprocess.CalledProcessError.
    """
    # NOTE(review): the VM type output is compared as a single string, so
    # this presumably expects exactly one machine to be listed - confirm.
    list_cmd = ["podman", "machine", "list"]
    try:
        listing = run_cmd(list_cmd).stdout.decode("utf-8").strip()
        if "running" in listing:
            vm_type_cmd = list_cmd + ["--format", "{{ .VMType }}"]
            vm_type = run_cmd(vm_type_cmd).stdout.decode("utf-8").strip()
            return vm_type == "krunkit" or vm_type == "libkrun"
    except subprocess.CalledProcessError:
        pass

    return False


def perror(*args, **kwargs):
    """Print to standard error.

    All positional and keyword arguments are forwarded unchanged to the
    built-in ``print``; only the destination stream is fixed to
    ``sys.stderr``.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)

Expand Down Expand Up @@ -198,90 +202,93 @@ def download_file(url, dest_path, headers=None, show_progress=True):
if e.code == HTTP_RANGE_NOT_SATISFIABLE: # "Range Not Satisfiable" error (file already downloaded)
return # No need to retry

except urllib.error.URLError as e:
console.error(f"Network Error: {e.reason}")
retries += 1

except TimeoutError:
retries += 1
console.warning(f"TimeoutError: The server took too long to respond. Retrying {retries}/{max_retries}...")

except RuntimeError as e: # Catch network-related errors from HttpClient
retries += 1
console.warning(f"{e}. Retrying {retries}/{max_retries}...")

except IOError as e:
except (urllib.error.URLError, TimeoutError, RuntimeError, IOError) as e:
handle_download_error(e, retries, max_retries)
retries += 1
console.warning(f"I/O Error: {e}. Retrying {retries}/{max_retries}...")

except Exception as e:
console.error(f"Unexpected error: {str(e)}")
raise

if retries >= max_retries:
error_message = (
"\nDownload failed after multiple attempts.\n"
"Possible causes:\n"
"- Internet connection issue\n"
"- Server is down or unresponsive\n"
"- Firewall or proxy blocking the request\n"
)
console.error(error_message)
handle_max_retries_exceeded()
sys.exit(1)

time.sleep(2**retries * 0.1) # Exponential backoff (0.1s, 0.2s, 0.4s...)


def handle_download_error(e, retries, max_retries):
    """Report a download failure through ``console`` based on its type.

    The checks run from most to least specific: URLError and TimeoutError
    are both OSError (IOError) subclasses, so they must be tested before
    the generic I/O case. An exception matching none of the types is
    ignored.

    Args:
        e: The exception raised by the download attempt.
        retries: Retry counter to show in the message.
        max_retries: Retry limit to show in the message.
    """
    if isinstance(e, urllib.error.URLError):
        # Reported as an error, without the retry counter.
        console.error(f"Network Error: {e.reason}")
        return

    if isinstance(e, TimeoutError):
        console.warning(f"TimeoutError: The server took too long to respond. Retrying {retries}/{max_retries}...")
        return

    if isinstance(e, RuntimeError):
        console.warning(f"{e}. Retrying {retries}/{max_retries}...")
        return

    if isinstance(e, IOError):
        console.warning(f"I/O Error: {e}. Retrying {retries}/{max_retries}...")


def handle_max_retries_exceeded():
    """Emit the final "download failed" message via ``console.error``.

    The message is a header followed by a bulleted list of likely causes,
    with a leading and a trailing newline.
    """
    message_lines = [
        "",  # leading newline
        "Download failed after multiple attempts.",
        "Possible causes:",
        "- Internet connection issue",
        "- Server is down or unresponsive",
        "- Firewall or proxy blocking the request",
        "",  # trailing newline
    ]
    console.error("\n".join(message_lines))


def engine_version(engine):
    """Return the client version string reported by a container engine.

    Runs ``<engine> version --format "{{ .Client.Version }}"`` through
    ``run_cmd`` and returns its stdout decoded as UTF-8 with surrounding
    whitespace stripped.

    Args:
        engine: Name or path of the container engine executable.
    """
    version_cmd = [engine, "version", "--format", "{{ .Client.Version }}"]
    completed = run_cmd(version_cmd)
    return completed.stdout.decode("utf-8").strip()


def get_gpu():

envs = get_env_vars()
# If env vars already set return
if envs:
return

# ASAHI CASE
if os.path.exists('/proc/device-tree/compatible'):
try:
with open('/proc/device-tree/compatible', 'rb') as f:
content = f.read().split(b"\0")
# Check if "apple,arm-platform" is in the content
if b"apple,arm-platform" in content:
os.environ["ASAHI_VISIBLE_DEVICES"] = "1"
except OSError:
# Handle the case where the file does not exist
pass

# NVIDIA CASE
try:
command = ['nvidia-smi']
run_cmd(command).stdout.decode("utf-8")
set_asahi_visible_devices()

if is_nvidia_gpu_present():
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
return
except Exception:

set_amd_visible_devices()


def set_asahi_visible_devices():
    """Set ASAHI_VISIBLE_DEVICES=1 when the device tree names an Apple ARM platform.

    Reads ``/proc/device-tree/compatible`` as NUL-separated byte strings and
    looks for the entry ``apple,arm-platform``. If the file is missing or
    unreadable (OSError) the environment is left untouched.
    """
    try:
        with open('/proc/device-tree/compatible', 'rb') as compat_file:
            compat_entries = compat_file.read().split(b"\0")
            is_apple_arm = b"apple,arm-platform" in compat_entries
            if is_apple_arm:
                os.environ["ASAHI_VISIBLE_DEVICES"] = "1"
    except OSError:
        # No readable device tree on this host: nothing to set.
        pass

# ROCm/AMD CASE
i = 0
gpu_num = 0
gpu_bytes = 0
for fp in sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total')):

def is_nvidia_gpu_present():
    """Report whether ``nvidia-smi`` can be run successfully.

    Invokes ``nvidia-smi`` through ``run_cmd`` and decodes its stdout as
    UTF-8. Any exception raised along the way is treated as "no NVIDIA GPU".

    Returns:
        bool: True if the command ran and its output decoded; False on any
        exception.
    """
    try:
        run_cmd(['nvidia-smi']).stdout.decode("utf-8")
    except Exception:
        # Deliberately broad: detection is best-effort.
        return False

    return True


def set_amd_visible_devices():
    """Export the AMD GPU with the most VRAM through HIP_VISIBLE_DEVICES.

    Scans ``/sys/bus/pci/devices/*/mem_info_vram_total`` in sorted order,
    reading each file as an integer. Among devices reporting more than
    1073741824 bytes (1 GiB), the one with the largest value wins, and its
    position in the sorted listing is written to ``HIP_VISIBLE_DEVICES``.
    The environment is left untouched when no device qualifies.
    """
    gpu_num, gpu_bytes = 0, 0
    # enumerate() supplies the device index; no manual counter is needed.
    for i, fp in enumerate(sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total'))):
        with open(fp, 'r') as file:
            content = int(file.read())

        # Must exceed 1 GiB and beat the best candidate seen so far.
        if content > 1073741824 and content > gpu_bytes:
            gpu_bytes = content
            gpu_num = i

    if gpu_bytes:
        os.environ["HIP_VISIBLE_DEVICES"] = str(gpu_num)


def get_env_vars():
Expand Down

0 comments on commit 7b2fe00

Please sign in to comment.