From 7b2fe0003004507083b663eea5483ca58b1097b2 Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Fri, 7 Feb 2025 19:55:45 +0000 Subject: [PATCH] Check if krunkit process is running The podman machine list technique isn't consistent across podman machine versions. Signed-off-by: Eric Curtin --- ramalama/common.py | 139 ++++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/ramalama/common.py b/ramalama/common.py index 58977c0d..d4bf88bf 100644 --- a/ramalama/common.py +++ b/ramalama/common.py @@ -32,26 +32,10 @@ def container_manager(): return engine if available("podman"): - if sys.platform != "darwin": + if sys.platform != "darwin" or is_krunkit_running() or is_podman_machine_running(): return "podman" - podman_machine_list = ["podman", "machine", "list"] - conman_args = ["podman", "machine", "list", "--format", "{{ .VMType }}"] - try: - output = run_cmd(podman_machine_list).stdout.decode("utf-8").strip() - if "running" not in output: - return None - - output = run_cmd(conman_args).stdout.decode("utf-8").strip() - if output == "krunkit" or output == "libkrun": - return "podman" - else: - return None - - except subprocess.CalledProcessError: - pass - - return "podman" + return None if available("docker"): return "docker" @@ -59,6 +43,26 @@ def container_manager(): return None +def is_krunkit_running(): + result = subprocess.run(["pgrep", "krunkit"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return result.returncode == 0 + + +def is_podman_machine_running(): + podman_machine_list = ["podman", "machine", "list"] + conman_args = ["podman", "machine", "list", "--format", "{{ .VMType }}"] + try: + output = run_cmd(podman_machine_list).stdout.decode("utf-8").strip() + if "running" not in output: + return False + + output = run_cmd(conman_args).stdout.decode("utf-8").strip() + return output in {"krunkit", "libkrun"} + + except subprocess.CalledProcessError: + return False + + def perror(*args, **kwargs): 
print(*args, file=sys.stderr, **kwargs) @@ -198,40 +202,43 @@ def download_file(url, dest_path, headers=None, show_progress=True): if e.code == HTTP_RANGE_NOT_SATISFIABLE: # "Range Not Satisfiable" error (file already downloaded) return # No need to retry - except urllib.error.URLError as e: - console.error(f"Network Error: {e.reason}") - retries += 1 - - except TimeoutError: - retries += 1 - console.warning(f"TimeoutError: The server took too long to respond. Retrying {retries}/{max_retries}...") - - except RuntimeError as e: # Catch network-related errors from HttpClient - retries += 1 - console.warning(f"{e}. Retrying {retries}/{max_retries}...") - - except IOError as e: + except (urllib.error.URLError, TimeoutError, RuntimeError, IOError) as e: + handle_download_error(e, retries, max_retries) retries += 1 - console.warning(f"I/O Error: {e}. Retrying {retries}/{max_retries}...") except Exception as e: console.error(f"Unexpected error: {str(e)}") raise if retries >= max_retries: - error_message = ( - "\nDownload failed after multiple attempts.\n" - "Possible causes:\n" - "- Internet connection issue\n" - "- Server is down or unresponsive\n" - "- Firewall or proxy blocking the request\n" - ) - console.error(error_message) + handle_max_retries_exceeded() sys.exit(1) time.sleep(2**retries * 0.1) # Exponential backoff (0.1s, 0.2s, 0.4s...) +def handle_download_error(e, retries, max_retries): + if isinstance(e, urllib.error.URLError): + console.error(f"Network Error: {e.reason}") + elif isinstance(e, TimeoutError): + console.warning(f"TimeoutError: The server took too long to respond. Retrying {retries}/{max_retries}...") + elif isinstance(e, RuntimeError): + console.warning(f"{e}. Retrying {retries}/{max_retries}...") + elif isinstance(e, IOError): + console.warning(f"I/O Error: {e}. 
Retrying {retries}/{max_retries}...") + + +def handle_max_retries_exceeded(): + error_message = ( + "\nDownload failed after multiple attempts.\n" + "Possible causes:\n" + "- Internet connection issue\n" + "- Server is down or unresponsive\n" + "- Firewall or proxy blocking the request\n" + ) + console.error(error_message) + + def engine_version(engine): # Create manifest list for target with imageid cmd_args = [engine, "version", "--format", "{{ .Client.Version }}"] @@ -239,49 +246,49 @@ def engine_version(engine): def get_gpu(): - envs = get_env_vars() - # If env vars already set return if envs: return - # ASAHI CASE if os.path.exists('/proc/device-tree/compatible'): - try: - with open('/proc/device-tree/compatible', 'rb') as f: - content = f.read().split(b"\0") - # Check if "apple,arm-platform" is in the content - if b"apple,arm-platform" in content: - os.environ["ASAHI_VISIBLE_DEVICES"] = "1" - except OSError: - # Handle the case where the file does not exist - pass - - # NVIDIA CASE - try: - command = ['nvidia-smi'] - run_cmd(command).stdout.decode("utf-8") + set_asahi_visible_devices() + + if is_nvidia_gpu_present(): os.environ["CUDA_VISIBLE_DEVICES"] = "0" return - except Exception: + + set_amd_visible_devices() + + +def set_asahi_visible_devices(): + try: + with open('/proc/device-tree/compatible', 'rb') as f: + content = f.read().split(b"\0") + if b"apple,arm-platform" in content: + os.environ["ASAHI_VISIBLE_DEVICES"] = "1" + except OSError: pass - # ROCm/AMD CASE - i = 0 - gpu_num = 0 - gpu_bytes = 0 - for fp in sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total')): + +def is_nvidia_gpu_present(): + try: + run_cmd(['nvidia-smi']).stdout.decode("utf-8") + return True + except Exception: + return False + + +def set_amd_visible_devices(): + gpu_num, gpu_bytes = 0, 0 + for i, fp in enumerate(sorted(glob.glob('/sys/bus/pci/devices/*/mem_info_vram_total'))): with open(fp, 'r') as file: content = int(file.read()) if content > 1073741824 and content > 
gpu_bytes: gpu_bytes = content gpu_num = i - i += 1 - if gpu_bytes: os.environ["HIP_VISIBLE_DEVICES"] = str(gpu_num) - return def get_env_vars():