diff --git a/docs/ramalama-run.1.md b/docs/ramalama-run.1.md index 09d1c56a..d845298a 100644 --- a/docs/ramalama-run.1.md +++ b/docs/ramalama-run.1.md @@ -31,6 +31,15 @@ path of the authentication file for OCI registries #### **--ctx-size**, **-c** size of the prompt context (default: 2048, 0 = loaded from model) +#### **--device** +Add a host device to the container. Optional permissions parameter can +be used to specify device permissions by combining r for read, w for +write, and m for mknod(2). + +Example: --device=/dev/dri/renderD128:/dev/xvdc:rwm + +The device specification is passed directly to the underlying container engine. See documentation of the supported container engine for more information. + #### **--help**, **-h** show this help message and exit @@ -43,6 +52,24 @@ name of the container to run the Model in #### **--network**=*none* set the network mode for the container +#### **--privileged** +By default, RamaLama containers are unprivileged (=false) and cannot, for +example, modify parts of the operating system. This is because by +default a container is only allowed limited access to devices. A +"privileged" container is given the same access to devices as the user +launching the container, with the exception of virtual consoles (/dev/tty\d+) +when running in systemd mode (--systemd=always). + +A privileged container turns off the security features that isolate the +container from the host. Dropped Capabilities, limited devices, read- +only mount points, Apparmor/SELinux separation, and Seccomp filters are +all disabled. Due to the disabled security features, the privileged +field should almost never be set as containers can easily break out of +confinement. + +Containers running in a user namespace (e.g., rootless containers) +cannot have more privileges than the user that launched them.
+ #### **--seed**= Specify seed rather than using random seed model interaction diff --git a/docs/ramalama-serve.1.md b/docs/ramalama-serve.1.md index 837fdad5..a33074a6 100644 --- a/docs/ramalama-serve.1.md +++ b/docs/ramalama-serve.1.md @@ -46,6 +46,15 @@ The default is TRUE. The --nocontainer option forces this option to False. Use the `ramalama stop` command to stop the container running the served ramalama Model. +#### **--device** +Add a host device to the container. Optional permissions parameter can +be used to specify device permissions by combining r for read, w for +write, and m for mknod(2). + +Example: --device=/dev/dri/renderD128:/dev/xvdc:rwm + +The device specification is passed directly to the underlying container engine. See documentation of the supported container engine for more information. + #### **--generate**=type Generate specified configuration format for running the AI Model as a service @@ -70,6 +79,24 @@ set the network mode for the container #### **--port**, **-p** port for AI Model server to listen on +#### **--privileged** +By default, RamaLama containers are unprivileged (=false) and cannot, for +example, modify parts of the operating system. This is because by +default a container is only allowed limited access to devices. A +"privileged" container is given the same access to devices as the user +launching the container, with the exception of virtual consoles (/dev/tty\d+) +when running in systemd mode (--systemd=always). + +A privileged container turns off the security features that isolate the +container from the host. Dropped Capabilities, limited devices, read- +only mount points, Apparmor/SELinux separation, and Seccomp filters are +all disabled. Due to the disabled security features, the privileged +field should almost never be set as containers can easily break out of +confinement. + +Containers running in a user namespace (e.g., rootless containers) +cannot have more privileges than the user that launched them.
+ #### **--seed**= Specify seed rather than using random seed model interaction diff --git a/ramalama/cli.py b/ramalama/cli.py index 58677046..eed72ceb 100644 --- a/ramalama/cli.py +++ b/ramalama/cli.py @@ -801,6 +801,11 @@ def _run(parser): default=config.get('ctx_size', 2048), help="size of the prompt context (0 = loaded from model)", ) + parser.add_argument("--device", + dest="device", + action='append', + type=str, + help="Device to leak in to the running container") parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run") # Disable network access by default, and give the option to pass any supported network mode into # podman if needed: @@ -811,7 +816,12 @@ def _run(parser): default="none", help="set the network mode for the container", ) - + parser.add_argument( + "--privileged", + dest="privileged", + action="store_true", + help="give extended privileges to container" + ) parser.add_argument("--seed", help="override random seed") parser.add_argument( "--temp", default=config.get('temp', "0.8"), help="temperature of the response from the AI model" diff --git a/ramalama/model.py b/ramalama/model.py index 768bf7d5..c4ba9c86 100644 --- a/ramalama/model.py +++ b/ramalama/model.py @@ -200,13 +200,10 @@ def setup_container(self, args): if hasattr(args, "port"): conman_args += ["-p", f"{args.port}:{args.port}"] - # Check for env var RAMALAMA_DEVICE to explicitly declare the GPU device path - device_override = 0 - gpu_device = os.environ.get("RAMALAMA_DEVICE") - if gpu_device: - conman_args += ["--device", gpu_device] - device_override = 1 - if device_override != 1: + if args.device: + for device_arg in args.device: + conman_args += ["--device", device_arg] + else: if (sys.platform == "darwin" and os.path.basename(args.engine) != "docker") or os.path.exists("/dev/dri"): conman_args += ["--device", "/dev/dri"]