containers · rhatdan · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025
@@ -31,6 +31,9 @@ path of the authentication file for OCI registries
 #### **--ctx-size**, **-c**
 size of the prompt context (default: 2048, 0 = loaded from model)
 
+#### **--device**
+declare a host device to expose inside the container (may be specified multiple times)
+
 #### **--help**, **-h**
 show this help message and exit
 
@@ -43,6 +46,9 @@ name of the container to run the Model in
 #### **--network**=*none*
 set the network mode for the container
 
+#### **--privileged**
+give extended privileges to the container
+
 #### **--seed**=
 Specify a seed rather than using a random seed for model interaction
 

@@ -46,6 +46,9 @@ The default is TRUE. The --nocontainer option forces this option to False.
 
 Use the `ramalama stop` command to stop the container running the served ramalama Model.
 
+#### **--device**
+declare a host device to expose inside the container (may be specified multiple times)
+
 #### **--generate**=type
 Generate specified configuration format for running the AI Model as a service
 
@@ -70,6 +73,9 @@ set the network mode for the container
 #### **--port**, **-p**
 port for AI Model server to listen on
 
+#### **--privileged**
+give extended privileges to the container
+
 #### **--seed**=
 Specify a seed rather than using a random seed for model interaction
 

@@ -233,6 +233,12 @@ def configure_arguments(parser):
         help="store AI Models in the specified directory",
     )
     parser.add_argument("-v", "--version", dest="version", action="store_true", help="show RamaLama version")
+    # parser.add_argument("--device",
+    #     dest="device",
+    #     action='append',
+    #     type=str,
+    #     # nargs=1,
+    #     help="Device to leak in to the running container")
 
 
 def configure_subcommands(parser):
@@ -801,6 +807,11 @@ def _run(parser):
         default=config.get('ctx_size', 2048),
         help="size of the prompt context (0 = loaded from model)",
     )
+    parser.add_argument("--device",
+        dest="device",
+        action='append',
+        type=str,
+        help="Device to leak in to the running container")
     parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
     # Disable network access by default, and give the option to pass any supported network mode into
     # podman if needed:
@@ -811,7 +822,12 @@ def _run(parser):
         default="none",
         help="set the network mode for the container",
     )
-
+    parser.add_argument(
+        "--privileged",
+        dest="privileged",
+        action="store_true",
+        help="give extended privileges to container"
+    )
     parser.add_argument("--seed", help="override random seed")
     parser.add_argument(
         "--temp", default=config.get('temp', "0.8"), help="temperature of the response from the AI model"

@@ -187,13 +187,11 @@ def setup_container(self, args):
         if hasattr(args, "port"):
             conman_args += ["-p", f"{args.port}:{args.port}"]
 
-        # Check for env var RAMALAMA_DEVICE to explicitly declare the GPU device path
-        device_override = 0
-        gpu_device = os.environ.get("RAMALAMA_DEVICE")
-        if gpu_device:
-            conman_args += ["--device", gpu_device]
-            device_override = 1
-        if device_override != 1:
+        if args.device:
+            for device_arg in args.device:
+                print(device_arg)
+                conman_args += ["--device", device_arg]
+        else:
             if (sys.platform == "darwin" and os.path.basename(args.engine) != "docker") or os.path.exists("/dev/dri"):
                 conman_args += ["--device", "/dev/dri"]