Make the default of ngl be -1
This means a value is assigned automatically, which may be 999 or 0
depending on hardware.

Signed-off-by: Eric Curtin <[email protected]>
ericcurtin committed Feb 5, 2025
1 parent 03cbf9b commit 3332c62
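
As a sketch of the semantics described above: -1 is a sentinel resolved at runtime into either 0 (CPU-only) or 999 (offload everything), depending on whether a usable GPU is found. A minimal Python illustration, assuming a hypothetical resolve_ngl helper and a gpu_available flag (the real detection lives in ramalama's GPU-handling code):

    def resolve_ngl(ngl: int, gpu_available: bool) -> int:
        # Hypothetical helper: turn the -1 sentinel into a concrete layer count.
        if ngl != -1:
            return ngl  # explicit user choice: 0 = CPU inferencing, 999 = max layers
        return 999 if gpu_available else 0

    assert resolve_ngl(-1, gpu_available=True) == 999   # GPU present: offload max layers
    assert resolve_ngl(-1, gpu_available=False) == 0    # no GPU: stay on the CPU
    assert resolve_ngl(40, gpu_available=True) == 40    # explicit values pass through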
Showing 5 changed files with 10 additions and 7 deletions.
3 changes: 2 additions & 1 deletion docs/ramalama.1.md
@@ -115,7 +115,8 @@ pass --group-add keep-groups to podman (default: False)
 Needed to access the gpu on some systems, but has an impact on security, use with caution.
 
 #### **--ngl**
-number of gpu layers (default: 999)
+number of gpu layers, 0 means CPU inferencing, 999 means use max layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999)
 
 #### **--nocontainer**
 do not run RamaLama in the default container (default: False)
3 changes: 2 additions & 1 deletion docs/ramalama.conf
@@ -50,8 +50,9 @@
 #keep_groups = false
 
 # Default number of layers offloaded to the gpu
+# -1 means use whatever is automatically deemed appropriate (0 or 999)
 #
-#ngl = 999
+#ngl = -1
 
 # Specify default port for services to listen on
 #
5 changes: 3 additions & 2 deletions docs/ramalama.conf.5.md
@@ -92,9 +92,10 @@ RAMALAMA_IMAGE environment variable overrides this field.
 Pass `--group-add keep-groups` to podman, when using podman.
 In some cases this is needed to access the gpu from a rootless container
 
-**ngl**=999
+**ngl**=-1
 
-Default number of layers to offload to the gpu
+number of gpu layers, 0 means CPU inferencing, 999 means use max layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999)
 
 **port**="8080"
 
4 changes: 2 additions & 2 deletions ramalama/cli.py
@@ -196,8 +196,8 @@ def configure_arguments(parser):
         "--ngl",
         dest="ngl",
         type=int,
-        default=config.get("ngl", 999),
-        help="Number of layers to offload to the gpu, if available",
+        default=config.get("ngl", -1),
+        help="Number of layers to offload to the gpu, if available"
     )
     parser.add_argument(
         "--keep-groups",
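
Taken together with the ramalama.conf hunk above, the precedence is: an explicit --ngl flag wins, then an ngl setting in ramalama.conf, then the new built-in -1. A minimal sketch of that chain, assuming config is the dict parsed from ramalama.conf (the real loader lives elsewhere in ramalama):

    import argparse

    config = {}  # would hold {"ngl": ...} if ramalama.conf set it

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ngl",
        dest="ngl",
        type=int,
        default=config.get("ngl", -1),  # conf value if present, else the new -1
        help="Number of layers to offload to the gpu, if available",
    )

    print(parser.parse_args([]).ngl)              # no flag, empty conf -> -1 (auto)
    print(parser.parse_args(["--ngl", "0"]).ngl)  # explicit flag always wins -> 0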
2 changes: 1 addition & 1 deletion ramalama/model.py
@@ -195,7 +195,7 @@ def setup_container(self, args):
     def gpu_args(self, args, runner=False):
         gpu_args = []
         if (
-            args.gpu
+            args.gpu > 0
             or os.getenv("HIP_VISIBLE_DEVICES")
             or os.getenv("ASAHI_VISIBLE_DEVICES")
             or os.getenv("CUDA_VISIBLE_DEVICES")
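
The comparison change guards against Python truthiness: bool(-1) is True, so a bare `args.gpu` test would treat a -1 value as an explicit GPU request, while `args.gpu > 0` treats both -1 and 0 as "no explicit GPU". A standalone illustration (not ramalama code):

    for value in (-1, 0, 1):
        old_check = bool(value)  # -1 -> True: a -1 sentinel would wrongly enable gpu args
        new_check = value > 0    # -1 -> False, 0 -> False, 1 -> True
        print(value, old_check, new_check)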
