diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index 7c0fe6d3..38193516 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -115,7 +115,7 @@ pass --group-add keep-groups to podman (default: False)
 Needed to access the gpu on some systems, but has an impact on security, use with caution.
 
 #### **--ngl**
-number of gpu layers (default: 999)
+number of layers to offload to the gpu (default: -1)
 
 #### **--nocontainer**
 do not run RamaLama in the default container (default: False)
diff --git a/docs/ramalama.conf b/docs/ramalama.conf
index e8c18d73..2d0159e0 100644
--- a/docs/ramalama.conf
+++ b/docs/ramalama.conf
@@ -51,7 +51,7 @@
 
 # Default number of layers offloaded to the gpu
 #
-#ngl = 999
+#ngl = -1
 
 # Specify default port for services to listen on
 #
diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md
index 4b7e21ab..be679e6a 100644
--- a/docs/ramalama.conf.5.md
+++ b/docs/ramalama.conf.5.md
@@ -92,7 +92,7 @@ RAMALAMA_IMAGE environment variable overrides this field.
 Pass `--group-add keep-groups` to podman, when using podman.
 In some cases this is needed to access the gpu from a rootless container
 
-**ngl**=999
+**ngl**=-1
 
 Default number of layers to offload to the gpu
 
diff --git a/ramalama/cli.py b/ramalama/cli.py
index d41fc217..8400f36e 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -196,7 +196,7 @@ def configure_arguments(parser):
         "--ngl",
         dest="ngl",
         type=int,
-        default=config.get("ngl", 999),
+        default=config.get("ngl", -1),
         help="Number of layers to offload to the gpu, if available"
     )
     parser.add_argument(
diff --git a/ramalama/model.py b/ramalama/model.py
index 287d27bb..62ed3db0 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -193,7 +193,7 @@ def setup_container(self, args):
     def gpu_args(self, args, runner=False):
         gpu_args = []
         if (
-            args.gpu
+            args.gpu > 0
             or os.getenv("HIP_VISIBLE_DEVICES")
             or os.getenv("ASAHI_VISIBLE_DEVICES")
             or os.getenv("CUDA_VISIBLE_DEVICES")