diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md index 7c0fe6d3..38193516 100644 --- a/docs/ramalama.1.md +++ b/docs/ramalama.1.md @@ -115,7 +115,7 @@ pass --group-add keep-groups to podman (default: False) Needed to access the gpu on some systems, but has an impact on security, use with caution. #### **--ngl** -number of gpu layers (default: 999) +number of gpu layers (default: -1) #### **--nocontainer** do not run RamaLama in the default container (default: False) diff --git a/docs/ramalama.conf b/docs/ramalama.conf index e8c18d73..2d0159e0 100644 --- a/docs/ramalama.conf +++ b/docs/ramalama.conf @@ -51,7 +51,7 @@ # Default number of layers offloaded to the gpu # -#ngl = 999 +#ngl = -1 # Specify default port for services to listen on # diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md index 4b7e21ab..be679e6a 100644 --- a/docs/ramalama.conf.5.md +++ b/docs/ramalama.conf.5.md @@ -92,7 +92,7 @@ RAMALAMA_IMAGE environment variable overrides this field. Pass `--group-add keep-groups` to podman, when using podman. In some cases this is needed to access the gpu from a rootless container -**ngl**=999 +**ngl**=-1 Default number of layers to offload to the gpu diff --git a/ramalama/cli.py b/ramalama/cli.py index d41fc217..8400f36e 100644 --- a/ramalama/cli.py +++ b/ramalama/cli.py @@ -196,7 +196,7 @@ def configure_arguments(parser): "--ngl", dest="ngl", type=int, - default=config.get("ngl", 999), + default=config.get("ngl", -1), help="Number of layers to offload to the gpu, if available" ) parser.add_argument( diff --git a/ramalama/model.py b/ramalama/model.py index 287d27bb..62ed3db0 100644 --- a/ramalama/model.py +++ b/ramalama/model.py @@ -193,7 +193,7 @@ def setup_container(self, args): def gpu_args(self, args, runner=False): gpu_args = [] if ( - args.gpu + args.gpu > 0 or os.getenv("HIP_VISIBLE_DEVICES") or os.getenv("ASAHI_VISIBLE_DEVICES") or os.getenv("CUDA_VISIBLE_DEVICES")