diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index 7c0fe6d3..c4cdb7e9 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -115,7 +115,8 @@ pass --group-add keep-groups to podman (default: False)
 Needed to access the gpu on some systems, but has an impact on security, use with caution.
 
 #### **--ngl**
-number of gpu layers (default: 999)
+number of gpu layers; 0 means CPU inferencing, 999 means use the maximum number of layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999).
 
 #### **--nocontainer**
 do not run RamaLama in the default container (default: False)
diff --git a/docs/ramalama.conf b/docs/ramalama.conf
index e8c18d73..2d0159e0 100644
--- a/docs/ramalama.conf
+++ b/docs/ramalama.conf
@@ -51,7 +51,7 @@
 
 # Default number of layers offloaded to the gpu
 #
-#ngl = 999
+#ngl = -1
 
 # Specify default port for services to listen on
 #
diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md
index 4b7e21ab..044a6b5c 100644
--- a/docs/ramalama.conf.5.md
+++ b/docs/ramalama.conf.5.md
@@ -92,9 +92,10 @@ RAMALAMA_IMAGE environment variable overrides this field.
 Pass `--group-add keep-groups` to podman, when using podman.
 In some cases this is needed to access the gpu from a rootless container
 
-**ngl**=999
+**ngl**=-1
 
-Default number of layers to offload to the gpu
+Number of gpu layers; 0 means CPU inferencing, 999 means use the maximum number of layers (default: -1).
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999).
 
 **port**="8080"
 
diff --git a/ramalama/cli.py b/ramalama/cli.py
index 81829201..3901c19c 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -196,8 +196,8 @@ def configure_arguments(parser):
         "--ngl",
         dest="ngl",
         type=int,
-        default=config.get("ngl", 999),
+        default=config.get("ngl", -1),
         help="Number of layers to offload to the gpu, if available",
     )
     parser.add_argument(
         "--keep-groups",
diff --git a/ramalama/model.py b/ramalama/model.py
index 24df822a..283f107a 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -195,7 +195,7 @@ def setup_container(self, args):
     def gpu_args(self, args, runner=False):
         gpu_args = []
         if (
-            args.gpu
+            args.gpu > 0
             or os.getenv("HIP_VISIBLE_DEVICES")
             or os.getenv("ASAHI_VISIBLE_DEVICES")
             or os.getenv("CUDA_VISIBLE_DEVICES")
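
For context on how the new `-1` default behaves, here is a minimal sketch of the documented semantics. `resolve_ngl` is a hypothetical helper written for illustration, not part of RamaLama's code; the environment probing simply mirrors the variables checked in `gpu_args` above.

```python
import os


def resolve_ngl(ngl: int) -> int:
    """Hypothetical helper: map a configured --ngl value to an effective
    layer count. -1 defers to auto-detection, 0 forces CPU inferencing,
    999 offloads the maximum number of layers."""
    if ngl != -1:
        # Explicit user choice: 0 (CPU only) up to 999 (all layers).
        return ngl
    # -1 means "automatically deemed appropriate": resolve to 0 or 999
    # based on the same environment variables gpu_args() probes above.
    gpu_env = ("HIP_VISIBLE_DEVICES", "ASAHI_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES")
    return 999 if any(os.getenv(v) for v in gpu_env) else 0


# Example: with CUDA_VISIBLE_DEVICES set, resolve_ngl(-1) -> 999;
# on a CPU-only host, resolve_ngl(-1) -> 0; resolve_ngl(0) stays 0.
```

This is why the `args.gpu > 0` comparison in model.py matters: with the default now `-1` rather than `999`, a plain truthiness check would treat `-1` as "GPU requested", while `> 0` leaves the auto-detection path in control.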