From 3332c623b2614bdb4164c3bdf8ac472c18166e25 Mon Sep 17 00:00:00 2001
From: Eric Curtin
Date: Sun, 2 Feb 2025 15:30:54 +0000
Subject: [PATCH] Make the default of ngl be -1

This means a value is assigned automatically, which may be 999 or 0
depending on hardware.

Signed-off-by: Eric Curtin
---
 docs/ramalama.1.md      | 3 ++-
 docs/ramalama.conf      | 3 ++-
 docs/ramalama.conf.5.md | 5 +++--
 ramalama/cli.py         | 4 ++--
 ramalama/model.py       | 2 +-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index 7c0fe6d3..c4cdb7e9 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -115,7 +115,8 @@ pass --group-add keep-groups to podman (default: False)
 Needed to access the gpu on some systems, but has an impact on security, use with caution.
 
 #### **--ngl**
-number of gpu layers (default: 999)
+number of gpu layers; 0 means CPU inferencing, 999 means use max layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999)
 
 #### **--nocontainer**
 do not run RamaLama in the default container (default: False)
diff --git a/docs/ramalama.conf b/docs/ramalama.conf
index e8c18d73..8ce47613 100644
--- a/docs/ramalama.conf
+++ b/docs/ramalama.conf
@@ -50,8 +50,9 @@
 #keep_groups = false
 
 # Default number of layers offloaded to the gpu
+# -1 means use whatever is automatically deemed appropriate (0 or 999)
 #
-#ngl = 999
+#ngl = -1
 
 # Specify default port for services to listen on
 #
diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md
index 4b7e21ab..044a6b5c 100644
--- a/docs/ramalama.conf.5.md
+++ b/docs/ramalama.conf.5.md
@@ -92,9 +92,10 @@ RAMALAMA_IMAGE environment variable overrides this field.
 Pass `--group-add keep-groups` to podman, when using podman.
 In some cases this is needed to access the gpu from a rootless container
 
-**ngl**=999
+**ngl**=-1
 
-Default number of layers to offload to the gpu
+number of gpu layers; 0 means CPU inferencing, 999 means use max layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999)
 
 **port**="8080"
 
diff --git a/ramalama/cli.py b/ramalama/cli.py
index 81829201..3901c19c 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -196,8 +196,8 @@ def configure_arguments(parser):
         "--ngl",
         dest="ngl",
         type=int,
-        default=config.get("ngl", 999),
-        help="Number of layers to offload to the gpu, if available",
+        default=config.get("ngl", -1),
+        help="Number of layers to offload to the gpu, if available"
     )
     parser.add_argument(
         "--keep-groups",
diff --git a/ramalama/model.py b/ramalama/model.py
index 24df822a..283f107a 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -195,7 +195,7 @@ def setup_container(self, args):
     def gpu_args(self, args, runner=False):
         gpu_args = []
         if (
-            args.gpu
+            args.gpu > 0
             or os.getenv("HIP_VISIBLE_DEVICES")
             or os.getenv("ASAHI_VISIBLE_DEVICES")
             or os.getenv("CUDA_VISIBLE_DEVICES")
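
To illustrate the behaviour the patch describes, here is a minimal sketch of how a -1 ngl
default could be resolved at runtime. It is not part of the patch and not RamaLama's actual
implementation; the helper name resolve_ngl is hypothetical, and only the environment
variable names are taken from the diff above.

import os

def resolve_ngl(ngl):
    # Hypothetical helper (not in this patch): map the -1 sentinel to
    # 999 (offload max layers) or 0 (CPU inferencing).
    if ngl >= 0:
        return ngl  # 0 or an explicit layer count from the user/config
    # -1: decide automatically based on whether a GPU appears visible,
    # using the same environment variables gpu_args() checks.
    gpu_env = ("HIP_VISIBLE_DEVICES", "ASAHI_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES")
    if any(os.getenv(v) for v in gpu_env):
        return 999  # use max layers
    return 0  # no GPU detected, CPU inferencing

Keeping -1 as a sentinel in the argument parser, rather than resolving it there, lets the
container/runtime code make the 0-or-999 decision once the hardware is actually known.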