Add ramalama run --keepalive option
Signed-off-by: Daniel J Walsh <[email protected]>
rhatdan committed Feb 11, 2025
1 parent d13d02b commit ec8d360
Showing 5 changed files with 37 additions and 25 deletions.
15 changes: 12 additions & 3 deletions docs/ramalama-run.1.md
@@ -34,6 +34,9 @@ size of the prompt context (default: 2048, 0 = loaded from model)
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--keepalive**
+duration to keep a model loaded (e.g. 5m)
+
 #### **--name**, **-n**
 name of the container to run the Model in
 
@@ -70,9 +73,9 @@ ramalama run granite
 >
 ```
 
-Run command with local downloaoded model
+Run command with local downloaded model for 10 minutes
 ```
-ramalama run file:///tmp/mymodel
+ramalama run --keepalive 10m file:///tmp/mymodel
 >
 ```
 
@@ -89,9 +92,15 @@ This program is a Python script that allows the user to interact with a terminal
 [end of text]
 ```
 
+## Exit Codes:
+
+0 Success
+124 RamaLama command did not exit within the keepalive time.
+
+
 ## NVIDIA CUDA Support
 
-See **[ramalama-cuda(7)](ramalama.7.md)** for setting up the host Linux system for CUDA support.
+See **[ramalama-cuda(7)](ramalama-cuda.7.md)** for setting up the host Linux system for CUDA support.
 
 ## SEE ALSO
 **[ramalama(1)](ramalama.1.md)**, **[ramalama-cuda(7)](ramalama-cuda.7.md)**
28 changes: 12 additions & 16 deletions ramalama/cli.py
@@ -796,6 +796,16 @@ def _run(parser):
         help="size of the prompt context (0 = loaded from model)",
     )
     parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
+    # Disable network access by default, and give the option to pass any supported network mode into
+    # podman if needed:
+    # https://docs.podman.io/en/latest/markdown/podman-run.1.html#network-mode-net
+    parser.add_argument(
+        "--network-mode",
+        type=str,
+        default="none",
+        help="set the network mode for the container",
+    )
+
     parser.add_argument("--seed", help="override random seed")
     parser.add_argument(
         "--temp", default=config.get('temp', "0.8"), help="temperature of the response from the AI model"
@@ -807,23 +817,15 @@
         help="require HTTPS and verify certificates when contacting registries",
     )
 
 
 def run_parser(subparsers):
     parser = subparsers.add_parser("run", help="run specified AI Model as a chatbot")
     _run(parser)
-    # Disable network access by default, and give the option to pass any supported network mode into
-    # podman if needed:
-    # https://docs.podman.io/en/latest/markdown/podman-run.1.html#network-mode-net
-    parser.add_argument(
-        "--network-mode",
-        type=str,
-        default="none",
-        help="set the network mode for the container",
-    )
+    parser.add_argument("--keepalive", type=str, help="Duration to keep a model loaded (e.g. 5m)")
     parser.add_argument("MODEL") # positional argument
     parser.add_argument(
         "ARGS", nargs="*", help="Overrides the default prompt, and the output is returned without entering the chatbot"
     )
     parser._actions.sort(key=lambda x: x.option_strings)
     parser.set_defaults(func=run_cli)
 

@@ -845,12 +847,6 @@ def serve_parser(subparsers):
     parser.add_argument(
         "-p", "--port", default=config.get('port', "8080"), help="port for AI Model server to listen on"
     )
-    parser.add_argument(
-        "--network-mode",
-        type=str,
-        default="",
-        help="set the network mode for the container",
-    )
     parser.add_argument("MODEL") # positional argument
     parser.set_defaults(func=serve_cli)
 
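The two `ramalama/cli.py` hunks above move the `--network-mode` option into the shared `_run()` helper and register the new run-only `--keepalive` flag in `run_parser()`. Below is a minimal, self-contained argparse sketch of that pattern — not the RamaLama CLI itself; the `demo` program name and `_common()` helper are stand-ins, and it assumes `_run()` is the helper reused by both the `run` and `serve` subparsers:

```python
import argparse


def _common(parser):
    # Options shared by several subcommands live in one helper, mirroring how
    # the commit moves --network-mode from run_parser() into _run().
    parser.add_argument(
        "--network-mode", type=str, default="none", help="set the network mode for the container"
    )


parser = argparse.ArgumentParser(prog="demo")
subparsers = parser.add_subparsers(dest="command")

run = subparsers.add_parser("run")
_common(run)
run.add_argument("--keepalive", type=str, help="duration to keep a model loaded (e.g. 5m)")
run.add_argument("MODEL")

serve = subparsers.add_parser("serve")
_common(serve)
serve.add_argument("MODEL")

args = parser.parse_args(["run", "--keepalive", "10m", "tiny"])
print(args.network_mode, args.keepalive, args.MODEL)  # none 10m tiny
```

With the option defined once in the shared helper, every subcommand that calls it gets the same `none` default instead of each parser carrying its own copy.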
6 changes: 4 additions & 2 deletions ramalama/model.py
@@ -188,11 +188,11 @@ def setup_container(self, args):
             conman_args += ["-p", f"{args.port}:{args.port}"]
 
         # Check for env var RAMALAMA_DEVICE to explicitly declare the GPU device path
-        device_override=0
+        device_override = 0
         gpu_device = os.environ.get("RAMALAMA_DEVICE")
         if gpu_device:
             conman_args += ["--device", gpu_device]
-            device_override=1
+            device_override = 1
         if device_override != 1:
             if (sys.platform == "darwin" and os.path.basename(args.engine) != "docker") or os.path.exists("/dev/dri"):
                 conman_args += ["--device", "/dev/dri"]
@@ -265,6 +265,8 @@ def run(self, args):
         prompt = self.build_prompt(args)
         model_path = self.get_model_path(args)
         exec_args = self.build_exec_args_run(args, model_path, prompt)
+        if args.keepalive:
+            exec_args = ["timeout", args.keepalive] + exec_args
         self.execute_model(model_path, exec_args, args)
 
     def perplexity(self, args):
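The `ramalama/model.py` change above is the whole keepalive mechanism: when `args.keepalive` is set, the model command is prefixed with coreutils `timeout`, which stops the process and exits with status 124 once the duration elapses — matching the new Exit Codes section in the manpage and the `run_ramalama 124` expectation in the bats test. A minimal sketch of that behaviour, using a hypothetical `sleep 30` as a stand-in for the real model command rather than RamaLama's actual exec path:

```python
import subprocess

keepalive = "1s"             # any duration timeout(1) accepts, e.g. "5m" or "10m"
exec_args = ["sleep", "30"]  # stand-in for the long-running model command

# Prepend `timeout <duration>`, as Model.run() does when args.keepalive is set.
exec_args = ["timeout", keepalive] + exec_args

result = subprocess.run(exec_args)
print(result.returncode)     # 124 once the keepalive duration expires
```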
9 changes: 5 additions & 4 deletions ramalama/ollama.py
@@ -64,10 +64,10 @@ def init_pull(repos, accept, registry_head, model_name, model_tag, models, model
 def in_existing_cache(model_name, model_tag):
     if not available("ollama"):
         return None
-    default_ollama_caches=[
+    default_ollama_caches = [
         os.path.join(os.environ['HOME'], '.ollama/models'),
         '/usr/share/ollama/.ollama/models',
-        f'C:\\Users\\{os.getlogin()}\\.ollama\\models'
+        f'C:\\Users\\{os.getlogin()}\\.ollama\\models',
     ]
 
     for cache_dir in default_ollama_caches:
@@ -79,10 +79,11 @@ def in_existing_cache(model_name, model_tag):
                     if layer["mediaType"] == "application/vnd.ollama.image.model":
                         layer_digest = layer["digest"]
                         ollama_digest_path = os.path.join(cache_dir, 'blobs', layer_digest)
-                        if os.path.exists(str(ollama_digest_path).replace(':','-')):
-                            return str(ollama_digest_path).replace(':','-')
+                        if os.path.exists(str(ollama_digest_path).replace(':', '-')):
+                            return str(ollama_digest_path).replace(':', '-')
     return None
 
+
 class Ollama(Model):
     def __init__(self, model):
         model = rm_until_substring(model, "ollama.com/library/")
4 changes: 4 additions & 0 deletions test/system/030-run.bats
@@ -68,4 +68,8 @@ load helpers
     run_ramalama run --name foobar tiny "Write a 1 line poem"
 }
 
+@test "ramalama run --keepalive" {
+    run_ramalama 124 run --keepalive 1s tiny
+}
+
 # vim: filetype=sh
