From 496692cd187784fbe2ac887e52c5e42ee70d3523 Mon Sep 17 00:00:00 2001
From: Michael Engel
Date: Sat, 8 Feb 2025 08:31:46 +0100
Subject: [PATCH 1/4] Added inspect CLI command to display model info

AI Models are shipped with a lot of (meta) information such as the
architecture used, the chat template they require and so on. In order
to make this information available to the user, the new CLI command
inspect has been implemented with support for the --all and --json
options. At the moment the GGUF file format - which bundles the model
as well as the (meta) information in one file - is fully supported.
Other formats, where the model and its information are stored in
separate files, are not (yet) supported and only display basic
information such as the model name, path and registry.

Signed-off-by: Michael Engel
---
 docs/ramalama-inspect.1.md |  77 ++++++++++++++
 docs/ramalama.1.md         |   1 +
 install.sh                 |  10 +-
 ramalama/cli.py            |  14 +++
 ramalama/common.py         |   1 -
 ramalama/gguf_parser.py    | 201 +++++++++++++++++++++++++++++++++++++
 ramalama/model.py          |  24 +++++
 ramalama/model_inspect.py  | 111 ++++++++++++++++++++
 8 files changed, 433 insertions(+), 6 deletions(-)
 create mode 100644 docs/ramalama-inspect.1.md
 create mode 100644 ramalama/gguf_parser.py
 create mode 100644 ramalama/model_inspect.py

diff --git a/docs/ramalama-inspect.1.md b/docs/ramalama-inspect.1.md
new file mode 100644
index 00000000..55d1089a
--- /dev/null
+++ b/docs/ramalama-inspect.1.md
@@ -0,0 +1,77 @@
+% ramalama-inspect 1
+
+## NAME
+ramalama\-inspect - inspect the specified AI Model
+
+## SYNOPSIS
+**ramalama inspect** [*options*] *model*
+
+## DESCRIPTION
+Inspect the specified AI Model for additional information
+such as its registry, metadata and tensor information.
+
+## OPTIONS
+
+#### **--all**
+Print all available information about the AI Model.
+By default, only a basic subset is printed.
+
+#### **--help**, **-h**
+Print usage message
+
+#### **--json**
+Print the AI Model information in JSON format.
+
+## EXAMPLES
+
+Inspect the smollm:135m model for basic information
+```
+$ ramalama inspect smollm:135m
+smollm:135m
+   Path: /var/lib/ramalama/models/ollama/smollm:135m
+   Registry: ollama
+   Format: GGUF
+   Version: 3
+   Endianness: little
+   Metadata: 39 entries
+   Tensors: 272 entries
+```
+
+Inspect the smollm:135m model for all information in JSON format
+```
+$ ramalama inspect smollm:135m --all --json
+{
+    "Name": "smollm:135m",
+    "Path": "/home/mengel/.local/share/ramalama/models/ollama/smollm:135m",
+    "Registry": "ollama",
+    "Format": "GGUF",
+    "Version": 3,
+    "LittleEndian": true,
+    "Metadata": {
+        "general.architecture": "llama",
+        "general.base_model.0.name": "SmolLM 135M",
+        "general.base_model.0.organization": "HuggingFaceTB",
+        "general.base_model.0.repo_url": "https://huggingface.co/HuggingFaceTB/SmolLM-135M",
+        ...
+    },
+    "Tensors": [
+        {
+            "dimensions": [
+                576,
+                49152
+            ],
+            "n_dimensions": 2,
+            "name": "token_embd.weight",
+            "offset": 0,
+            "type": 8
+        },
+        ...
+    ]
+}
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**
+
+## HISTORY
+Feb 2025, Originally compiled by Michael Engel
diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index e1d0bd70..25679d04 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -141,6 +141,7 @@ show RamaLama version
 | [ramalama-bench(1)](ramalama-bench.1.md)     | benchmark specified AI Model                       |
 | [ramalama-convert(1)](ramalama-convert.1.md) | convert AI Models from local storage to OCI Image  |
 | [ramalama-info(1)](ramalama-info.1.md)       | Display RamaLama configuration information         |
+| [ramalama-inspect(1)](ramalama-inspect.1.md) | inspect the specified AI Model                     |
 | [ramalama-list(1)](ramalama-list.1.md)       | list all downloaded AI Models                      |
 | [ramalama-login(1)](ramalama-login.1.md)     | login to remote registry                           |
 | [ramalama-logout(1)](ramalama-logout.1.md)   | logout from remote registry                        |
diff --git a/install.sh b/install.sh
index 0642c7de..b39ad360 100755
--- a/install.sh
+++ b/install.sh
@@ -126,11 +126,11 @@ setup_ramalama() {
   syspath="$syspath/ramalama"
   $sudo install -m755 -d "$syspath"
   $sudo install -m755 "$to_file" "$ramalama_bin"
-  local python_files=("cli.py" "huggingface.py" "model.py" "ollama.py" \
-                      "common.py" "__init__.py" "quadlet.py" "kube.py" \
-                      "oci.py" "version.py" "shortnames.py" "toml_parser.py" \
-                      "file.py" "http_client.py" "url.py" "annotations.py" \
-                      "gpu_detector.py" "console.py")
+  local python_files=("cli.py" "gguf_parser.py" "huggingface.py" "model.py" \
+                      "model_inspect.py" "ollama.py" "common.py" "__init__.py" \
+                      "quadlet.py" "kube.py" "oci.py" "version.py" "shortnames.py" \
+                      "toml_parser.py" "file.py" "http_client.py" "url.py" \
+                      "annotations.py" "gpu_detector.py" "console.py")
   for i in "${python_files[@]}"; do
     if $local_install; then
       url="ramalama/${i}"
diff --git a/ramalama/cli.py b/ramalama/cli.py
index e551a5a3..a4f127c6 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -243,6 +243,7 @@ def configure_subcommands(parser):
     containers_parser(subparsers)
     convert_parser(subparsers)
     info_parser(subparsers)
+    inspect_parser(subparsers)
     list_parser(subparsers)
     login_parser(subparsers)
     logout_parser(subparsers)
@@ -994,3 +995,16 @@ def perplexity_parser(subparsers):
 def perplexity_cli(args):
     model = New(args.MODEL, args)
     model.perplexity(args)
+
+
+def inspect_parser(subparsers):
+    parser = subparsers.add_parser("inspect", help="inspect an AI Model")
+    parser.add_argument("MODEL")  # positional argument
+    parser.add_argument("--all", dest="all", action="store_true", help="display all available information of AI Model")
+    parser.add_argument("--json", dest="json", action="store_true", help="display AI Model information in JSON format")
+    parser.set_defaults(func=inspect_cli)
+
+
+def inspect_cli(args):
+    model = New(args.MODEL, args)
+    model.inspect(args)
diff --git a/ramalama/common.py b/ramalama/common.py
index a5732858..048b9e2b 100644
--- a/ramalama/common.py
+++ b/ramalama/common.py
@@ -17,7 +17,6 @@
 
 from ramalama.http_client import HttpClient
 
-
 logging.basicConfig(level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s")
 
 MNT_DIR = "/mnt/models"
diff --git a/ramalama/gguf_parser.py b/ramalama/gguf_parser.py
new file mode 100644
index 00000000..8c89db65
--- /dev/null
+++ b/ramalama/gguf_parser.py
@@ -0,0 +1,201 @@
+import io
+import struct
+
+from enum import IntEnum
+from typing import Dict, Any
+
+import ramalama.console as console
+from ramalama.model_inspect import GGUFModelInfo, Tensor
+
+
+# Based on ggml_type in
+# https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+class GGML_TYPE(IntEnum):
+    GGML_TYPE_F32 = 0
+    GGML_TYPE_F16 = 1
+    GGML_TYPE_Q4_0 = 2
+    GGML_TYPE_Q4_1 = 3
+    # GGML_TYPE_Q4_2 = 4, support has been removed
+    # GGML_TYPE_Q4_3 = 5, support has been removed
+    GGML_TYPE_Q5_0 = 6
+    GGML_TYPE_Q5_1 = 7
+    GGML_TYPE_Q8_0 = 8
+    GGML_TYPE_Q8_1 = 9
+    GGML_TYPE_Q2_K = 10
+    GGML_TYPE_Q3_K = 11
+    GGML_TYPE_Q4_K = 12
+    GGML_TYPE_Q5_K = 13
+    GGML_TYPE_Q6_K = 14
+    GGML_TYPE_Q8_K = 15
+    GGML_TYPE_IQ2_XXS = 16
+    GGML_TYPE_IQ2_XS = 17
+    GGML_TYPE_IQ3_XXS = 18
+    GGML_TYPE_IQ1_S = 19
+    GGML_TYPE_IQ4_NL = 20
+    GGML_TYPE_IQ3_S = 21
+    GGML_TYPE_IQ2_S = 22
+    GGML_TYPE_IQ4_XS = 23
+    GGML_TYPE_I8 = 24
+    GGML_TYPE_I16 = 25
+    GGML_TYPE_I32 = 26
+    GGML_TYPE_I64 = 27
+    GGML_TYPE_F64 = 28
+    GGML_TYPE_IQ1_M = 29
+
+
+# Based on gguf_metadata_value_type in
+# https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+class GGUFValueType(IntEnum):
+    UINT8 = 0  # 8-bit unsigned integer
+    INT8 = 1  # 8-bit signed integer
+    UINT16 = 2  # 16-bit unsigned little-endian integer
+    INT16 = 3  # 16-bit signed little-endian integer
+    UINT32 = 4  # 32-bit unsigned little-endian integer
+    INT32 = 5  # 32-bit signed little-endian integer
+    FLOAT32 = 6  # 32-bit IEEE754 floating point number
+
+    # boolean of 1-byte value where 0 is false and 1 is true.
+    # Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy.
+    BOOL = 7
+
+    STRING = 8  # UTF-8 non-null-terminated string, with length prepended.
+
+    # Array of other values, with the length and type prepended.
+    # Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
+    ARRAY = 9
+
+    UINT64 = 10  # 64-bit unsigned little-endian integer
+    INT64 = 11  # 64-bit signed little-endian integer
+    FLOAT64 = 12  # 64-bit IEEE754 floating point number
+
+
+# Mapping of GGUF value types to the Python struct library's format characters
+# see https://docs.python.org/3/library/struct.html#format-characters
+GGUF_VALUE_TYPE_FORMAT: Dict[GGUFValueType, str] = {
+    GGUFValueType.UINT8: "B",
+    GGUFValueType.INT8: "b",
+    GGUFValueType.UINT16: "H",
+    GGUFValueType.INT16: "h",
+    GGUFValueType.UINT32: "I",
+    GGUFValueType.INT32: "i",
+    GGUFValueType.FLOAT32: "f",
+    GGUFValueType.BOOL: "?",
+    GGUFValueType.UINT64: "Q",
+    GGUFValueType.INT64: "q",
+    GGUFValueType.FLOAT64: "d",
+}
+
+GGUF_NUMBER_FORMATS: list[GGUFValueType] = [
+    GGUFValueType.UINT8,
+    GGUFValueType.INT8,
+    GGUFValueType.UINT16,
+    GGUFValueType.INT16,
+    GGUFValueType.UINT32,
+    GGUFValueType.INT32,
+    GGUFValueType.FLOAT32,
+    GGUFValueType.UINT64,
+    GGUFValueType.INT64,
+    GGUFValueType.FLOAT64,
+]
+
+
+class ParseError(Exception):
+    pass
+
+
+class GGUFInfoParser:
+
+    def is_model_gguf(model_path: str) -> bool:
+        try:
+            with open(model_path, "rb") as model_file:
+                magic_number = GGUFInfoParser.read_string(model_file, 4)
+                return magic_number == GGUFModelInfo.MAGIC_NUMBER
+        except Exception as ex:
+            console.warning(f" Failed to read model '{model_path}': {ex}")
+            return False
+
+    @staticmethod
+    def read_string(model: io.BufferedReader, length: int = -1) -> str:
+        if length == -1:
+            type_string = GGUF_VALUE_TYPE_FORMAT[GGUFValueType.UINT64]
+            length = struct.unpack(type_string, model.read(struct.calcsize(type_string)))[0]
+        return model.read(length).decode("utf-8")
+
+    @staticmethod
+    def read_number(model: io.BufferedReader, value_type: GGUFValueType, model_uses_little_endian: bool) -> float:
+        if value_type not in GGUF_NUMBER_FORMATS:
+            raise ParseError(f"Value type '{value_type}' not in format dict")
+        typestring = f"{'<' if model_uses_little_endian else '>'}{GGUF_VALUE_TYPE_FORMAT[value_type]}"
+        return struct.unpack(typestring, model.read(struct.calcsize(typestring)))[0]
+
+    @staticmethod
+    def read_bool(model: io.BufferedReader, model_uses_little_endian: bool) -> bool:
+        typestring = f"{'<' if model_uses_little_endian else '>'}{GGUF_VALUE_TYPE_FORMAT[GGUFValueType.BOOL]}"
+        value = struct.unpack(typestring, model.read(struct.calcsize(typestring)))[0]
+        if value not in [0, 1]:
+            raise ParseError(f"Invalid bool value '{value}'")
+        return value == 1
+
+    @staticmethod
+    def read_value_type(model: io.BufferedReader, model_uses_little_endian: bool) -> GGUFValueType:
+        value_type = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, model_uses_little_endian)
+        return GGUFValueType(value_type)
+
+    @staticmethod
+    def read_value(model: io.BufferedReader, value_type: GGUFValueType, model_uses_little_endian: bool) -> Any:
+        value = None
+        if value_type in GGUF_NUMBER_FORMATS:
+            value = GGUFInfoParser.read_number(model, value_type, model_uses_little_endian)
+        elif value_type == GGUFValueType.BOOL:
+            value = GGUFInfoParser.read_bool(model, model_uses_little_endian)
+        elif value_type == GGUFValueType.STRING:
+            value = GGUFInfoParser.read_string(model)
+        elif value_type == GGUFValueType.ARRAY:
+            array_type = GGUFInfoParser.read_value_type(model, model_uses_little_endian)
+            array_length = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, model_uses_little_endian)
+            value = [
+                GGUFInfoParser.read_value(model, array_type, model_uses_little_endian) for _ in range(array_length)
+            ]
+
+        if value is not None:
+            return value
+        raise ParseError(f"Unknown type '{value_type}'")
+
+    def parse(model_name: str, model_registry: str, model_path: str, cli_args) -> GGUFModelInfo:
+        # By default, models are little-endian encoded
+        is_little_endian = True
+
+        with open(model_path, "rb") as model:
+            magic_number = GGUFInfoParser.read_string(model, 4)
+            if magic_number != GGUFModelInfo.MAGIC_NUMBER:
+                raise ParseError(f"Invalid GGUF magic number '{magic_number}'")
+
+            gguf_version = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+            # If the version read differs from the expected one, the model could be big-endian encoded
+            if gguf_version != GGUFModelInfo.VERSION:
+                is_little_endian = False
+                gguf_version = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+            if gguf_version != GGUFModelInfo.VERSION:
+                raise ParseError(f"Expected GGUF version '{GGUFModelInfo.VERSION}', but got '{gguf_version}'")
+
+            tensor_count = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+            metadata_kv_count = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+
+            metadata = {}
+            for _ in range(metadata_kv_count):
+                key = GGUFInfoParser.read_string(model)
+                value_type = GGUFInfoParser.read_value_type(model, is_little_endian)
+                metadata[key] = GGUFInfoParser.read_value(model, value_type, is_little_endian)
+
+            tensors: list[Tensor] = []
+            for _ in range(tensor_count):
+                name = GGUFInfoParser.read_string(model)
+                n_dimensions = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+                dimensions: list[int] = []
+                for _ in range(n_dimensions):
+                    dimensions.append(GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian))
+                tensor_type = GGML_TYPE(GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian))
+                offset = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+                tensors.append(Tensor(name, n_dimensions, dimensions, tensor_type, offset))
+
+            return GGUFModelInfo(model_name, model_registry, model_path, metadata, tensors, is_little_endian)
diff --git a/ramalama/model.py b/ramalama/model.py
index 94889238..587e6d00 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -15,6 +15,8 @@
 from ramalama.quadlet import Quadlet
 from ramalama.kube import Kube
 from ramalama.common import MNT_DIR, MNT_FILE
+from ramalama.model_inspect import GGUFModelInfo, ModelInfoBase
+from ramalama.gguf_parser import GGUFInfoParser
 
 MODEL_TYPES = ["file", "https", "http", "oci", "huggingface", "hf", "ollama"]
 
@@ -301,6 +303,16 @@ def get_model_path(self, args):
 
         return model_path
 
+    def get_model_registry(self, args):
+        model_path = self.get_model_path(args)
+        if not model_path or args.dryrun:
+            return ""
+
+        parts = model_path.replace(args.store, "").split(os.sep)
+        if len(parts) < 3:
+            return ""
+        return parts[2]
+
     def build_exec_args_bench(self, args, model_path):
         exec_model_path = MNT_FILE if args.container else model_path
         exec_args = ["llama-bench"]
@@ -467,6 +479,18 @@ def exists(self, args):
     def check_valid_model_path(self, relative_target_path, model_path):
         return os.path.exists(model_path) and os.readlink(model_path) == relative_target_path
 
+    def inspect(self, args):
+        model_name = self.filename
+        model_path = self.get_model_path(args)
+        model_registry = self.get_model_registry(args)
+
+        if GGUFInfoParser.is_model_gguf(model_path):
+            gguf_info: GGUFModelInfo = GGUFInfoParser.parse(model_name, model_registry, model_path, args)
+            print(gguf_info.serialize(json=args.json, all=args.all))
+            return
+
+        print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=args.json))
+
 
 def dry_run(args):
     for arg in args:
diff --git a/ramalama/model_inspect.py b/ramalama/model_inspect.py
new file mode 100644
index 00000000..41530e71
--- /dev/null
+++ b/ramalama/model_inspect.py
@@ -0,0 +1,111 @@
+import sys
+import shutil
+import json
+
+from typing import Dict, Any
+from dataclasses import dataclass
+
+
+def get_terminal_width():
+    if sys.stdout.isatty():
+        return shutil.get_terminal_size().columns
+    return 80
+
+
+def adjust_new_line(line: str) -> str:
+    filler = "..."
+    max_width = get_terminal_width()
+    adjusted_length = max_width - len(filler)
+
+    adjust_for_newline = 1 if line.endswith("\n") else 0
+    if len(line) - adjust_for_newline > max_width:
+        return line[: adjusted_length - adjust_for_newline] + filler + ("\n" if adjust_for_newline == 1 else "")
+    if not line.endswith("\n"):
+        return line + "\n"
+    return line
+
+
+@dataclass
+class Tensor:
+    name: str
+    n_dimensions: int
+    dimensions: list[int]
+    type: Any
+    offset: int
+
+
+@dataclass
+class ModelInfoBase:
+    Name: str
+    Registry: str
+    Path: str
+
+    def serialize(self, json: bool = False) -> str:
+        if json:
+            return self.to_json()
+        ret = adjust_new_line(f"{self.Name}\n") + adjust_new_line(f"   Path: {self.Path}\n")
+        return ret + adjust_new_line(f"   Registry: {self.Registry}")
+
+    def to_json(self) -> str:
+        return json.dumps(self.__dict__, sort_keys=True, indent=4)
+
+
+class GGUFModelInfo(ModelInfoBase):
+
+    MAGIC_NUMBER = "GGUF"
+    VERSION = 3
+
+    def __init__(
+        self,
+        Name: str,
+        Registry: str,
+        Path: str,
+        metadata: Dict[str, Any],
+        tensors: list[Tensor],
+        uses_little_endian: bool,
+    ):
+        super().__init__(Name, Registry, Path)
+
+        self.Format = GGUFModelInfo.MAGIC_NUMBER
+        self.Version = GGUFModelInfo.VERSION
+        self.Metadata: Dict[str, Any] = metadata
+        self.Tensors: list[Tensor] = tensors
+        self.LittleEndian: bool = uses_little_endian
+
+    def serialize(self, json: bool = False, all: bool = False) -> str:
+        if json:
+            return self.to_json(all)
+
+        ret = super().serialize()
+        ret = ret + adjust_new_line(f"   Format: {GGUFModelInfo.MAGIC_NUMBER}")
+        ret = ret + adjust_new_line(f"   Version: {GGUFModelInfo.VERSION}")
+        ret = ret + adjust_new_line(f"   Endianness: {'little' if self.LittleEndian else 'big'}")
+        metadata_header = "   Metadata: "
+        if not all:
+            metadata_header = metadata_header + f"{len(self.Metadata)} entries"
+        ret = ret + adjust_new_line(metadata_header)
+        if all:
+            for key, value in sorted(self.Metadata.items()):
+                ret = ret + adjust_new_line(f"      {key}: {value}")
+        tensor_header = "   Tensors: "
+        if not all:
+            tensor_header = tensor_header + f"{len(self.Tensors)} entries"
+        ret = ret + adjust_new_line(tensor_header)
+        if all:
+            i = 0
+            for tensor in self.Tensors:
+                ret = ret + adjust_new_line(
+                    f"      {i}: {tensor.name, tensor.type.name, tensor.n_dimensions, tensor.offset}"
+                )
+                i = i + 1
+
+        return ret
+
+    def to_json(self, all: bool = False) -> str:
+        if all:
+            return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
+
+        d = {k: v for k, v in self.__dict__.items() if k != "Metadata" and k != "Tensors"}
+        d["Metadata"] = len(self.Metadata)
+        d["Tensors"] = len(self.Tensors)
+        return json.dumps(d, sort_keys=True, indent=4)

From 183597cdcc44bc84b54c5dbab087b2df6a93db0a Mon Sep 17 00:00:00 2001
From: Michael Engel
Date: Mon, 10 Feb 2025 09:56:25 +0100
Subject: [PATCH 2/4] Fix: Set directory and filename in Model base class

The directory and filename of a model are determined by the respective
model implementation, e.g. Ollama or Huggingface. If, however, these two
fields are not defined in the Model base class, accessing them on a
specific model instance might fail since they do not exist.

Signed-off-by: Michael Engel
---
 ramalama/huggingface.py | 3 ---
 ramalama/model.py       | 3 +++
 ramalama/url.py         | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/ramalama/huggingface.py b/ramalama/huggingface.py
index 675c76f4..cdc21348 100644
--- a/ramalama/huggingface.py
+++ b/ramalama/huggingface.py
@@ -38,9 +38,6 @@ def __init__(self, model):
         model = model.removeprefix("hf.co/")
         super().__init__(model)
         self.type = "huggingface"
-        split = self.model.rsplit("/", 1)
-        self.directory = split[0] if len(split) > 1 else ""
-        self.filename = split[1] if len(split) > 1 else split[0]
         self.hf_cli_available = is_huggingface_cli_available()
 
     def login(self, args):
diff --git a/ramalama/model.py b/ramalama/model.py
index 587e6d00..d7dda2d0 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -43,6 +43,9 @@ class Model:
 
     def __init__(self, model):
         self.model = model
+        split = self.model.rsplit("/", 1)
+        self.directory = split[0] if len(split) > 1 else ""
+        self.filename = split[1] if len(split) > 1 else split[0]
 
     def login(self, args):
         raise NotImplementedError(f"ramalama login for {self.type} not implemented")
diff --git a/ramalama/url.py b/ramalama/url.py
index 8e11dfe4..992dc074 100644
--- a/ramalama/url.py
+++ b/ramalama/url.py
@@ -15,7 +15,6 @@ def __init__(self, model):
         super().__init__(model)
         split = self.model.rsplit("/", 1)
         self.directory = split[0].removeprefix("/") if len(split) > 1 else ""
-        self.filename = split[1] if len(split) > 1 else split[0]
 
     def pull(self, args):
         model_path = self.model_path(args)

From 36bcc94ec3e47909b7097e6058a15ff1031baf57 Mon Sep 17 00:00:00 2001
From: Michael Engel
Date: Mon, 10 Feb 2025 13:56:53 +0100
Subject: [PATCH 3/4] Added CI step to check installed python files

Signed-off-by: Michael Engel
---
 .github/workflows/install_ramalama.yml |  9 +++++++--
 install.sh                             | 22 ++++++++++++++------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/install_ramalama.yml b/.github/workflows/install_ramalama.yml
index 8c5d5f62..905a3c43 100644
--- a/.github/workflows/install_ramalama.yml
+++ b/.github/workflows/install_ramalama.yml
@@ -38,8 +38,13 @@ jobs:
         run: |
           chmod +x install.sh
           sudo ./install.sh -l
-
-      - name: RamaLama info
+
+      - name: Verify RamaLama installation
+        run: |
+          install_dir=$(sudo ./install.sh get_installation_dir)/ramalama/
+          ls -f -A ramalama/ | while read file; do ls -f -A $install_dir | grep $file; done
+
+      - name: RamaLama info
         run: |
           ramalama info
 
diff --git a/install.sh b/install.sh
index b39ad360..f8f1a3b8 100755
--- a/install.sh
+++ b/install.sh
@@ -100,6 +100,16 @@ check_platform() {
   return 0
 }
 
+get_installation_dir() {
+  local sharedirs=("/opt/homebrew/share" "/usr/local/share" "/usr/share")
+  for dir in "${sharedirs[@]}"; do
+    if [ -d "$dir" ]; then
+      echo "$dir/ramalama"
+      break
+    fi
+  done
+}
+
 setup_ramalama() {
   local binfile="ramalama"
   local from_file="${binfile}"
@@ -114,13 +124,7 @@ setup_ramalama() {
   download "$url" "$to_file"
   local ramalama_bin="${1}/${binfile}"
   local sharedirs=("/opt/homebrew/share" "/usr/local/share" "/usr/share")
-  local syspath
-  for dir in "${sharedirs[@]}"; do
-    if [ -d "$dir" ]; then
-      syspath="$dir/ramalama"
-      break
-    fi
-  done
+  local syspath=$(get_installation_dir)
 
   $sudo install -m755 -d "$syspath"
   syspath="$syspath/ramalama"
@@ -154,6 +158,9 @@
         local_install="true"
         shift
        ;;
+      get_*)
+        get_installation_dir
+        return;;
       *)
         break
@@ -184,4 +191,3 @@
 }
 
 main "$@"
-

From 7f8a046cd4d668086cd50490957378d71ac5a099 Mon Sep 17 00:00:00 2001
From: Michael Engel
Date: Mon, 10 Feb 2025 16:23:50 +0100
Subject: [PATCH 4/4] Added system tests for new inspect command

Signed-off-by: Michael Engel
---
 test/system/100-inspect.bats | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 test/system/100-inspect.bats

diff --git a/test/system/100-inspect.bats b/test/system/100-inspect.bats
new file mode 100644
index 00000000..66105067
--- /dev/null
+++ b/test/system/100-inspect.bats
@@ -0,0 +1,35 @@
+#!/usr/bin/env bats
+
+load helpers
+load helpers.registry
+load setup_suite
+
+# bats test_tags=distro-integration
+@test "ramalama inspect GGUF model" {
+    run_ramalama inspect tiny
+
+    is "${lines[0]}" "tinyllama" "model name"
+    is "${lines[1]}" "   Path: .*models/ollama/tinyllama:latest" "model path"
+    is "${lines[2]}" "   Registry: ollama" "model registry"
+    is "${lines[3]}" "   Format: GGUF" "model format"
+    is "${lines[4]}" "   Version: 3" "model format version"
+    is "${lines[5]}" "   Endianness: little" "model endianness"
+    is "${lines[6]}" "   Metadata: 23 entries" "# of metadata entries"
+    is "${lines[7]}" "   Tensors: 201 entries" "# of tensor entries"
+}
+
+# bats test_tags=distro-integration
+@test "ramalama inspect GGUF model with --all" {
+    run_ramalama inspect --all tiny
+
+    is "${lines[0]}" "tinyllama" "model name"
+    is "${lines[1]}" "   Path: .*models/ollama/tinyllama:latest" "model path"
+    is "${lines[2]}" "   Registry: ollama" "model registry"
+    is "${lines[3]}" "   Format: GGUF" "model format"
+    is "${lines[4]}" "   Version: 3" "model format version"
+    is "${lines[5]}" "   Endianness: little" "model endianness"
+    is "${lines[6]}" "   Metadata: " "metadata header"
+    is "${lines[7]}" "      general.architecture: llama" "metadata general.architecture"
+}
+
+# vim: filetype=sh
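
A quick way to exercise the new parser outside the CLI is to call GGUFInfoParser directly. A minimal sketch, assuming the ramalama package from this series is importable and that ./model.gguf is a placeholder path to some local GGUF file (neither is part of the patches themselves):

```python
# Sketch only: the path and the name/registry strings below are placeholders.
from ramalama.gguf_parser import GGUFInfoParser

model_path = "./model.gguf"  # assumed local GGUF file

# is_model_gguf() only checks the 4-byte "GGUF" magic number at the start
# of the file, so it is cheap to call on arbitrary files.
if GGUFInfoParser.is_model_gguf(model_path):
    # parse() reads the header, every metadata key/value pair and the tensor
    # table; the last argument (cli_args) is not used by parse() itself.
    info = GGUFInfoParser.parse("model", "local", model_path, None)
    print(info.serialize(json=True, all=True))
```

This mirrors what Model.inspect() in patch 1 does for GGUF files, minus the path/registry resolution that the CLI performs.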