diff --git a/.github/workflows/install_ramalama.yml b/.github/workflows/install_ramalama.yml
index 8c5d5f62..905a3c43 100644
--- a/.github/workflows/install_ramalama.yml
+++ b/.github/workflows/install_ramalama.yml
@@ -38,8 +38,13 @@ jobs:
       run: |
         chmod +x install.sh
         sudo ./install.sh -l
-
-    - name: RamaLama info
+
+    - name: Verify RamaLama installation
+      run: |
+        install_dir=$(sudo ./install.sh get_installation_dir)/ramalama/
+        ls -A ramalama/ | while read -r file; do ls -A "$install_dir" | grep -x "$file" || exit 1; done
+
+    - name: RamaLama info
       run: |
         ramalama info
diff --git a/docs/ramalama-inspect.1.md b/docs/ramalama-inspect.1.md
new file mode 100644
index 00000000..55d1089a
--- /dev/null
+++ b/docs/ramalama-inspect.1.md
@@ -0,0 +1,77 @@
+% ramalama-inspect 1
+
+## NAME
+ramalama\-inspect - inspect the specified AI Model
+
+## SYNOPSIS
+**ramalama inspect** [*options*] *model*
+
+## DESCRIPTION
+Inspect the specified AI Model, displaying additional information
+such as its registry, its metadata and its tensor information.
+
+## OPTIONS
+
+#### **--all**
+Print all available information about the AI Model.
+By default, only a basic subset is printed.
+
+#### **--help**, **-h**
+Print usage message
+
+#### **--json**
+Print the AI Model information in JSON format.
+
+## EXAMPLES
+
+Inspect the smollm:135m model for basic information
+```
+$ ramalama inspect smollm:135m
+smollm:135m
+ Path: /var/lib/ramalama/models/ollama/smollm:135m
+ Registry: ollama
+ Format: GGUF
+ Version: 3
+ Endianness: little
+ Metadata: 39 entries
+ Tensors: 272 entries
+```
+
+Inspect the smollm:135m model for all information in JSON format
+```
+$ ramalama inspect smollm:135m --all --json
+{
+    "Format": "GGUF",
+    "LittleEndian": true,
+    "Metadata": {
+        "general.architecture": "llama",
+        "general.base_model.0.name": "SmolLM 135M",
+        "general.base_model.0.organization": "HuggingFaceTB",
+        "general.base_model.0.repo_url": "https://huggingface.co/HuggingFaceTB/SmolLM-135M",
+        ...
+    },
+    "Name": "smollm:135m",
+    "Path": "/home/mengel/.local/share/ramalama/models/ollama/smollm:135m",
+    "Registry": "ollama",
+    "Tensors": [
+        {
+            "dimensions": [
+                576,
+                49152
+            ],
+            "n_dimensions": 2,
+            "name": "token_embd.weight",
+            "offset": 0,
+            "type": 8
+        },
+        ...
+    ],
+    "Version": 3
+}
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**
+
+## HISTORY
+Feb 2025, Originally compiled by Michael Engel
diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index e1d0bd70..25679d04 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -141,6 +141,7 @@ show RamaLama version
 | [ramalama-bench(1)](ramalama-bench.1.md) | benchmark specified AI Model |
 | [ramalama-convert(1)](ramalama-convert.1.md) | convert AI Models from local storage to OCI Image |
 | [ramalama-info(1)](ramalama-info.1.md) | Display RamaLama configuration information |
+| [ramalama-inspect(1)](ramalama-inspect.1.md) | inspect the specified AI Model |
 | [ramalama-list(1)](ramalama-list.1.md) | list all downloaded AI Models |
 | [ramalama-login(1)](ramalama-login.1.md) | login to remote registry |
 | [ramalama-logout(1)](ramalama-logout.1.md) | logout from remote registry |
diff --git a/install.sh b/install.sh
index 0642c7de..f8f1a3b8 100755
--- a/install.sh
+++ b/install.sh
@@ -100,6 +100,16 @@ check_platform() {
   return 0
 }
 
+get_installation_dir() {
+  local sharedirs=("/opt/homebrew/share" "/usr/local/share" "/usr/share")
+  for dir in "${sharedirs[@]}"; do
+    if [ -d "$dir" ]; then
+      echo "$dir/ramalama"
+      break
+    fi
+  done
+}
+
 setup_ramalama() {
   local binfile="ramalama"
   local from_file="${binfile}"
@@ -114,23 +124,17 @@
   download "$url" "$to_file"
   local ramalama_bin="${1}/${binfile}"
   local sharedirs=("/opt/homebrew/share" "/usr/local/share" "/usr/share")
-  local syspath
-  for dir in "${sharedirs[@]}"; do
-    if [ -d "$dir" ]; then
-      syspath="$dir/ramalama"
-      break
-    fi
-  done
+  local syspath=$(get_installation_dir)
   $sudo install -m755 -d "$syspath"
   syspath="$syspath/ramalama"
   $sudo install -m755 -d "$syspath"
   $sudo install -m755 "$to_file" "$ramalama_bin"
-  local python_files=("cli.py" "huggingface.py" "model.py" "ollama.py" \
-                      "common.py" "__init__.py" "quadlet.py" "kube.py" \
-                      "oci.py" "version.py" "shortnames.py" "toml_parser.py" \
-                      "file.py" "http_client.py" "url.py" "annotations.py" \
-                      "gpu_detector.py" "console.py")
+  local python_files=("cli.py" "gguf_parser.py" "huggingface.py" "model.py" \
+                      "model_inspect.py" "ollama.py" "common.py" "__init__.py" \
+                      "quadlet.py" "kube.py" "oci.py" "version.py" "shortnames.py" \
+                      "toml_parser.py" "file.py" "http_client.py" "url.py" \
+                      "annotations.py" "gpu_detector.py" "console.py")
   for i in "${python_files[@]}"; do
     if $local_install; then
      url="ramalama/${i}"
@@ -154,6 +158,9 @@ main() {
      local_install="true"
      shift
      ;;
+    get_installation_dir)
+      get_installation_dir
+      return;;
    *)
      break
   esac
@@ -184,4 +191,3 @@
 }
 
 main "$@"
-
diff --git a/ramalama/cli.py b/ramalama/cli.py
index e551a5a3..a4f127c6 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -243,6 +243,7 @@ def configure_subcommands(parser):
     containers_parser(subparsers)
     convert_parser(subparsers)
     info_parser(subparsers)
+    inspect_parser(subparsers)
     list_parser(subparsers)
     login_parser(subparsers)
     logout_parser(subparsers)
@@ -994,3 +995,16 @@ def perplexity_parser(subparsers):
 def perplexity_cli(args):
     model = New(args.MODEL, args)
     model.perplexity(args)
+
+
+def inspect_parser(subparsers):
+    parser = subparsers.add_parser("inspect", help="inspect an AI Model")
+    parser.add_argument("MODEL")  # positional argument
+    parser.add_argument("--all", dest="all", action="store_true", help="display all available information about the AI Model")
+    parser.add_argument("--json", dest="json", action="store_true", help="display AI Model information in JSON format")
+    parser.set_defaults(func=inspect_cli)
+
+
+def inspect_cli(args):
+    model = New(args.MODEL, args)
+    model.inspect(args)
diff --git a/ramalama/common.py b/ramalama/common.py
index a5732858..048b9e2b 100644
--- a/ramalama/common.py
+++ b/ramalama/common.py
@@ -17,7 +17,6 @@
 from ramalama.http_client import HttpClient
 
-
 logging.basicConfig(level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s")
 
 MNT_DIR = "/mnt/models"
diff --git a/ramalama/gguf_parser.py b/ramalama/gguf_parser.py
new file mode 100644
index 00000000..8c89db65
--- /dev/null
+++ b/ramalama/gguf_parser.py
@@ -0,0 +1,204 @@
+import io
+import struct
+
+from enum import IntEnum
+from typing import Dict, Any
+
+import ramalama.console as console
+from ramalama.model_inspect import GGUFModelInfo, Tensor
+
+
+# Based on ggml_type in
+# https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+class GGML_TYPE(IntEnum):
+    GGML_TYPE_F32 = 0
+    GGML_TYPE_F16 = 1
+    GGML_TYPE_Q4_0 = 2
+    GGML_TYPE_Q4_1 = 3
+    # GGML_TYPE_Q4_2 = 4, support has been removed
+    # GGML_TYPE_Q4_3 = 5, support has been removed
+    GGML_TYPE_Q5_0 = 6
+    GGML_TYPE_Q5_1 = 7
+    GGML_TYPE_Q8_0 = 8
+    GGML_TYPE_Q8_1 = 9
+    GGML_TYPE_Q2_K = 10
+    GGML_TYPE_Q3_K = 11
+    GGML_TYPE_Q4_K = 12
+    GGML_TYPE_Q5_K = 13
+    GGML_TYPE_Q6_K = 14
+    GGML_TYPE_Q8_K = 15
+    GGML_TYPE_IQ2_XXS = 16
+    GGML_TYPE_IQ2_XS = 17
+    GGML_TYPE_IQ3_XXS = 18
+    GGML_TYPE_IQ1_S = 19
+    GGML_TYPE_IQ4_NL = 20
+    GGML_TYPE_IQ3_S = 21
+    GGML_TYPE_IQ2_S = 22
+    GGML_TYPE_IQ4_XS = 23
+    GGML_TYPE_I8 = 24
+    GGML_TYPE_I16 = 25
+    GGML_TYPE_I32 = 26
+    GGML_TYPE_I64 = 27
+    GGML_TYPE_F64 = 28
+    GGML_TYPE_IQ1_M = 29
+
+
+# Based on gguf_metadata_value_type in
+# https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+class GGUFValueType(IntEnum):
+    UINT8 = 0  # 8-bit unsigned integer
+    INT8 = 1  # 8-bit signed integer
+    UINT16 = 2  # 16-bit unsigned little-endian integer
+    INT16 = 3  # 16-bit signed little-endian integer
+    UINT32 = 4  # 32-bit unsigned little-endian integer
+    INT32 = 5  # 32-bit signed little-endian integer
+    FLOAT32 = 6  # 32-bit IEEE754 floating point number
+
+    # boolean of 1-byte value where 0 is false and 1 is true.
+    # Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy.
+    BOOL = 7
+
+    STRING = 8  # UTF-8 non-null-terminated string, with length prepended.
+
+    # Array of other values, with the length and type prepended.
+    # Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
+    ARRAY = 9
+
+    UINT64 = 10  # 64-bit unsigned little-endian integer
+    INT64 = 11  # 64-bit signed little-endian integer
+    FLOAT64 = 12  # 64-bit IEEE754 floating point number
+
+
+# Mapping of GGUF value types to the Python struct library's format characters,
+# see https://docs.python.org/3/library/struct.html#format-characters
+GGUF_VALUE_TYPE_FORMAT: Dict[GGUFValueType, str] = {
+    GGUFValueType.UINT8: "B",
+    GGUFValueType.INT8: "b",
+    GGUFValueType.UINT16: "H",
+    GGUFValueType.INT16: "h",
+    GGUFValueType.UINT32: "I",
+    GGUFValueType.INT32: "i",
+    GGUFValueType.FLOAT32: "f",
+    GGUFValueType.BOOL: "?",
+    GGUFValueType.UINT64: "Q",
+    GGUFValueType.INT64: "q",
+    GGUFValueType.FLOAT64: "d",
+}
+
+GGUF_NUMBER_FORMATS: list[GGUFValueType] = [
+    GGUFValueType.UINT8,
+    GGUFValueType.INT8,
+    GGUFValueType.UINT16,
+    GGUFValueType.INT16,
+    GGUFValueType.UINT32,
+    GGUFValueType.INT32,
+    GGUFValueType.FLOAT32,
+    GGUFValueType.UINT64,
+    GGUFValueType.INT64,
+    GGUFValueType.FLOAT64,
+]
+
+
+class ParseError(Exception):
+    pass
+
+
+class GGUFInfoParser:
+
+    @staticmethod
+    def is_model_gguf(model_path: str) -> bool:
+        try:
+            with open(model_path, "rb") as model_file:
+                magic_number = GGUFInfoParser.read_string(model_file, 4)
+                return magic_number == GGUFModelInfo.MAGIC_NUMBER
+        except Exception as ex:
+            console.warning(f"Failed to read model '{model_path}': {ex}")
+            return False
+
+    @staticmethod
+    def read_string(model: io.BufferedReader, length: int = -1) -> str:
+        if length == -1:
+            type_string = GGUF_VALUE_TYPE_FORMAT[GGUFValueType.UINT64]
+            length = struct.unpack(type_string, model.read(struct.calcsize(type_string)))[0]
+        return model.read(length).decode("utf-8")
+
+    @staticmethod
+    def read_number(model: io.BufferedReader, value_type: GGUFValueType, model_uses_little_endian: bool) -> float:
+        if value_type not in GGUF_NUMBER_FORMATS:
+            raise ParseError(f"Value type '{value_type}' not in format dict")
+        typestring = f"{'<' if model_uses_little_endian else '>'}{GGUF_VALUE_TYPE_FORMAT[value_type]}"
+        return struct.unpack(typestring, model.read(struct.calcsize(typestring)))[0]
+
+    @staticmethod
+    def read_bool(model: io.BufferedReader, model_uses_little_endian: bool) -> bool:
+        typestring = f"{'<' if model_uses_little_endian else '>'}{GGUF_VALUE_TYPE_FORMAT[GGUFValueType.BOOL]}"
+        value = struct.unpack(typestring, model.read(struct.calcsize(typestring)))[0]
+        if value not in [0, 1]:
+            raise ParseError(f"Invalid bool value '{value}'")
+        return value == 1
+
+    @staticmethod
+    def read_value_type(model: io.BufferedReader, model_uses_little_endian: bool) -> GGUFValueType:
+        value_type = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, model_uses_little_endian)
+        return GGUFValueType(value_type)
+
+    @staticmethod
+    def read_value(model: io.BufferedReader, value_type: GGUFValueType, model_uses_little_endian: bool) -> Any:
+        value = None
+        if value_type in GGUF_NUMBER_FORMATS:
+            value = GGUFInfoParser.read_number(model, value_type, model_uses_little_endian)
+        elif value_type == GGUFValueType.BOOL:
+            value = GGUFInfoParser.read_bool(model, model_uses_little_endian)
+        elif value_type == GGUFValueType.STRING:
+            value = GGUFInfoParser.read_string(model)
+        elif value_type == GGUFValueType.ARRAY:
+            array_type = GGUFInfoParser.read_value_type(model, model_uses_little_endian)
+            array_length = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, model_uses_little_endian)
+            value = [
+                GGUFInfoParser.read_value(model, array_type, model_uses_little_endian) for _ in range(array_length)
+            ]
+
+        if value is not None:
+            return value
+        raise ParseError(f"Unknown type '{value_type}'")
+
+    @staticmethod
+    def parse(model_name: str, model_registry: str, model_path: str, cli_args) -> GGUFModelInfo:
+        # By default, models are little-endian encoded
+        is_little_endian = True
+
+        with open(model_path, "rb") as model:
+            magic_number = GGUFInfoParser.read_string(model, 4)
+            if magic_number != GGUFModelInfo.MAGIC_NUMBER:
+                raise ParseError(f"Invalid GGUF magic number '{magic_number}'")
+
+            gguf_version = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+            # If the version read does not match, the model could be big-endian encoded
+            if gguf_version != GGUFModelInfo.VERSION:
+                is_little_endian = False
+                model.seek(-4, io.SEEK_CUR)  # seek back and re-read the version as big-endian
+                gguf_version = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+                if gguf_version != GGUFModelInfo.VERSION:
+                    raise ParseError(f"Expected GGUF version '{GGUFModelInfo.VERSION}', but got '{gguf_version}'")
+
+            tensor_count = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+            metadata_kv_count = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+
+            metadata = {}
+            for _ in range(metadata_kv_count):
+                key = GGUFInfoParser.read_string(model)
+                value_type = GGUFInfoParser.read_value_type(model, is_little_endian)
+                metadata[key] = GGUFInfoParser.read_value(model, value_type, is_little_endian)
+
+            tensors: list[Tensor] = []
+            for _ in range(tensor_count):
+                name = GGUFInfoParser.read_string(model)
+                n_dimensions = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+                dimensions: list[int] = []
+                for _ in range(n_dimensions):
+                    dimensions.append(GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian))
+                tensor_type = GGML_TYPE(GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian))
+                offset = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+                tensors.append(Tensor(name, n_dimensions, dimensions, tensor_type, offset))
+
+        return GGUFModelInfo(model_name, model_registry, model_path, metadata, tensors, is_little_endian)
diff --git a/ramalama/huggingface.py b/ramalama/huggingface.py
index 675c76f4..cdc21348 100644
--- a/ramalama/huggingface.py
+++ b/ramalama/huggingface.py
@@ -38,9 +38,6 @@ def __init__(self, model):
         model = model.removeprefix("hf.co/")
         super().__init__(model)
         self.type = "huggingface"
-        split = self.model.rsplit("/", 1)
-        self.directory = split[0] if len(split) > 1 else ""
-        self.filename = split[1] if len(split) > 1 else split[0]
         self.hf_cli_available = is_huggingface_cli_available()
 
     def login(self, args):
diff --git a/ramalama/model.py b/ramalama/model.py
index 94889238..d7dda2d0 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -15,6 +15,8 @@
 from ramalama.quadlet import Quadlet
 from ramalama.kube import Kube
 from ramalama.common import MNT_DIR, MNT_FILE
+from ramalama.model_inspect import GGUFModelInfo, ModelInfoBase
+from ramalama.gguf_parser import GGUFInfoParser
 
 MODEL_TYPES = ["file", "https", "http", "oci", "huggingface", "hf", "ollama"]
 
@@ -41,6 +43,9 @@ class Model:
 
     def __init__(self, model):
         self.model = model
+        split = self.model.rsplit("/", 1)
+        self.directory = split[0] if len(split) > 1 else ""
+        self.filename = split[1] if len(split) > 1 else split[0]
 
     def login(self, args):
         raise NotImplementedError(f"ramalama login for {self.type} not implemented")
@@ -301,6 +306,16 @@ def get_model_path(self, args):
 
         return model_path
 
+    def get_model_registry(self, args):
+        model_path = self.get_model_path(args)
+        if not model_path or args.dryrun:
+            return ""
+
+        parts = model_path.replace(args.store, "").split(os.sep)
+        if len(parts) < 3:
+            return ""
+        return parts[2]
+
     def build_exec_args_bench(self, args, model_path):
         exec_model_path = MNT_FILE if args.container else model_path
         exec_args = ["llama-bench"]
@@ -467,6 +482,18 @@ def exists(self, args):
     def check_valid_model_path(self, relative_target_path, model_path):
         return os.path.exists(model_path) and os.readlink(model_path) == relative_target_path
 
+    def inspect(self, args):
+        model_name = self.filename
+        model_path = self.get_model_path(args)
+        model_registry = self.get_model_registry(args)
+
+        if GGUFInfoParser.is_model_gguf(model_path):
+            gguf_info: GGUFModelInfo = GGUFInfoParser.parse(model_name, model_registry, model_path, args)
+            print(gguf_info.serialize(json=args.json, all=args.all))
+            return
+
+        print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=args.json))
+
 
 def dry_run(args):
     for arg in args:
diff --git a/ramalama/model_inspect.py b/ramalama/model_inspect.py
new file mode 100644
index 00000000..41530e71
--- /dev/null
+++ b/ramalama/model_inspect.py
@@ -0,0 +1,111 @@
+import sys
+import shutil
+import json
+
+from typing import Dict, Any
+from dataclasses import dataclass
+
+
+def get_terminal_width():
+    if sys.stdout.isatty():
+        return shutil.get_terminal_size().columns
+    return 80
+
+
+def adjust_new_line(line: str) -> str:
+    filler = "..."
+    max_width = get_terminal_width()
+    adjusted_length = max_width - len(filler)
+
+    adjust_for_newline = 1 if line.endswith("\n") else 0
+    if len(line) - adjust_for_newline > max_width:
+        return line[: adjusted_length - adjust_for_newline] + filler + ("\n" if adjust_for_newline == 1 else "")
+    if not line.endswith("\n"):
+        return line + "\n"
+    return line
+
+
+@dataclass
+class Tensor:
+    name: str
+    n_dimensions: int
+    dimensions: list[int]
+    type: str  # a GGML_TYPE at runtime; typed as str to avoid a circular import with gguf_parser
+    offset: int
+
+
+@dataclass
+class ModelInfoBase:
+    Name: str
+    Registry: str
+    Path: str
+
+    def serialize(self, json: bool = False) -> str:
+        if json:
+            return self.to_json()
+        ret = adjust_new_line(f"{self.Name}\n")
+        ret = ret + adjust_new_line(f" Path: {self.Path}\n")
+        ret = ret + adjust_new_line(f" Registry: {self.Registry}")
+        return ret
+
+    def to_json(self) -> str:
+        return json.dumps(self.__dict__, sort_keys=True, indent=4)
+
+
+class GGUFModelInfo(ModelInfoBase):
+
+    MAGIC_NUMBER = "GGUF"
+    VERSION = 3
+
+    def __init__(
+        self,
+        Name: str,
+        Registry: str,
+        Path: str,
+        metadata: Dict[str, Any],
+        tensors: list[Tensor],
+        uses_little_endian: bool,
+    ):
+        super().__init__(Name, Registry, Path)
+
+        self.Format = GGUFModelInfo.MAGIC_NUMBER
+        self.Version = GGUFModelInfo.VERSION
+        self.Metadata: Dict[str, Any] = metadata
+        self.Tensors: list[Tensor] = tensors
+        self.LittleEndian: bool = uses_little_endian
+
+    def serialize(self, json: bool = False, all: bool = False) -> str:
+        if json:
+            return self.to_json(all)
+
+        ret = super().serialize()
+        ret = ret + adjust_new_line(f" Format: {GGUFModelInfo.MAGIC_NUMBER}")
+        ret = ret + adjust_new_line(f" Version: {GGUFModelInfo.VERSION}")
+        ret = ret + adjust_new_line(f" Endianness: {'little' if self.LittleEndian else 'big'}")
+        metadata_header = " Metadata: "
+        if not all:
+            metadata_header = metadata_header + f"{len(self.Metadata)} entries"
+        ret = ret + adjust_new_line(metadata_header)
+        if all:
+            for key, value in sorted(self.Metadata.items()):
+                ret = ret + adjust_new_line(f" {key}: {value}")
+        tensor_header = " Tensors: "
+        if not all:
+            tensor_header = tensor_header + f"{len(self.Tensors)} entries"
+        ret = ret + adjust_new_line(tensor_header)
+        if all:
+            for i, tensor in enumerate(self.Tensors):
+                ret = ret + adjust_new_line(
+                    f" {i}: {tensor.name, tensor.type.name, tensor.n_dimensions, tensor.offset}"
+                )
+
+        return ret
+
+    def to_json(self, all: bool = False) -> str:
+        if all:
+            return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
+
+        d = {k: v for k, v in self.__dict__.items() if k != "Metadata" and k != "Tensors"}
+        d["Metadata"] = len(self.Metadata)
+        d["Tensors"] = len(self.Tensors)
+        return json.dumps(d, sort_keys=True, indent=4)
diff --git a/ramalama/url.py b/ramalama/url.py
index 8e11dfe4..992dc074 100644
--- a/ramalama/url.py
+++ b/ramalama/url.py
@@ -15,7 +15,6 @@ def __init__(self, model):
         super().__init__(model)
         split = self.model.rsplit("/", 1)
         self.directory = split[0].removeprefix("/") if len(split) > 1 else ""
-        self.filename = split[1] if len(split) > 1 else split[0]
 
     def pull(self, args):
         model_path = self.model_path(args)
diff --git a/test/system/100-inspect.bats b/test/system/100-inspect.bats
new file mode 100644
index 00000000..66105067
--- /dev/null
+++ b/test/system/100-inspect.bats
@@ -0,0 +1,35 @@
+#!/usr/bin/env bats
+
+load helpers
+load helpers.registry
+load setup_suite
+
+# bats test_tags=distro-integration
+@test "ramalama inspect GGUF model" {
+    run_ramalama inspect tiny
+
+    is "${lines[0]}" "tinyllama" "model name"
+    is "${lines[1]}" " Path: .*models/ollama/tinyllama:latest" "model path"
+    is "${lines[2]}" " Registry: ollama" "model registry"
+    is "${lines[3]}" " Format: GGUF" "model format"
+    is "${lines[4]}" " Version: 3" "model format version"
+    is "${lines[5]}" " Endianness: little" "model endianness"
+    is "${lines[6]}" " Metadata: 23 entries" "# of metadata entries"
+    is "${lines[7]}" " Tensors: 201 entries" "# of tensor entries"
+}
+
+# bats test_tags=distro-integration
+@test "ramalama inspect GGUF model with --all" {
+    run_ramalama inspect --all tiny
+
+    is "${lines[0]}" "tinyllama" "model name"
+    is "${lines[1]}" " Path: .*models/ollama/tinyllama:latest" "model path"
+    is "${lines[2]}" " Registry: ollama" "model registry"
+    is "${lines[3]}" " Format: GGUF" "model format"
+    is "${lines[4]}" " Version: 3" "model format version"
+    is "${lines[5]}" " Endianness: little" "model endianness"
+    is "${lines[6]}" " Metadata: " "metadata header"
+    is "${lines[7]}" " general.architecture: llama" "metadata general.architecture"
+}
+
+# vim: filetype=sh