Merge pull request #776 from engelmi/add-model-info-cli

Add model inspect cli
containers · Feb 10, 2025 · 0d841ec · 0d841ec
2 parents c5a15b0 + 7f8a046
commit 0d841ec
Show file tree

Hide file tree

Showing 12 changed files with 492 additions and 20 deletions.
diff --git a/.github/workflows/install_ramalama.yml b/.github/workflows/install_ramalama.yml
@@ -38,8 +38,13 @@ jobs:
         run: |
           chmod +x install.sh
           sudo ./install.sh -l
-
-      - name: RamaLama info
+        
+      - name: Verify RamaLama installation
+        run: |
+          # Every entry of the checked-out ramalama/ directory must also show up
+          # in the installed ramalama/ directory. grep -xF compares whole file
+          # names literally, so "cli.py" cannot match e.g. "xcli_py".
+          install_dir="$(sudo ./install.sh get_installation_dir)/ramalama/"
+          ls -f -A ramalama/ | while IFS= read -r file; do ls -f -A "$install_dir" | grep -xF -- "$file"; done
+      
+      - name: Ramalama info
         run: |
           ramalama info
 

diff --git a/docs/ramalama-inspect.1.md b/docs/ramalama-inspect.1.md
@@ -0,0 +1,77 @@
+% ramalama-inspect 1
+
+## NAME
+ramalama\-inspect - inspect the specified AI Model
+
+## SYNOPSIS
+**ramalama inspect** [*options*] *model*
+
+## DESCRIPTION
+Inspect the specified AI Model and print additional information about it,
+such as its repository, its metadata and its tensor information.
+
+## OPTIONS
+
+#### **--all**
+Print all available information about the AI Model.
+By default, only a basic subset is printed.
+
+#### **--help**, **-h**
+Print usage message
+
+#### **--json**
+Print the AI Model information in json format.
+
+## EXAMPLES
+
+Inspect the smollm:135m model for basic information
+```
+$ ramalama inspect smollm:135m
+smollm:135m
+   Path: /var/lib/ramalama/models/ollama/smollm:135m
+   Registry: ollama
+   Format: GGUF
+   Version: 3
+   Endianness: little
+   Metadata: 39 entries
+   Tensors: 272 entries
+```
+
+Inspect the smollm:135m model for all information in json format
+```
+$ ramalama inspect smollm:135m --all --json
+{
+    "Name": "smollm:135m",
+    "Path": "/home/mengel/.local/share/ramalama/models/ollama/smollm:135m",
+    "Registry": "ollama",
+    "Format": "GGUF",
+    "Version": 3,
+    "LittleEndian": true,
+    "Metadata": {
+        "general.architecture": "llama",
+        "general.base_model.0.name": "SmolLM 135M",
+        "general.base_model.0.organization": "HuggingFaceTB",
+        "general.base_model.0.repo_url": "https://huggingface.co/HuggingFaceTB/SmolLM-135M",
+        ...
+    },
+    "Tensors": [
+        {
+            "dimensions": [
+                576,
+                49152
+            ],
+            "n_dimensions": 2,
+            "name": "token_embd.weight",
+            "offset": 0,
+            "type": 8
+        },
+        ...
+    ]
+}
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**
+
+## HISTORY
+Feb 2025, Originally compiled by Michael Engel <[email protected]>
diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
@@ -141,6 +141,7 @@ show RamaLama version
 | [ramalama-bench(1)](ramalama-bench.1.md)          | benchmark specified AI Model                               |
 | [ramalama-convert(1)](ramalama-convert.1.md)      | convert AI Models from local storage to OCI Image          |
 | [ramalama-info(1)](ramalama-info.1.md)            | Display RamaLama configuration information                 |
+| [ramalama-inspect(1)](ramalama-inspect.1.md)      | inspect the specified AI Model                             |
 | [ramalama-list(1)](ramalama-list.1.md)            | list all downloaded AI Models                              |
 | [ramalama-login(1)](ramalama-login.1.md)          | login to remote registry                                   |
 | [ramalama-logout(1)](ramalama-logout.1.md)        | logout from remote registry                                |

diff --git a/install.sh b/install.sh
@@ -100,6 +100,16 @@ check_platform() {
   return 0
 }
 
+get_installation_dir() {
+  local sharedirs=("/opt/homebrew/share" "/usr/local/share" "/usr/share")
+  for dir in "${sharedirs[@]}"; do
+    if [ -d "$dir" ]; then
+      echo "$dir/ramalama"
+      break
+    fi
+  done
+}
+
 setup_ramalama() {
   local binfile="ramalama"
   local from_file="${binfile}"
@@ -114,23 +124,17 @@ setup_ramalama() {
   download "$url" "$to_file"
   local ramalama_bin="${1}/${binfile}"
   local sharedirs=("/opt/homebrew/share" "/usr/local/share" "/usr/share")
-  local syspath
-  for dir in "${sharedirs[@]}"; do
-    if [ -d "$dir" ]; then
-      syspath="$dir/ramalama"
-      break
-    fi
-  done
+  local syspath=$(get_installation_dir)
 
   $sudo install -m755 -d "$syspath"
   syspath="$syspath/ramalama"
   $sudo install -m755 -d "$syspath"
   $sudo install -m755 "$to_file" "$ramalama_bin"
-  local python_files=("cli.py" "huggingface.py" "model.py" "ollama.py" \
-                      "common.py" "__init__.py" "quadlet.py" "kube.py" \
-                      "oci.py" "version.py" "shortnames.py" "toml_parser.py" \
-                      "file.py" "http_client.py" "url.py" "annotations.py" \
-                      "gpu_detector.py" "console.py")
+  local python_files=("cli.py" "gguf_parser.py" "huggingface.py" "model.py" \
+                      "model_inspect.py" "ollama.py" "common.py" "__init__.py" \
+                      "quadlet.py" "kube.py" "oci.py" "version.py" "shortnames.py" \
+                      "toml_parser.py" "file.py" "http_client.py" "url.py" \
+                      "annotations.py" "gpu_detector.py" "console.py")
   for i in "${python_files[@]}"; do
     if $local_install; then
       url="ramalama/${i}"
@@ -154,6 +158,9 @@ main() {
         local_install="true"
         shift
         ;;
+      get_*)
+        get_installation_dir
+        return;;
       *)
         break
     esac
@@ -184,4 +191,3 @@ main() {
 }
 
 main "$@"
-
diff --git a/ramalama/cli.py b/ramalama/cli.py
@@ -243,6 +243,7 @@ def configure_subcommands(parser):
     containers_parser(subparsers)
     convert_parser(subparsers)
     info_parser(subparsers)
+    inspect_parser(subparsers)
     list_parser(subparsers)
     login_parser(subparsers)
     logout_parser(subparsers)
@@ -994,3 +995,16 @@ def perplexity_parser(subparsers):
 def perplexity_cli(args):
     model = New(args.MODEL, args)
     model.perplexity(args)
+
+
+def inspect_parser(subparsers):
+    parser = subparsers.add_parser("inspect", help="inspect an AI Model")
+    parser.add_argument("MODEL")  # positional argument
+    parser.add_argument("--all", dest="all", action="store_true", help="display all available information of AI Model")
+    parser.add_argument("--json", dest="json", action="store_true", help="display AI Model information in JSON format")
+    parser.set_defaults(func=inspect_cli)
+
+
+def inspect_cli(args):
+    model = New(args.MODEL, args)
+    model.inspect(args)
diff --git a/ramalama/common.py b/ramalama/common.py
@@ -17,7 +17,6 @@
 
 from ramalama.http_client import HttpClient
 
-
 logging.basicConfig(level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s")
 
 MNT_DIR = "/mnt/models"

diff --git a/ramalama/gguf_parser.py b/ramalama/gguf_parser.py
@@ -0,0 +1,201 @@
+import io
+import struct
+
+from enum import IntEnum
+from typing import Dict, Any
+
+import ramalama.console as console
+from ramalama.model_inspect import GGUFModelInfo, Tensor
+
+
+# Based on ggml_type in
+# https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+class GGML_TYPE(IntEnum):
+    GGML_TYPE_F32 = (0,)
+    GGML_TYPE_F16 = (1,)
+    GGML_TYPE_Q4_0 = (2,)
+    GGML_TYPE_Q4_1 = (3,)
+    # GGML_TYPE_Q4_2 = 4, support has been removed
+    # GGML_TYPE_Q4_3 = 5, support has been removed
+    GGML_TYPE_Q5_0 = (6,)
+    GGML_TYPE_Q5_1 = (7,)
+    GGML_TYPE_Q8_0 = (8,)
+    GGML_TYPE_Q8_1 = (9,)
+    GGML_TYPE_Q2_K = (10,)
+    GGML_TYPE_Q3_K = (11,)
+    GGML_TYPE_Q4_K = (12,)
+    GGML_TYPE_Q5_K = (13,)
+    GGML_TYPE_Q6_K = (14,)
+    GGML_TYPE_Q8_K = (15,)
+    GGML_TYPE_IQ2_XXS = (16,)
+    GGML_TYPE_IQ2_XS = (17,)
+    GGML_TYPE_IQ3_XXS = (18,)
+    GGML_TYPE_IQ1_S = (19,)
+    GGML_TYPE_IQ4_NL = (20,)
+    GGML_TYPE_IQ3_S = (21,)
+    GGML_TYPE_IQ2_S = (22,)
+    GGML_TYPE_IQ4_XS = (23,)
+    GGML_TYPE_I8 = (24,)
+    GGML_TYPE_I16 = (25,)
+    GGML_TYPE_I32 = (26,)
+    GGML_TYPE_I64 = (27,)
+    GGML_TYPE_F64 = (28,)
+    GGML_TYPE_IQ1_M = (29,)
+
+
+# Based on gguf_metadata_value_type in
+# https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+class GGUFValueType(IntEnum):
+    UINT8 = (0,)  # 8-bit unsigned integer
+    INT8 = (1,)  # 8-bit signed integer
+    UINT16 = (2,)  # 16-bit unsigned little-endian integer
+    INT16 = (3,)  # 16-bit signed little-endian integer
+    UINT32 = (4,)  # 32-bit unsigned little-endian integer
+    INT32 = (5,)  # 32-bit signed little-endian integer
+    FLOAT32 = (6,)  # 32-bit IEEE754 floating point number
+
+    # boolean of 1-byte value where 0 is false and 1 is true.
+    # Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy.
+    BOOL = (7,)
+
+    STRING = (8,)  # UTF-8 non-null-terminated string, with length prepended.
+
+    # Array of other values, with the length and type prepended.
+    # Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
+    ARRAY = (9,)
+
+    UINT64 = (10,)  # 64-bit unsigned little-endian integer
+    INT64 = (11,)  # 64-bit signed little-endian integer
+    FLOAT64 = (12,)  # 64-bit IEEE754 floating point number
+
+
+# Mapping of GGUF value types to the format characters of Python's struct library,
+# see https://docs.python.org/3/library/struct.html#format-characters
+# STRING and ARRAY have no entry here: they are not fixed-size scalars.
+GGUF_VALUE_TYPE_FORMAT: Dict[GGUFValueType, str] = {
+    GGUFValueType.UINT8: "B",
+    GGUFValueType.INT8: "b",
+    GGUFValueType.UINT16: "H",
+    GGUFValueType.INT16: "h",
+    GGUFValueType.UINT32: "I",
+    GGUFValueType.INT32: "i",
+    GGUFValueType.FLOAT32: "f",
+    GGUFValueType.BOOL: "?",
+    GGUFValueType.UINT64: "Q",
+    GGUFValueType.INT64: "q",
+    GGUFValueType.FLOAT64: "d",
+}
+
+# The GGUF value types that are plain numbers (integers and floats); BOOL is
+# deliberately not part of this list.
+GGUF_NUMBER_FORMATS: list[GGUFValueType] = [
+    GGUFValueType.UINT8,
+    GGUFValueType.INT8,
+    GGUFValueType.UINT16,
+    GGUFValueType.INT16,
+    GGUFValueType.UINT32,
+    GGUFValueType.INT32,
+    GGUFValueType.FLOAT32,
+    GGUFValueType.UINT64,
+    GGUFValueType.INT64,
+    GGUFValueType.FLOAT64,
+]
+
+
+class ParseError(Exception):
+    """Raised when the content of a model file does not match the expected GGUF layout."""
+
+    pass
+
+
+class GGUFInfoParser:
+
+    def is_model_gguf(model_path: str) -> bool:
+        try:
+            with open(model_path, "rb") as model_file:
+                magic_number = GGUFInfoParser.read_string(model_file, 4)
+                return magic_number == GGUFModelInfo.MAGIC_NUMBER
+        except Exception as ex:
+            console.warning(f" Failed to read model '{model_path}': {ex}")
+            return False
+
+    @staticmethod
+    def read_string(model: io.BufferedReader, length: int = -1) -> str:
+        if length == -1:
+            type_string = GGUF_VALUE_TYPE_FORMAT[GGUFValueType.UINT64]
+            length = struct.unpack(type_string, model.read(struct.calcsize(type_string)))[0]
+        return model.read(length).decode("utf-8")
+
+    @staticmethod
+    def read_number(model: io.BufferedReader, value_type: GGUFValueType, model_uses_little_endian: bool) -> float:
+        if value_type not in GGUF_NUMBER_FORMATS:
+            raise ParseError(f"Value type '{value_type}' not in format dict")
+        typestring = f"{'<' if model_uses_little_endian else '>'}{GGUF_VALUE_TYPE_FORMAT[value_type]}"
+        return struct.unpack(typestring, model.read(struct.calcsize(typestring)))[0]
+
+    @staticmethod
+    def read_bool(model: io.BufferedReader, model_uses_little_endian: bool) -> bool:
+        typestring = f"{'<' if model_uses_little_endian else '>'}{GGUF_VALUE_TYPE_FORMAT[GGUFValueType.BOOL]}"
+        value = struct.unpack(typestring, model.read(struct.calcsize(typestring)))[0]
+        if value not in [0, 1]:
+            raise ParseError(f"Invalid bool value '{value}'")
+        return value == 1
+
+    @staticmethod
+    def read_value_type(model: io.BufferedReader, model_uses_little_endian: bool) -> GGUFValueType:
+        value_type = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, model_uses_little_endian)
+        return GGUFValueType(value_type)
+
+    @staticmethod
+    def read_value(model: io.BufferedReader, value_type: GGUFValueType, model_uses_little_endian: bool) -> Any:
+        value = None
+        if value_type in GGUF_NUMBER_FORMATS:
+            value = GGUFInfoParser.read_number(model, value_type, model_uses_little_endian)
+        elif value_type == GGUFValueType.BOOL:
+            value = GGUFInfoParser.read_bool(model, model_uses_little_endian)
+        elif value_type == GGUFValueType.STRING:
+            value = GGUFInfoParser.read_string(model)
+        elif value_type == GGUFValueType.ARRAY:
+            array_type = GGUFInfoParser.read_value_type(model, model_uses_little_endian)
+            array_length = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, model_uses_little_endian)
+            value = [
+                GGUFInfoParser.read_value(model, array_type, model_uses_little_endian) for _ in range(array_length)
+            ]
+
+        if value is not None:
+            return value
+        raise ParseError(f"Unknown type '{value_type}'")
+
+    def parse(model_name: str, model_registry: str, model_path: str, cli_args) -> GGUFModelInfo:
+        # By default, models are little-endian encoded
+        is_little_endian = True
+
+        with open(model_path, "rb") as model:
+            magic_number = GGUFInfoParser.read_string(model, 4)
+            if magic_number != GGUFModelInfo.MAGIC_NUMBER:
+                raise ParseError(f"Invalid GGUF magic number '{magic_number}'")
+
+            gguf_version = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+            # If the read GGUF version is different, then the model could be big-endian encoded
+            if gguf_version != GGUFModelInfo.VERSION:
+                is_little_endian = False
+                gguf_version = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+                if gguf_version != GGUFModelInfo.VERSION:
+                    raise ParseError(f"Expected GGUF version '{GGUFModelInfo.VERSION}', but got '{gguf_version}'")
+
+            tensor_count = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+            metadata_kv_count = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+
+            metadata = {}
+            for _ in range(metadata_kv_count):
+                key = GGUFInfoParser.read_string(model)
+                value_type = GGUFInfoParser.read_value_type(model, is_little_endian)
+                metadata[key] = GGUFInfoParser.read_value(model, value_type, is_little_endian)
+
+            tensors: list[Tensor] = []
+            for _ in range(tensor_count):
+                name = GGUFInfoParser.read_string(model)
+                n_dimensions = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian)
+                dimensions: list[int] = []
+                for _ in range(n_dimensions):
+                    dimensions.append(GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian))
+                tensor_type = GGML_TYPE(GGUFInfoParser.read_number(model, GGUFValueType.UINT32, is_little_endian))
+                offset = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, is_little_endian)
+                tensors.append(Tensor(name, n_dimensions, dimensions, tensor_type, offset))
+
+            return GGUFModelInfo(model_name, model_registry, model_path, metadata, tensors, is_little_endian)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -17,7 +17,6 @@

		from ramalama.http_client import HttpClient


		logging.basicConfig(level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s")

		MNT_DIR = "/mnt/models"
Expand Down