Commit 4891585

convert-*.py: refactor to per_model_weight_count_estimation()
1 parent d87e7cf commit 4891585

File tree

3 files changed, +58 -49 lines changed


convert-hf-to-gguf.py

Lines changed: 32 additions & 24 deletions
@@ -123,9 +123,12 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
         self.metadata = metadata
+
+        model_tensors = self.get_tensors()
+
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(self.get_tensors())
+            _, first_tensor = next(model_tensors)
             if first_tensor.dtype == torch.float16:
                 logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_F16
@@ -162,8 +165,35 @@ def get_model_name(metadata, huggingface_parameters, dir_model, model_arch):
         # Get Expert Count From huggingface_parameters
         expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
 
+        def per_model_weight_count_estimation(tensors, expert_count):
+            # TODO: Ensure parameter count is accurate throughout various model type
+            # May currently overestimate parameter count in Mamba model because
+            # output weights is tied with token embeddings.
+            sum_weight_estimate = 0
+            for name, data_torch in tensors:
+                # Got A Tensor
+
+                # We don't need these
+                if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+                    continue
+
+                # Calculate Tensor Volume
+                sum_weights_in_tensor = 1
+                for dim in data_torch.shape:
+                    sum_weights_in_tensor *= dim
+
+                # Add Tensor Volume To Running Count
+                sum_weight_estimate += sum_weights_in_tensor
+
+            # Calculate weight estimate per model
+            per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+            return per_model_weight_estimate
+
+        weight_estimate = per_model_weight_count_estimation(model_tensors, expert_count)
+
         # Generate default filename based on model specification and available metadata
-        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, self.parameter_count(), encodingScheme)
+        self.fname_default = gguf.naming_convention(self.model_name, self.metadata.version, expert_count, weight_estimate, encodingScheme)
 
         # Filename Output
         if fname_out is not None:
@@ -345,28 +375,6 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
 
         return False
 
-    def parameter_count(self):
-        # TODO: Ensure parameter count is accurate throughout various model type
-        # May currently overestimate parameter count in Mamba model because
-        # output weights is tied with token embeddings.
-        total_model_parameters = 0
-        for name, data_torch in self.get_tensors():
-            # Got A Tensor
-
-            # We don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
-                continue
-
-            # Calculate Tensor Volume
-            sum_weights_in_tensor = 1
-            for dim in data_torch.shape:
-                sum_weights_in_tensor *= dim
-
-            # Add Tensor Volume To Running Count
-            total_model_parameters += sum_weights_in_tensor
-
-        return total_model_parameters
-
     def write_tensors(self):
         max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
 
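The per_model_weight_count_estimation() helper added above sums each tensor's element count, skipping the attention-bias and rotary-frequency buffers, and divides the total by the expert count so mixture-of-experts models report per-expert weights. Below is a minimal standalone sketch of that logic (not part of the commit); the tensor names and shapes are made up, and plain tuples stand in for torch tensors:

# Standalone sketch of the estimation logic; tensor names and shapes are invented.
def estimate_weight_count(tensors, expert_count):
    total = 0
    for name, shape in tensors:
        # Skip the same non-weight buffers the converter skips
        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
            continue
        volume = 1
        for dim in shape:
            volume *= dim
        total += volume
    # Per-expert estimate for MoE; a falsy expert_count (None or 0) keeps the dense total
    return total / expert_count if expert_count else total

tensors = [
    ("token_embd.weight", (32000, 4096)),       # 131,072,000 elements
    ("blk.0.attn_q.weight", (4096, 4096)),      #  16,777,216 elements
    ("blk.0.attn.rotary_emb.inv_freq", (64,)),  # filtered out by the name check
]
print(estimate_weight_count(tensors, None))  # 147849216
print(estimate_weight_count(tensors, 8))     # 18481152.0
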
convert.py

Lines changed: 23 additions & 21 deletions
@@ -1318,18 +1318,28 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
     raise ValueError(f"Unexpected combination of types: {name_to_type}")
 
 
-def model_parameter_count(model: LazyModel) -> int:
+def per_model_weight_count_estimation(model: LazyModel, expert_count:int) -> int:
     # TODO: Ensure parameter count is accurate throughout various model type
-    total_model_parameters = 0
+    sum_weight_estimate = 0
     for name, lazy_tensor in model.items():
+        # We don't need these
+        if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
+            continue
+
         # Got A Tensor
         sum_weights_in_tensor = 1
+
         # Tensor Volume
         for dim in lazy_tensor.shape:
             sum_weights_in_tensor *= dim
+
         # Add Tensor Volume To Running Count
-        total_model_parameters += sum_weights_in_tensor
-    return total_model_parameters
+        sum_weight_estimate += sum_weights_in_tensor
+
+    # Calculate weight estimate per model
+    per_model_weight_estimate = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
+
+    return per_model_weight_estimate
 
 
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
@@ -1511,18 +1521,10 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
     return vocab, special_vocab
 
 
-def default_convention_outfile(file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> str:
-
-    name = None
-    if metadata is not None and metadata.name is not None:
-        name = metadata.name
-    elif params.path_model is not None:
-        name = params.path_model.name
-
+def default_convention_outfile(file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> str:
+    name = metadata.name if metadata is not None and metadata.name is not None else model_name
     version = metadata.version if metadata is not None and metadata.version is not None else None
 
-    expert_count = params.n_experts if params.n_experts is not None else None
-
     encodingScheme = {
         GGMLFileType.AllF32: "F32",
         GGMLFileType.MostlyF16: "F16",
@@ -1532,8 +1534,8 @@ def default_convention_outfile(file_type: GGMLFileType, params: Params, model_pa
     return gguf.naming_convention(name, version, expert_count, model_params_count, encodingScheme)
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
-    default_filename = default_convention_outfile(file_type, params, model_params_count, metadata)
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, model_name:str, expert_count:int, model_params_count: int, metadata: Metadata) -> Path:
+    default_filename = default_convention_outfile(file_type, model_name, expert_count, model_params_count, metadata)
     ret = model_paths[0].parent / f"{default_filename}.gguf"
     if ret in model_paths:
         logger.error(
@@ -1591,9 +1593,9 @@ def main(args_in: list[str] | None = None) -> None:
         model_plus = load_some_model(args.model)
         params = Params.load(model_plus)
         model = convert_model_names(model_plus.model, params, args.skip_unknown)
-        model_params_count = model_parameter_count(model_plus.model)
+        model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts)
         ftype = pick_output_type(model, args.outtype)
-        print(f"{default_convention_outfile(ftype, params, model_params_count, metadata)}") # noqa: NP100
+        print(f"{default_convention_outfile(ftype, params.path_model.name, params.n_experts, model_params_count, metadata)}") # noqa: NP100
         return
 
     if args.no_vocab and args.vocab_only:
@@ -1609,8 +1611,8 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
 
-    model_params_count = model_parameter_count(model_plus.model)
-    logger.info(f"model parameters count : {model_params_count} ({gguf.model_parameter_count_rounded_notation(model_params_count)})")
+    model_params_count = per_model_weight_count_estimation(model_plus.model, params.n_experts)
+    logger.info(f"model parameters count : {model_params_count} ({gguf.model_weight_count_rounded_notation(model_params_count)})")
 
     if args.dump:
         do_dump_model(model_plus)
@@ -1678,7 +1680,7 @@ def main(args_in: list[str] | None = None) -> None:
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params.path_model.name, params.n_experts, model_params_count, metadata)
 
     params.ftype = ftype
     logger.info(f"Writing {outfile}, format {ftype}")
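The visible effect of threading expert_count through these functions is on the size embedded in the default filename: a mixture-of-experts model is now labelled with its per-expert weight estimate rather than the grand total. A worked example with hypothetical numbers (the 46.7B total is invented, and the "8x5.8B" rendering assumes the one-decimal rounding of gguf's notation helper):

# Illustrative numbers only; not taken from a real model.
sum_weight_estimate = 46_700_000_000   # hypothetical total element count across all tensors
expert_count = 8
per_model = (sum_weight_estimate / expert_count) if (expert_count > 0) else sum_weight_estimate
print(per_model)  # 5837500000.0 -> would surface in the filename as roughly "8x5.8B"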

gguf-py/gguf/utility.py

Lines changed: 3 additions & 4 deletions
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
-
-def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+def model_weight_count_rounded_notation(model_params_count: int) -> str:
     if model_params_count > 1e15 :
         # Quadrillion Of Parameters
         scaled_model_params = model_params_count * 1e-15
@@ -29,7 +28,7 @@ def naming_convention(model_name: str, version_string:str, expert_count_int:int,
     # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
     name = model_name.strip().replace(' ', '-') if model_name is not None else "ggml-model"
     version = f"-{version_string}" if version_string is not None else ""
-    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None else ""
-    parameters = model_parameter_count_rounded_notation(model_params_count)
+    expert_count_chunk = f"{expert_count_int}x" if expert_count_int is not None and expert_count_int > 0 else ""
+    parameters = model_weight_count_rounded_notation(model_params_count)
     encodingScheme = encodingScheme.upper()
     return f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}"
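With the renamed model_weight_count_rounded_notation() and the tightened expert_count_int > 0 guard, naming_convention() assembles the filename from four chunks. A hedged sketch of that assembly with hypothetical inputs (the "5.8B" string assumes how the notation helper rounds ~5.84e9):

# Hypothetical inputs; illustrates only the final f-string of naming_convention().
name = "Mixtral"            # model_name.strip().replace(' ', '-')
version = "-v0.1"           # version chunk carries its own leading dash
expert_count_chunk = "8x"   # emitted only when expert count is not None and > 0
parameters = "5.8B"         # assumed output of model_weight_count_rounded_notation(5_837_500_000)
encodingScheme = "F16"
print(f"{name}{version}-{expert_count_chunk}{parameters}-{encodingScheme}")
# Mixtral-v0.1-8x5.8B-F16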
