@@ -1318,18 +1318,28 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
1318
1318
raise ValueError (f"Unexpected combination of types: { name_to_type } " )
1319
1319
1320
1320
1321
def per_model_weight_count_estimation(model: LazyModel, expert_count: int | None = None) -> int:
    """Estimate the number of weights held by a single model (expert).

    Sums the element count (product of the shape dimensions) of every tensor
    in *model*, skipping a few tensor names that are not wanted in the total,
    and divides that sum evenly between the experts when the model has any.

    Args:
        model: Mapping of tensor name to a tensor object exposing ``shape``
            (an iterable of integer dimensions).
        expert_count: Number of experts. ``None``, zero, or a negative value
            means the total is returned undivided.

    Returns:
        The estimated weight count per expert, always as an ``int``.
    """
    # TODO: Ensure parameter count is accurate throughout various model type
    skipped_suffixes = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")

    sum_weight_estimate = 0
    for name, lazy_tensor in model.items():
        # We don't need these
        if name.endswith(skipped_suffixes):
            continue

        # Tensor volume: the product of all dimensions (1 for an empty shape).
        sum_weights_in_tensor = 1
        for dim in lazy_tensor.shape:
            sum_weights_in_tensor *= dim

        # Add tensor volume to the running count.
        sum_weight_estimate += sum_weights_in_tensor

    # Callers pass an optional expert count straight through; comparing None
    # with an int raises TypeError on Python 3, so check for it explicitly.
    if expert_count is None or expert_count <= 0:
        return sum_weight_estimate

    # Floor division keeps the result an int, as the signature promises;
    # true division would silently turn the estimate into a float.
    return sum_weight_estimate // expert_count
1333
1343
1334
1344
1335
1345
def convert_to_output_type (model : LazyModel , output_type : GGMLFileType ) -> LazyModel :
@@ -1511,18 +1521,10 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
1511
1521
return vocab , special_vocab
1512
1522
1513
1523
1514
- def default_convention_outfile (file_type : GGMLFileType , params : Params , model_params_count : int , metadata : Metadata ) -> str :
1515
-
1516
- name = None
1517
- if metadata is not None and metadata .name is not None :
1518
- name = metadata .name
1519
- elif params .path_model is not None :
1520
- name = params .path_model .name
1521
-
1524
+ def default_convention_outfile (file_type : GGMLFileType , model_name :str , expert_count :int , model_params_count : int , metadata : Metadata ) -> str :
1525
+ name = metadata .name if metadata is not None and metadata .name is not None else model_name
1522
1526
version = metadata .version if metadata is not None and metadata .version is not None else None
1523
1527
1524
- expert_count = params .n_experts if params .n_experts is not None else None
1525
-
1526
1528
encodingScheme = {
1527
1529
GGMLFileType .AllF32 : "F32" ,
1528
1530
GGMLFileType .MostlyF16 : "F16" ,
@@ -1532,8 +1534,8 @@ def default_convention_outfile(file_type: GGMLFileType, params: Params, model_pa
1532
1534
return gguf .naming_convention (name , version , expert_count , model_params_count , encodingScheme )
1533
1535
1534
1536
1535
- def default_outfile (model_paths : list [Path ], file_type : GGMLFileType , params : Params , model_params_count : int , metadata : Metadata ) -> Path :
1536
- default_filename = default_convention_outfile (file_type , params , model_params_count , metadata )
1537
+ def default_outfile (model_paths : list [Path ], file_type : GGMLFileType , model_name : str , expert_count : int , model_params_count : int , metadata : Metadata ) -> Path :
1538
+ default_filename = default_convention_outfile (file_type , model_name , expert_count , model_params_count , metadata )
1537
1539
ret = model_paths [0 ].parent / f"{ default_filename } .gguf"
1538
1540
if ret in model_paths :
1539
1541
logger .error (
@@ -1591,9 +1593,9 @@ def main(args_in: list[str] | None = None) -> None:
1591
1593
model_plus = load_some_model (args .model )
1592
1594
params = Params .load (model_plus )
1593
1595
model = convert_model_names (model_plus .model , params , args .skip_unknown )
1594
- model_params_count = model_parameter_count (model_plus .model )
1596
+ model_params_count = per_model_weight_count_estimation (model_plus .model , params . n_experts )
1595
1597
ftype = pick_output_type (model , args .outtype )
1596
- print (f"{ default_convention_outfile (ftype , params , model_params_count , metadata )} " ) # noqa: NP100
1598
+ print (f"{ default_convention_outfile (ftype , params . path_model . name , params . n_experts , model_params_count , metadata )} " ) # noqa: NP100
1597
1599
return
1598
1600
1599
1601
if args .no_vocab and args .vocab_only :
@@ -1609,8 +1611,8 @@ def main(args_in: list[str] | None = None) -> None:
1609
1611
else :
1610
1612
model_plus = ModelPlus (model = {}, paths = [args .model / 'dummy' ], format = 'none' , vocab = None )
1611
1613
1612
- model_params_count = model_parameter_count (model_plus .model )
1613
- logger .info (f"model parameters count : { model_params_count } ({ gguf .model_parameter_count_rounded_notation (model_params_count )} )" )
1614
+ model_params_count = per_model_weight_count_estimation (model_plus .model , params . n_experts )
1615
+ logger .info (f"model parameters count : { model_params_count } ({ gguf .model_weight_count_rounded_notation (model_params_count )} )" )
1614
1616
1615
1617
if args .dump :
1616
1618
do_dump_model (model_plus )
@@ -1678,7 +1680,7 @@ def main(args_in: list[str] | None = None) -> None:
1678
1680
model = convert_model_names (model , params , args .skip_unknown )
1679
1681
ftype = pick_output_type (model , args .outtype )
1680
1682
model = convert_to_output_type (model , ftype )
1681
- outfile = args .outfile or default_outfile (model_plus .paths , ftype , params , model_params_count , metadata )
1683
+ outfile = args .outfile or default_outfile (model_plus .paths , ftype , params . path_model . name , params . n_experts , model_params_count , metadata )
1682
1684
1683
1685
params .ftype = ftype
1684
1686
logger .info (f"Writing { outfile } , format { ftype } " )
0 commit comments