@@ -3417,6 +3417,167 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(new_name, data_torch)]


+@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
+class Plamo2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.PLAMO2
+
+    def set_vocab(self):
+        # PLaMo 2 uses a custom tokenizer with a .jsonl file
+        # We need to handle this specially
+        tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl"
+        tokenizer_config_path = self.dir_model / "tokenizer_config.json"
+
+        if not tokenizer_jsonl_path.is_file():
+            raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}")
+
+        # Load tokenizer config
+        with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
+            tokenizer_config = json.load(f)
+
+        # Load tokens from JSONL file (actually a list format)
+        tokens = []
+        scores = []
+        toktypes = []
+
+        with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f):
+                if line.strip():
+                    token_data = json.loads(line)
+                    # Format: [token, score, type, ?, ?, ?, ?]
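+                    # note: only the first three fields are consumed below;
+                    # any trailing fields in an entry are ignored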
+                    token = token_data[0].encode("utf-8")
+                    score = float(token_data[1])
+                    token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL"
+
+                    tokens.append(token)
+                    scores.append(score)
+
+                    # Map token type strings to GGUF token types
+                    if token_type_str == "UNKNOWN":
+                        toktypes.append(gguf.TokenType.UNKNOWN)
+                    elif token_type_str == "CONTROL":
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    elif token_type_str == "BYTE":
+                        toktypes.append(gguf.TokenType.BYTE)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+        # Use "llama" (SPM) tokenizer type which doesn't require merges
+        # PLaMo 2's tokenizer is more similar to SPM than GPT2
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # Add special tokens from config
+        if "bos_token_id" in tokenizer_config:
+            self.gguf_writer.add_bos_token_id(tokenizer_config["bos_token_id"])
+        if "eos_token_id" in tokenizer_config:
+            self.gguf_writer.add_eos_token_id(tokenizer_config["eos_token_id"])
+        if "pad_token_id" in tokenizer_config:
+            self.gguf_writer.add_pad_token_id(tokenizer_config["pad_token_id"])
+        if "unk_token_id" in tokenizer_config:
+            self.gguf_writer.add_unk_token_id(tokenizer_config["unk_token_id"])
+
+        self.gguf_writer.add_add_space_prefix(False)
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
+        self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
+        self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0))
+
+        # Mamba parameters
+        self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
+        self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
+        self.gguf_writer.add_ssm_num_heads(hparams.get("mamba_num_heads", 64))
+        self.gguf_writer.add_ssm_head_dim(hparams.get("hidden_size_per_head", 128))
+        self.gguf_writer.add_ssm_inner_size(hparams.get("hidden_size_per_head", 128) * hparams.get("mamba_num_heads", 64))
+        self.gguf_writer.add_ssm_time_step_rank(hparams.get("time_step_limit", 192))
+        self.gguf_writer.add_ssm_dt_min(hparams.get("time_step_min", 0.001))
+        self.gguf_writer.add_ssm_dt_max(hparams.get("time_step_max", 0.1))
+        self.gguf_writer.add_hybrid_mamba_step(hparams.get("mamba_step", 2))
+
+        # MLP feed forward parameters (for attention layers)
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384))
+
+        # Which layers are Mamba layers
+        # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer)
+        # This logic matches modeling_plamo.py's is_mamba function
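+        # e.g. with mamba_step=2 this marks the even-indexed layers
+        # (0, 2, 4, ...) as Mamba layers and the odd-indexed ones as
+        # attention layers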
+        mamba_step = hparams.get("mamba_step", 2)
+        mamba_enabled = hparams.get("mamba_enabled", True)
+        mamba_layers = []
+
+        if mamba_enabled:
+            for i in range(block_count):
+                if block_count <= (mamba_step // 2):
+                    # use attention in last layer
+                    is_mamba = (i != block_count - 1)
+                else:
+                    is_mamba = (i % mamba_step) != (mamba_step // 2)
+                if is_mamba:
+                    mamba_layers.append(i)
+
+        if mamba_layers:
+            self.gguf_writer.add_hybrid_mamba_layers(mamba_layers)
+
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+        elif name.endswith(".dt_norm_weight"):
+            name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
+        elif name.endswith(".B_norm_weight"):
+            name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight"
+        elif name.endswith(".C_norm_weight"):
+            name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight"
+        elif name.endswith(".k_weight"):
+            name = name.rpartition(".k_weight")[0] + ".k.weight"
+        elif name.endswith(".q_weight"):
+            name = name.rpartition(".q_weight")[0] + ".q.weight"
+        elif name.endswith(".conv1d.weight"):
+            data_torch = torch.squeeze(data_torch)  # remove (, 1, )
+            assert data_torch.ndim == 2
+        elif name.endswith(".pre_mixer_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mixer_norm.weight"):
+            data_torch += 1.0 / 5
+        elif name.endswith(".pre_mlp_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mlp_norm.weight"):
+            data_torch += 1.0 / (5 ** 1.5)
+        elif name.endswith(".gate_up_proj.weight"):
+            # Split the combined gate_up tensor
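+            # assumption: the combined tensor stores the gate projection rows
+            # first and the up projection rows second along dim 0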
+            split_size = data_torch.shape[0] // 2
+            gate_tensor = data_torch[:split_size, :]
+            up_tensor = data_torch[split_size:, :]
+
+            # Return both tensors - remove .weight suffix if present
+            name_base = name.replace(".gate_up_proj.weight", "")
+            gate_name = name_base + ".ffn_gate.weight"
+            up_name = name_base + ".ffn_up.weight"
+
+            gate_mapped = self.map_tensor_name(gate_name)
+            up_mapped = self.map_tensor_name(up_name)
+
+            return [(gate_mapped, gate_tensor), (up_mapped, up_tensor)]
+
+        new_name = self.map_tensor_name(name)
+
+        print(f"Plamo2Model: {name} -> {new_name}, shape={data_torch.shape}")
+
+        return [(new_name, data_torch)]
+
+
 @ModelBase.register("CodeShellForCausalLM")
 class CodeShellModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CODESHELL