
Commit 374f67b

Support phi4 mini (#3467)

* support partial_rotary
* support phi-4
* update doc

1 parent: 8868159

File tree: 9 files changed (+34 −2 lines)

README.md (+1)

```diff
@@ -141,6 +141,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
 <li>Phi-3-mini (3.8B)</li>
 <li>Phi-3.5-mini (3.8B)</li>
 <li>Phi-3.5-MoE (16x3.8B)</li>
+<li>Phi-4-mini (3.8B)</li>
 <li>MiniCPM3 (4B)</li>
 </ul>
 </td>
```

README_ja.md (+1)

```diff
@@ -139,6 +139,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
 <li>Phi-3-mini (3.8B)</li>
 <li>Phi-3.5-mini (3.8B)</li>
 <li>Phi-3.5-MoE (16x3.8B)</li>
+<li>Phi-4-mini (3.8B)</li>
 <li>MiniCPM3 (4B)</li>
 </ul>
 </td>
```

README_zh-CN.md (+1)

```diff
@@ -143,6 +143,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力，在各种规模的模型
 <li>Phi-3-mini (3.8B)</li>
 <li>Phi-3.5-mini (3.8B)</li>
 <li>Phi-3.5-MoE (16x3.8B)</li>
+<li>Phi-4-mini (3.8B)</li>
 <li>MiniCPM3 (4B)</li>
 </ul>
 </td>
```

docs/en/supported_models/supported_models.md (+2)

````diff
@@ -88,6 +88,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | StarCoder2 | 3B-15B | LLM | Yes | Yes | Yes | No | No |
 | Phi-3-mini | 3.8B | LLM | Yes | Yes | Yes | Yes | Yes |
 | Phi-3-vision | 4.2B | MLLM | Yes | Yes | Yes | - | - |
+| Phi-4-mini | 3.8B | LLM | Yes | Yes | Yes | Yes | Yes |
 | CogVLM-Chat | 17B | MLLM | Yes | Yes | Yes | - | - |
 | CogVLM2-Chat | 19B | MLLM | Yes | Yes | Yes | - | - |
 | LLaVA(1.5,1.6)<sup>\[2\]</sup> | 7B-34B | MLLM | No | No | No | No | No |
@@ -104,6 +105,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | Phi-3.5-mini | 3.8B | LLM | Yes | Yes | No | - | - |
 | Phi-3.5-MoE | 16x3.8B | LLM | Yes | Yes | No | - | - |
 | Phi-3.5-vision | 4.2B | MLLM | Yes | Yes | No | - | - |
+| Phi-4-mini | 3.8B | LLM | Yes | Yes | No | - | - |

 ```{note}
 * [1] Currently Mono-InternVL does not support FP16 due to numerical instability. Please use BF16 instead.
````
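
With the support matrix updated, the new model can be served like any other entry in the tables. A minimal usage sketch via LMDeploy's high-level `pipeline` API; the Hugging Face repo id `microsoft/Phi-4-mini-instruct` is an assumption for illustration, not taken from this commit:

```python
# Minimal sketch: run Phi-4-mini through LMDeploy's pipeline API.
# The repo id `microsoft/Phi-4-mini-instruct` is assumed, not from this diff.
from lmdeploy import pipeline

pipe = pipeline('microsoft/Phi-4-mini-instruct')
responses = pipe(['Introduce yourself in one sentence.'])
print(responses[0].text)
```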

docs/zh_cn/supported_models/supported_models.md (+2)

````diff
@@ -88,6 +88,7 @@
 | StarCoder2 | 3B-15B | LLM | Yes | Yes | Yes | No | No |
 | Phi-3-mini | 3.8B | LLM | Yes | Yes | Yes | Yes | Yes |
 | Phi-3-vision | 4.2B | MLLM | Yes | Yes | Yes | - | - |
+| Phi-4-mini | 3.8B | LLM | Yes | Yes | Yes | Yes | Yes |
 | CogVLM-Chat | 17B | MLLM | Yes | Yes | Yes | - | - |
 | CogVLM2-Chat | 19B | MLLM | Yes | Yes | Yes | - | - |
 | LLaVA(1.5,1.6)<sup>\[2\]</sup> | 7B-34B | MLLM | No | No | No | No | No |
@@ -104,6 +105,7 @@
 | Phi-3.5-mini | 3.8B | LLM | Yes | Yes | No | - | - |
 | Phi-3.5-MoE | 16x3.8B | LLM | Yes | Yes | No | - | - |
 | Phi-3.5-vision | 4.2B | MLLM | Yes | Yes | No | - | - |
+| Phi-4-mini | 3.8B | LLM | Yes | Yes | No | - | - |

 ```{note}
 * [1] 目前，Mono-InternVL不支持FP16，因为数值不稳定。请改用BF16
````

lmdeploy/model.py (+3)

```diff
@@ -1633,6 +1633,7 @@ def match(cls, model_path: str) -> Optional[str]:
         return 'internvl-zh-hermes2'


+@MODELS.register_module(name='phi-4')
 @MODELS.register_module(name='phi-3')
 class Phi3Instruct(BaseChatTemplate):
     """Chat template of InternLM model."""
@@ -1669,6 +1670,8 @@ def match(cls, model_path: str) -> Optional[str]:
         path = model_path.lower()
         if all([c in path for c in ['phi-3', 'instruct']]):
             return 'phi-3'
+        if all([c in path for c in ['phi-4', 'instruct']]):
+            return 'phi-4'


 @MODELS.register_module(name='internvl2-phi3')
```
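
The `match` hook resolves a model path to a registered chat-template name via lowercase substring checks, so Phi-4 reuses the existing `Phi3Instruct` template under the new `phi-4` name. A standalone sketch of the added rule (illustrative, not the library code itself):

```python
# Standalone sketch of the matching rule added above: a lowercased model
# path containing both 'phi-4' and 'instruct' maps to the 'phi-4' template.
def match_phi(model_path: str):
    path = model_path.lower()
    if all(c in path for c in ['phi-3', 'instruct']):
        return 'phi-3'
    if all(c in path for c in ['phi-4', 'instruct']):
        return 'phi-4'
    return None

assert match_phi('microsoft/Phi-4-mini-instruct') == 'phi-4'
```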

lmdeploy/pytorch/kernels/cuda/apply_rotary_pos_emb.py (+10 −1)

```diff
@@ -124,7 +124,16 @@ def apply_rotary_pos_emb(q: Tensor,

     seq_len = cos.numel() // cos.size(-1)
     BLOCK = 16
-    half_size = q.size(-1) // 2
+
+    if q.size(-1) == cos.size(-1):
+        half_size = q.size(-1) // 2
+    elif q.size(-1) > cos.size(-1):
+        # only do rope with rope_dim size
+        half_size = cos.size(-1) // 2
+    else:
+        raise ValueError('Not support head_dim < rope_dim, '
+                         f'but given head_dim={q.size(-1)} '
+                         f'rope_dim={cos.size(-1)}')
     BLOCK_N = triton.next_power_of_2(half_size)
     num_heads_q = q.size(-2)
     num_heads_k = k.size(-2)
```
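
This change lets the Triton path tolerate `head_dim > rope_dim`: only the leading `rope_dim` channels of each head are rotated, and the tail passes through untouched. A plain-PyTorch reference of that behavior, as a sketch under the standard half-rotation convention (not the Triton kernel itself):

```python
import torch


def partial_rope_ref(q: torch.Tensor, cos: torch.Tensor,
                     sin: torch.Tensor) -> torch.Tensor:
    """Rotate only the first rope_dim channels; pass the rest through."""
    rope_dim = cos.size(-1)          # may be < head_dim (partial rotary)
    q_rot, q_pass = q[..., :rope_dim], q[..., rope_dim:]
    half = rope_dim // 2
    x1, x2 = q_rot[..., :half], q_rot[..., half:]
    rotated = torch.cat((-x2, x1), dim=-1)
    return torch.cat((q_rot * cos + rotated * sin, q_pass), dim=-1)
```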

lmdeploy/pytorch/models/phi3.py (+8)

```diff
@@ -232,6 +232,7 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
         rope_max_pos_emb = config.max_position_embeddings
         rope_base = config.rope_theta
         rope_scaling = config.rope_scaling
+        partial_rotary_factor = getattr(config, 'partial_rotary_factor', None)
         if rope_scaling is not None:
             scaling_type = rope_scaling['type']
             assert scaling_type in ['longrope', 'su']
@@ -246,13 +247,15 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
                 rope_base,
                 longrope_params=longrope_params,
                 emb_type=emb_type,
+                partial_rotary_factor=partial_rotary_factor,
             )
         else:
             self.rotary_emb = build_rotary_embedding(
                 rope_dim,
                 rope_max_pos_emb,
                 rope_base,
                 emb_type=emb_type,
+                partial_rotary_factor=partial_rotary_factor,
             )

     def forward(
@@ -348,6 +351,11 @@ def get_logits(self, hidden_states: torch.Tensor):
         """compute logits of the model output."""
         return self.lm_head(hidden_states)

+    def update_weights(self):
+        """update weights."""
+        if self.config.tie_word_embeddings:
+            self.lm_head.weight = self.model.embed_tokens.weight
+
     def get_input_embeddings(self):
         """get input embeddings."""
         return self.model.get_input_embeddings()
```
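
The new `update_weights` ties the output projection to the input embedding when the config requests it, avoiding a second vocab-sized matrix. A standalone sketch of the tying (the sizes are illustrative only, not read from a real config):

```python
import torch.nn as nn

vocab_size, hidden_size = 200_064, 3072  # illustrative, Phi-4-mini-like sizes
embed_tokens = nn.Embedding(vocab_size, hidden_size)
lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

# Tie: lm_head now shares the same Parameter object, not a copy.
lm_head.weight = embed_tokens.weight
assert lm_head.weight.data_ptr() == embed_tokens.weight.data_ptr()
```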

lmdeploy/pytorch/nn/rotary_embedding.py (+6 −1)

```diff
@@ -85,11 +85,16 @@ def build_rotary_embedding(dim: int,
                            yarn_params: YarnParameters = None,
                            longrope_params: LongRoPEScalingParameters = None,
                            llama3_params: Llama3Parameters = None,
-                           emb_type: RopeType = RopeType.Default) -> nn.Module:
+                           emb_type: RopeType = RopeType.Default,
+                           partial_rotary_factor: float = None) -> nn.Module:
     """build rotary embedding op."""
     backend = get_backend()

     builder = backend.get_layer_impl_builder(OpType.RotaryEmbedding)
+
+    # update rope_dim
+    if partial_rotary_factor is not None:
+        dim = int(dim * partial_rotary_factor)
     return builder.build(dim,
                          max_position_embeddings,
                          base,
```
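
Here `partial_rotary_factor` simply shrinks the dimension handed to the backend builder, so every downstream op sees the reduced rotary dimension. A worked example of the arithmetic (the 128 and 0.75 are illustrative, Phi-4-mini-like assumptions, not values read from a config):

```python
# Worked example of the rope_dim update above.
head_dim, partial_rotary_factor = 128, 0.75  # assumed, illustrative values
rope_dim = int(head_dim * partial_rotary_factor)
assert rope_dim == 96  # only 96 of 128 channels get rotary position encoding
```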
