Skip to content

Commit e4841d2

Browse files
authored
llama : fix parallel processing for plamo2 (ggml-org#14716)
1 parent 538cc77 commit e4841d2

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15763,6 +15763,7 @@ struct llm_build_plamo2 : public llm_graph_context_mamba {
15763 15763         cb(zx, "mamba_in_proj", il);
15764 15764         // {8192, 5, 1, 1} -> {8192, 1, 5, 1}
15765 15765         zx = ggml_permute(ctx0, zx, 0, 2, 1, 3);
      15766 +       zx = ggml_cont(ctx0, zx);
15766 15767         zx = ggml_reshape_4d(ctx0, zx, head_dim * 2, n_heads, n_seq_tokens, n_seqs);
15767 15768         cb(zx, "mamba_in_proj_out", il);
15768 15769

@@ -15780,7 +15781,6 @@ struct llm_build_plamo2 : public llm_graph_context_mamba {
15780 15781         // conv1d
15781 15782         {
15782 15783             // => {d_conv - 1 + n_seq_tokens, d_inner, n_seqs}
15783       -           x = ggml_view_2d(ctx0, x, d_inner, n_seq_tokens * n_seqs, d_inner * x->nb[0], 0);
15784 15784             ggml_tensor * conv_x = ggml_concat(ctx0, conv, ggml_transpose(ctx0, x), 0);
15785 15785             cb(conv_x, "mamba_conv1d_input", il);
15786 15786

0 commit comments

Comments (0)