From 7d7003df9c4c5f0800db84b6f95511be7870bb9d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 24 Jun 2025 17:30:18 +0300
Subject: [PATCH 1/2] batch : fix check for empty sequences in memory

ggml-ci
---
 src/llama-batch.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 401e11364dbc9..5f094f966dcec 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -244,7 +244,9 @@ bool llama_batch_allocr::init(
             continue;
         }
 
-        if (memory) {
+        const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
+
+        if (p0 >= 0) {
             bool ok = true;
 
             if (batch.token) {

From 53608db533105eff6c00ab5b445535ee3ac49f9d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 24 Jun 2025 17:37:24 +0300
Subject: [PATCH 2/2] cont : reuse the var

ggml-ci
---
 src/llama-batch.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 5f094f966dcec..91b1d6078a252 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -250,7 +250,7 @@ bool llama_batch_allocr::init(
             bool ok = true;
 
             if (batch.token) {
-                if (seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+                if (seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
             } else {
@@ -258,7 +258,7 @@ bool llama_batch_allocr::init(
 
                 // for embeddings (typically used as vision input), we allow them to have repeating positions
                 // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
-                if (seq_pos_min(s) != memory->seq_pos_max(s) && seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+                if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
             }
@@ -269,7 +269,7 @@ bool llama_batch_allocr::init(
                         " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
                         " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
                         " it is required that the sequence positions remain consecutive: Y = X + 1\n",
-                        __func__, s, s, memory->seq_pos_max(s), s, seq_pos_min(s));
+                        __func__, s, s, p0, s, seq_pos_min(s));
 
                 return false;
             }