Skip to content

Commit 0fd93cd

Browse files
nicoboss and slaren authored
llama : model-based max number of graph nodes calculation (ggml-org#8970)
* llama : model-based max number of graph nodes calculation
* Update src/llama.cpp

---------

Co-authored-by: slaren <[email protected]>
1 parent 84eb2f4 commit 0fd93cd

File tree

1 file changed

+2
-7
lines changed

1 file changed

+2
-7
lines changed

src/llama.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3575,13 +3575,8 @@ namespace GGUFMeta {
35753575

35763576
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
35773577

3578-
// TODO: update when needed or think of some clever automatic way to do this
3579-
static size_t llama_model_max_nodes(const llama_model & /*model*/) {
3580-
//if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
3581-
// return 32768;
3582-
//}
3583-
3584-
return 8192;
3578+
static size_t llama_model_max_nodes(const llama_model & model) {
3579+
return std::max<size_t>(8192, model.tensors_by_name.size()*5);
35853580
}
35863581

35873582
struct llama_model_loader {

0 commit comments

Comments
 (0)