Skip to content

Commit a522cda

Browse files
ggerganov authored and qnixsynapse committed
ggml : support bcast ggml_soft_max_ext, ggml_flash_attn_ext (ggml-org#14435)
ggml-ci
1 parent 882076a commit a522cda

File tree

9 files changed

+252
-613
lines changed

9 files changed

+252
-613
lines changed

ggml/include/ggml.h

Lines changed: 7 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -557,8 +557,6 @@ extern "C" {
557557
GGML_GLU_OP_REGLU,
558558
GGML_GLU_OP_GEGLU,
559559
GGML_GLU_OP_SWIGLU,
560-
GGML_GLU_OP_GEGLU_ERF,
561-
GGML_GLU_OP_GEGLU_QUICK,
562560

563561
GGML_GLU_OP_COUNT,
564562
};
@@ -648,9 +646,6 @@ extern "C" {
648646

649647
// misc
650648

651-
GGML_API const char * ggml_version(void);
652-
GGML_API const char * ggml_commit(void);
653-
654649
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
655650
GGML_API int64_t ggml_time_ms(void);
656651
GGML_API int64_t ggml_time_us(void);
@@ -1149,22 +1144,6 @@ extern "C" {
11491144
struct ggml_context * ctx,
11501145
struct ggml_tensor * a);
11511146

1152-
GGML_API struct ggml_tensor * ggml_geglu_erf(
1153-
struct ggml_context * ctx,
1154-
struct ggml_tensor * a);
1155-
1156-
GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
1157-
struct ggml_context * ctx,
1158-
struct ggml_tensor * a);
1159-
1160-
GGML_API struct ggml_tensor * ggml_geglu_quick(
1161-
struct ggml_context * ctx,
1162-
struct ggml_tensor * a);
1163-
1164-
GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
1165-
struct ggml_context * ctx,
1166-
struct ggml_tensor * a);
1167-
11681147
// A: n columns, r rows,
11691148
// B: n columns, r rows,
11701149
GGML_API struct ggml_tensor * ggml_glu_split(
@@ -1188,16 +1167,6 @@ extern "C" {
11881167
struct ggml_tensor * a,
11891168
struct ggml_tensor * b);
11901169

1191-
GGML_API struct ggml_tensor * ggml_geglu_erf_split(
1192-
struct ggml_context * ctx,
1193-
struct ggml_tensor * a,
1194-
struct ggml_tensor * b);
1195-
1196-
GGML_API struct ggml_tensor * ggml_geglu_quick_split(
1197-
struct ggml_context * ctx,
1198-
struct ggml_tensor * a,
1199-
struct ggml_tensor * b);
1200-
12011170
// normalize along rows
12021171
GGML_API struct ggml_tensor * ggml_norm(
12031172
struct ggml_context * ctx,
@@ -2011,16 +1980,15 @@ extern "C" {
20111980

20121981
#define GGML_KQ_MASK_PAD 64
20131982

2014-
// q: [n_embd_k, n_batch, n_head, ne3 ]
2015-
// k: [n_embd_k, n_kv, n_head_kv, ne3 ]
2016-
// v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
2017-
// mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
2018-
// res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
1983+
// q: [n_embd_k, n_batch, n_head, ne3]
1984+
// k: [n_embd_k, n_kv, n_head_kv, ne3]
1985+
// v: [n_embd_v, n_kv, n_head_kv, ne3] !! not transposed !!
1986+
// mask: [n_kv, n_batch_pad, ne32, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1987+
// res: [n_embd_v, n_head, n_batch, ne3] !! permuted !!
20191988
//
20201989
// broadcast:
20211990
// n_head % n_head_kv == 0
2022-
// n_head % ne32 == 0
2023-
// ne3 % ne33 == 0
1991+
// ne3 % ne32 == 0
20241992
//
20251993
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
20261994
struct ggml_context * ctx,
@@ -2060,8 +2028,7 @@ extern "C" {
20602028
struct ggml_tensor * dt,
20612029
struct ggml_tensor * A,
20622030
struct ggml_tensor * B,
2063-
struct ggml_tensor * C,
2064-
struct ggml_tensor * ids);
2031+
struct ggml_tensor * C);
20652032

20662033
// partition into non-overlapping windows with padding if needed
20672034
// example:

0 commit comments

Comments
 (0)