@@ -557,8 +557,6 @@ extern "C" {
557
557
GGML_GLU_OP_REGLU ,
558
558
GGML_GLU_OP_GEGLU ,
559
559
GGML_GLU_OP_SWIGLU ,
560
- GGML_GLU_OP_GEGLU_ERF ,
561
- GGML_GLU_OP_GEGLU_QUICK ,
562
560
563
561
GGML_GLU_OP_COUNT ,
564
562
};
@@ -648,9 +646,6 @@ extern "C" {
648
646
649
647
// misc
650
648
651
- GGML_API const char * ggml_version (void );
652
- GGML_API const char * ggml_commit (void );
653
-
654
649
GGML_API void ggml_time_init (void ); // call this once at the beginning of the program; NOTE(review): presumably needed before ggml_time_ms/ggml_time_us give meaningful values - confirm against the implementation
655
650
GGML_API int64_t ggml_time_ms (void ); // returns an int64_t time value; NOTE(review): presumably milliseconds per the _ms suffix - unit and reference point are not visible here, confirm
656
651
GGML_API int64_t ggml_time_us (void ); // returns an int64_t time value; NOTE(review): presumably microseconds per the _us suffix - unit and reference point are not visible here, confirm
@@ -1149,22 +1144,6 @@ extern "C" {
1149
1144
struct ggml_context * ctx ,
1150
1145
struct ggml_tensor * a );
1151
1146
1152
- GGML_API struct ggml_tensor * ggml_geglu_erf (
1153
- struct ggml_context * ctx ,
1154
- struct ggml_tensor * a );
1155
-
1156
- GGML_API struct ggml_tensor * ggml_geglu_erf_swapped (
1157
- struct ggml_context * ctx ,
1158
- struct ggml_tensor * a );
1159
-
1160
- GGML_API struct ggml_tensor * ggml_geglu_quick (
1161
- struct ggml_context * ctx ,
1162
- struct ggml_tensor * a );
1163
-
1164
- GGML_API struct ggml_tensor * ggml_geglu_quick_swapped (
1165
- struct ggml_context * ctx ,
1166
- struct ggml_tensor * a );
1167
-
1168
1147
// A: n columns, r rows,
1169
1148
// B: n columns, r rows,
1170
1149
GGML_API struct ggml_tensor * ggml_glu_split (
@@ -1188,16 +1167,6 @@ extern "C" {
1188
1167
struct ggml_tensor * a ,
1189
1168
struct ggml_tensor * b );
1190
1169
1191
- GGML_API struct ggml_tensor * ggml_geglu_erf_split (
1192
- struct ggml_context * ctx ,
1193
- struct ggml_tensor * a ,
1194
- struct ggml_tensor * b );
1195
-
1196
- GGML_API struct ggml_tensor * ggml_geglu_quick_split (
1197
- struct ggml_context * ctx ,
1198
- struct ggml_tensor * a ,
1199
- struct ggml_tensor * b );
1200
-
1201
1170
// normalize along rows
1202
1171
GGML_API struct ggml_tensor * ggml_norm (
1203
1172
struct ggml_context * ctx ,
@@ -2011,16 +1980,15 @@ extern "C" {
2011
1980
2012
1981
#define GGML_KQ_MASK_PAD 64 // padding multiple for the flash-attention mask's batch dimension: n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD)
2013
1982
2014
- // q: [n_embd_k, n_batch, n_head, ne3 ]
2015
- // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
2016
- // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
2017
- // mask: [n_kv, n_batch_pad, ne32, ne33 ] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
2018
- // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
1983
+ // q: [n_embd_k, n_batch, n_head, ne3]
1984
+ // k: [n_embd_k, n_kv, n_head_kv, ne3]
1985
+ // v: [n_embd_v, n_kv, n_head_kv, ne3] !! not transposed !!
1986
+ // mask: [n_kv, n_batch_pad, ne32, 1 ] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1987
+ // res: [n_embd_v, n_head, n_batch, ne3] !! permuted !!
2019
1988
//
2020
1989
// broadcast:
2021
1990
// n_head % n_head_kv == 0
2022
- // n_head % ne32 == 0
2023
- // ne3 % ne33 == 0
1991
+ // ne3 % ne32 == 0
2024
1992
//
2025
1993
GGML_API struct ggml_tensor * ggml_flash_attn_ext (
2026
1994
struct ggml_context * ctx ,
@@ -2060,8 +2028,7 @@ extern "C" {
2060
2028
struct ggml_tensor * dt ,
2061
2029
struct ggml_tensor * A ,
2062
2030
struct ggml_tensor * B ,
2063
- struct ggml_tensor * C ,
2064
- struct ggml_tensor * ids );
2031
+ struct ggml_tensor * C );
2065
2032
2066
2033
// partition into non-overlapping windows with padding if needed
2067
2034
// example:
0 commit comments