@@ -31,7 +31,7 @@ static inline int nearest_int(float fval) {
3131 return (i & 0x007fffff ) - 0x00400000 ;
3232}
3333
34- // Helper template functions for `fp16` and `fp32`.
34+ // Helper functions for `fp16` and `fp32`.
3535
3636template <int nrows_interleaved, int interleave_size>
3737static inline void ggml_repack_mat_f16_NxK_generic (const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
@@ -333,7 +333,7 @@ static inline void ggml_gemv_f16_KxM_f16_generic(int n, float * GGML_RESTRICT s,
333333 for (int l = 0 ; l < nb; l++) {
334334 for (int j = 0 ; j < ncols_interleaved; j++) {
335335 for (int k = 0 ; k < interleave_size; k++) {
336- sumf[j] += GGML_FP16_TO_FP32 (b_ptr[l].d [j * interleave_size + k]) * GGML_FP16_TO_FP32 (a_ptr[l + k]);
336+ sumf[j] += GGML_FP16_TO_FP32 (b_ptr[l].d [j * interleave_size + k]) * GGML_FP16_TO_FP32 (a_ptr[l * interleave_size + k]);
337337 }
338338 }
339339 }
@@ -363,7 +363,7 @@ static inline void ggml_gemv_f32_KxM_f32_generic(int n, float * GGML_RESTRICT s,
363363 for (int l = 0 ; l < nb; l++) {
364364 for (int j = 0 ; j < ncols_interleaved; j++) {
365365 for (int k = 0 ; k < interleave_size; k++) {
366- sumf[j] += b_ptr[l].d [j * interleave_size + k] * a_ptr[l + k];
366+ sumf[j] += b_ptr[l].d [j * interleave_size + k] * a_ptr[l * interleave_size + k];
367367 }
368368 }
369369 }
@@ -375,7 +375,7 @@ template<int nrows, int interleave_size, int ncols_interleaved>
375375static inline void ggml_gemm_f16_NxKxM_f16_generic (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
376376 const int nb = n / interleave_size;
377377
378- assert (nr % nrows == 0 );
378+ assert (nr % nrows == 0 );
379379 assert (n % interleave_size == 0 );
380380 assert (nc % ncols_interleaved == 0 );
381381
@@ -395,7 +395,7 @@ static inline void ggml_gemm_f16_NxKxM_f16_generic(int n, float * GGML_RESTRICT
395395 for (int m = 0 ; m < nrows; m++) {
396396 for (int j = 0 ; j < ncols_interleaved; j++) {
397397 for (int k = 0 ; k < interleave_size; k++) {
398- sumf[m][j] += b_ptr[l].d [j * interleave_size + k] * a_ptr[l].d [m * interleave_size + k];
398+ sumf[m][j] += GGML_FP16_TO_FP32 ( b_ptr[l].d [j * interleave_size + k]) * GGML_FP16_TO_FP32 ( a_ptr[l].d [m * interleave_size + k]) ;
399399 }
400400 }
401401 }
@@ -412,7 +412,7 @@ template<int nrows, int interleave_size, int ncols_interleaved>
412412static inline void ggml_gemm_f32_NxKxM_f32_generic (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
413413 const int nb = n / interleave_size;
414414
415- assert (nr % nrows == 0 );
415+ assert (nr % nrows == 0 );
416416 assert (n % interleave_size == 0 );
417417 assert (nc % ncols_interleaved == 0 );
418418
@@ -1913,34 +1913,34 @@ static int repack_iq4_nl_to_iq4_nl_8_bl(struct ggml_tensor * t, int interleave_b
19131913 GGML_UNUSED (data_size);
19141914}
19151915
1916- template <int nrows_interleaved , int interleave_size>
1917- static int repack_f16_to_f16_N_bl (struct ggml_tensor * t, const void * GGML_RESTRICT data, size_t data_size) {
1916+ template <int ncols_interleaved , int interleave_size>
1917+ static int repack_f16_to_f16_MxK_bl (struct ggml_tensor * t, const void * GGML_RESTRICT data, size_t data_size) {
19181918 GGML_ASSERT (t->type == GGML_TYPE_F16 );
19191919
19201920 const ggml_half * src = (const ggml_half *)data;
1921- block_f16<nrows_interleaved , interleave_size> * dst = ( block_f16<nrows_interleaved , interleave_size> *)t->data ;
1921+ block_f16<ncols_interleaved , interleave_size> * dst = ( block_f16<ncols_interleaved , interleave_size> *)t->data ;
19221922
1923- ggml_half dst_tmp[nrows_interleaved * interleave_size];
1923+ ggml_half dst_tmp[ncols_interleaved * interleave_size];
19241924
19251925 int nrow = ggml_nrows (t);
19261926 int row_size = t->ne [0 ];
19271927 int nblocks = row_size / interleave_size;
19281928
19291929 GGML_ASSERT (data_size == nrow * nblocks * interleave_size * sizeof (ggml_half));
19301930
1931- if (t->ne [1 ] % nrows_interleaved != 0 || t->ne [0 ] % interleave_size != 0 ) {
1931+ if (t->ne [1 ] % ncols_interleaved != 0 || t->ne [0 ] % interleave_size != 0 ) {
19321932 return -1 ;
19331933 }
19341934
1935- for (int b = 0 ; b < nrow; b += nrows_interleaved ) {
1935+ for (int b = 0 ; b < nrow; b += ncols_interleaved ) {
19361936 for (int i = 0 ; i < nblocks; i++) {
1937- for (int j = 0 ; j < nrows_interleaved ; j++) {
1937+ for (int j = 0 ; j < ncols_interleaved ; j++) {
19381938 for (int k = 0 ; k < interleave_size; k++) {
19391939 dst_tmp[j * interleave_size + k] = src[(j + b) * row_size + i * interleave_size + k];
19401940 }
19411941 }
1942- block_f16<nrows_interleaved , interleave_size> out;
1943- memcpy (&out.d , dst_tmp, sizeof (ggml_half) * nrows_interleaved * interleave_size);
1942+ block_f16<ncols_interleaved , interleave_size> out;
1943+ memcpy (&out.d , dst_tmp, sizeof (ggml_half) * ncols_interleaved * interleave_size);
19441944 *dst = out;
19451945 dst++;
19461946 }
@@ -1949,34 +1949,34 @@ static int repack_f16_to_f16_N_bl(struct ggml_tensor * t, const void * GGML_REST
19491949 return 0 ;
19501950}
19511951
1952- template <int nrows_interleaved , int interleave_size>
1953- static int repack_f32_to_f32_N_bl (struct ggml_tensor * t, const void * GGML_RESTRICT data, size_t data_size) {
1952+ template <int ncols_interleaved , int interleave_size>
1953+ static int repack_f32_to_f32_MxK_bl (struct ggml_tensor * t, const void * GGML_RESTRICT data, size_t data_size) {
19541954 GGML_ASSERT (t->type == GGML_TYPE_F32 );
19551955
19561956 const float * src = (const float *)data;
1957- block_f32<nrows_interleaved , interleave_size> * dst = ( block_f32<nrows_interleaved , interleave_size> *)t->data ;
1957+ block_f32<ncols_interleaved , interleave_size> * dst = ( block_f32<ncols_interleaved , interleave_size> *)t->data ;
19581958
1959- float dst_tmp[nrows_interleaved * interleave_size];
1959+ float dst_tmp[ncols_interleaved * interleave_size];
19601960
19611961 int nrow = ggml_nrows (t);
19621962 int row_size = t->ne [0 ];
19631963 int nblocks = row_size / interleave_size;
19641964
19651965 GGML_ASSERT (data_size == nrow * nblocks * interleave_size * sizeof (float ));
19661966
1967- if (t->ne [1 ] % nrows_interleaved != 0 || t->ne [0 ] % interleave_size != 0 ) {
1967+ if (t->ne [1 ] % ncols_interleaved != 0 || t->ne [0 ] % interleave_size != 0 ) {
19681968 return -1 ;
19691969 }
19701970
1971- for (int b = 0 ; b < nrow; b += nrows_interleaved ) {
1971+ for (int b = 0 ; b < nrow; b += ncols_interleaved ) {
19721972 for (int i = 0 ; i < nblocks; i++) {
1973- for (int j = 0 ; j < nrows_interleaved ; j++) {
1973+ for (int j = 0 ; j < ncols_interleaved ; j++) {
19741974 for (int k = 0 ; k < interleave_size; k++) {
19751975 dst_tmp[j * interleave_size + k] = src[(j + b) * row_size + i * interleave_size + k];
19761976 }
19771977 }
1978- block_f32<nrows_interleaved , interleave_size> out;
1979- memcpy (&out.d , dst_tmp, sizeof (float ) * nrows_interleaved * interleave_size);
1978+ block_f32<ncols_interleaved , interleave_size> out;
1979+ memcpy (&out.d , dst_tmp, sizeof (float ) * ncols_interleaved * interleave_size);
19801980 *dst = out;
19811981 dst++;
19821982 }
@@ -2029,29 +2029,29 @@ template <> int repack<block_iq4_nl, 8, 8>(struct ggml_tensor * t, const void *
20292029}
20302030
20312031template <> int repack<ggml_half, 1 , 16 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2032- return repack_f16_to_f16_N_bl <16 , 1 >(t, data, data_size);
2032+ return repack_f16_to_f16_MxK_bl <16 , 1 >(t, data, data_size);
20332033}
20342034template <> int repack<ggml_half, 1 , 32 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2035- return repack_f16_to_f16_N_bl <32 , 1 >(t, data, data_size);
2035+ return repack_f16_to_f16_MxK_bl <32 , 1 >(t, data, data_size);
20362036}
20372037template <> int repack<ggml_half, 1 , 64 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2038- return repack_f16_to_f16_N_bl <64 , 1 >(t, data, data_size);
2038+ return repack_f16_to_f16_MxK_bl <64 , 1 >(t, data, data_size);
20392039}
20402040template <> int repack<ggml_half, 1 , 128 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2041- return repack_f16_to_f16_N_bl <128 , 1 >(t, data, data_size);
2041+ return repack_f16_to_f16_MxK_bl <128 , 1 >(t, data, data_size);
20422042}
20432043
20442044template <> int repack<float , 1 , 16 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2045- return repack_f32_to_f32_N_bl <16 , 1 >(t, data, data_size);
2045+ return repack_f32_to_f32_MxK_bl <16 , 1 >(t, data, data_size);
20462046}
20472047template <> int repack<float , 1 , 32 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2048- return repack_f32_to_f32_N_bl <32 , 1 >(t, data, data_size);
2048+ return repack_f32_to_f32_MxK_bl <32 , 1 >(t, data, data_size);
20492049}
20502050template <> int repack<float , 1 , 64 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2051- return repack_f32_to_f32_N_bl <64 , 1 >(t, data, data_size);
2051+ return repack_f32_to_f32_MxK_bl <64 , 1 >(t, data, data_size);
20522052}
20532053template <> int repack<float , 1 , 128 >(struct ggml_tensor * t, const void * data, size_t data_size) {
2054- return repack_f32_to_f32_N_bl <128 , 1 >(t, data, data_size);
2054+ return repack_f32_to_f32_MxK_bl <128 , 1 >(t, data, data_size);
20552055}
20562056
20572057// gemv
@@ -2277,7 +2277,7 @@ template <typename BLOC_TYPE, int64_t NB_ROWS, int64_t INTER_SIZE, int64_t NB_CO
22772277
22782278 GGML_ASSERT (src1_ptr + src1_col_stride * nrows <= (const char *) params->wdata + params->wsize );
22792279
2280- // If there are more than three rows in src1, use gemm; otherwise, use gemv.
2280+ // If src1 has at least `NB_ROWS` rows, use gemm; otherwise, use gemv.
22812281 if (nrows > (NB_ROWS - 1 )) {
22822282 gemm<BLOC_TYPE , NB_ROWS , INTER_SIZE , NB_COLS , PARAM_TYPE >(ne00, (float *) (dst_ptr) + src0_start, nb1 / nb0,
22832283 src0_ptr + src0_start * nb01, src1_ptr,
0 commit comments