Skip to content

Commit f995cdc

Browse files
committed
musa: handle __hgt2_mask (available starting from MUSA SDK rc4.3.0)
Signed-off-by: Xiaodong Ye <[email protected]>
1 parent f08c4c0 commit f995cdc

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,8 @@
7878
#define GGML_CUDA_CC_IS_CDNA3(cc) (cc >= GGML_CUDA_CC_CDNA3 && cc < GGML_CUDA_CC_RDNA1)
7979

8080
// Moore Threads
81+
#define MUSART_HMASK 40300 // MUSA rc4.3, min. ver. for half2 -> uint mask comparisons
82+
8183
#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000
8284
#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000
8385
#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD
@@ -490,13 +492,13 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
490492
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
491493
}
492494

493-
#if CUDART_VERSION < CUDART_HMASK
495+
#if CUDART_VERSION < CUDART_HMASK || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
494496
static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
495497
const uint32_t mask_low = 0x0000FFFF * (float( __low2half(a)) > float( __low2half(b)));
496498
const uint32_t mask_high = 0xFFFF0000 * (float(__high2half(a)) > float(__high2half(b)));
497499
return mask_low | mask_high;
498500
}
499-
#endif // CUDART_VERSION < CUDART_HMASK
501+
#endif // CUDART_VERSION < CUDART_HMASK || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
500502

501503
static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, int c) {
502504
#if defined(GGML_USE_HIP)

0 commit comments

Comments
 (0)