File tree Expand file tree Collapse file tree 1 file changed +5
-2
lines changed Expand file tree Collapse file tree 1 file changed +5
-2
lines changed Original file line number Diff line number Diff line change 78
78
// True when the compute capability `cc` lies in the half-open range
// [GGML_CUDA_CC_CDNA3, GGML_CUDA_CC_RDNA1).
// NOTE: there must be no whitespace between the macro name and '(' - otherwise the
// preprocessor defines an object-like macro whose replacement text starts with "(cc)".
// The argument is parenthesized so that expressions can be passed safely.
#define GGML_CUDA_CC_IS_CDNA3(cc) ((cc) >= GGML_CUDA_CC_CDNA3 && (cc) < GGML_CUDA_CC_RDNA1)
79
79
80
80
// Moore Threads
81
+ #define MUSART_HMASK 40300 // MUSA rc4.3, min. ver. for half2 -> uint mask comparisons
82
+
81
83
#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210 ) // MTT S80, MTT S3000
82
84
#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220 ) // MTT S4000
83
85
#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310 ) // TBD
@@ -490,13 +492,14 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
490
492
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
491
493
}
492
494
493
- #if CUDART_VERSION < CUDART_HMASK
495
+ #if (defined(CUDART_VERSION) && CUDART_VERSION < CUDART_HMASK) || defined(GGML_USE_HIP) || \
496
+ (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
494
497
// Fallback for a per-lane "greater than" comparison of two half2 values that yields a bit mask.
// Each 16-bit half of a and b is converted to float and compared independently:
//   - bits  0-15 of the result are all set iff low(a)  > low(b),  otherwise all clear;
//   - bits 16-31 of the result are all set iff high(a) > high(b), otherwise all clear.
static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
    const bool lo_gt = float(__low2half(a))  > float(__low2half(b));
    const bool hi_gt = float(__high2half(a)) > float(__high2half(b));

    // Select the full 16-bit pattern for each half instead of multiplying by the predicate.
    const uint32_t lo_bits = lo_gt ? 0x0000FFFFu : 0u;
    const uint32_t hi_bits = hi_gt ? 0xFFFF0000u : 0u;
    return hi_bits | lo_bits;
}
499
- #endif // CUDART_VERSION < CUDART_HMASK
502
+ #endif // (defined(CUDART_VERSION) && CUDART_VERSION < CUDART_HMASK) || defined(GGML_USE_HIP) || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
500
503
501
504
static __device__ __forceinline__ int ggml_cuda_dp4a (const int a, const int b, int c) {
502
505
#if defined(GGML_USE_HIP)
You can’t perform that action at this time.
0 commit comments