File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change 78
78
// True when the compute capability `cc` lies in the half-open range
// [GGML_CUDA_CC_CDNA3, GGML_CUDA_CC_RDNA1).
// The parameter list must directly follow the macro name (no whitespace), otherwise the
// preprocessor defines an object-like macro. `cc` is parenthesized so that arguments
// containing operators are evaluated as a single value.
#define GGML_CUDA_CC_IS_CDNA3(cc) ((cc) >= GGML_CUDA_CC_CDNA3 && (cc) < GGML_CUDA_CC_RDNA1)
79
79
80
80
// Moore Threads
81
+ #define MUSART_HMASK 40300 // MUSA rc4.3, min. ver. for half2 -> uint mask comparisons
82
+
81
83
#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210 ) // MTT S80, MTT S3000
82
84
#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220 ) // MTT S4000
83
85
#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310 ) // TBD
@@ -490,13 +492,13 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
490
492
#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
491
493
}
492
494
493
- #if CUDART_VERSION < CUDART_HMASK
495
+ #if CUDART_VERSION < CUDART_HMASK || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
494
496
// Software fallback for the half2 "greater than" mask comparison; it is only compiled
// when the enclosing version guard selects it.
// Each half of `a` is compared against the matching half of `b` after conversion to float.
// Returns a 32-bit mask: bits 0-15 are all ones if low(a) > low(b), bits 16-31 are all
// ones if high(a) > high(b); the remaining bits are zero.
static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
    const bool low_gt  = float(__low2half(a))  > float(__low2half(b));
    const bool high_gt = float(__high2half(a)) > float(__high2half(b));

    const uint32_t low_bits  = low_gt  ? 0x0000FFFFu : 0u;
    const uint32_t high_bits = high_gt ? 0xFFFF0000u : 0u;
    return low_bits | high_bits;
}
499
- #endif // CUDART_VERSION < CUDART_HMASK
501
+ #endif // CUDART_VERSION < CUDART_HMASK || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
500
502
501
503
static __device__ __forceinline__ int ggml_cuda_dp4a (const int a, const int b, int c) {
502
504
#if defined(GGML_USE_HIP)
You can’t perform that action at this time.
0 commit comments