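Fix MUSA inference on x86: remove the MUSA-specific physical warp size of 128 and the MUSA-only early `return true` in ggml_cuda_should_use_mmq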

Bodhi Hu · Bodhi Hu · commit cb0f48878db2 · 2025-02-18T10:40:23.000-05:00
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -234,8 +234,6 @@ static bool new_mma_available(const int cc) {
 static constexpr __device__ int ggml_cuda_get_physical_warp_size() {
 #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
     return __AMDGCN_WAVEFRONT_SIZE;
-#elif defined(GGML_USE_MUSA)
-    return 128;
 #else
     return 32;
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
@@ -406,11 +404,11 @@ static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, i
 
 #if __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A || defined(GGML_USE_MUSA)
     return __dp4a(a, b, c);
-#else // __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A
+#else
     const int8_t * a8 = (const int8_t *) &a;
     const int8_t * b8 = (const int8_t *) &b;
     return c + a8[0]*b8[0] + a8[1]*b8[1] + a8[2]*b8[2] + a8[3]*b8[3];
-#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A
+#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A || defined(GGML_USE_MUSA)
 
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
 }
diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu
@@ -137,10 +137,6 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
         return true;
     }
 
-#if defined(GGML_USE_MUSA)
-    return true;
-#endif // defined(GGML_USE_MUSA)
-
     if (ggml_cuda_highest_compiled_arch(cc) < GGML_CUDA_CC_DP4A) {
         return false;
     }

Original file line number	Diff line number	Diff line change
`@@ -137,10 +137,6 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {`
`137`	`137`	`return true;`
`138`	`138`	`}`
`139`	`139`
`140`		`-#if defined(GGML_USE_MUSA)`
`141`		`- return true;`
`142`		`-#endif // defined(GGML_USE_MUSA)`
`143`		`-`
`144`	`140`	`if (ggml_cuda_highest_compiled_arch(cc) < GGML_CUDA_CC_DP4A) {`
`145`	`141`	`return false;`
`146`	`142`	`}`