Skip to content

Commit cb0f488

Browse files
Author: Bodhi Hu
Committed: fix musa inference on x86
1 parent: 2842362 · commit: cb0f488

File tree

2 files changed

+2
-8
lines changed

2 files changed

+2
-8
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -234,8 +234,6 @@ static bool new_mma_available(const int cc) {
234234
static constexpr __device__ int ggml_cuda_get_physical_warp_size() {
235235
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
236236
return __AMDGCN_WAVEFRONT_SIZE;
237-
#elif defined(GGML_USE_MUSA)
238-
return 128;
239237
#else
240238
return 32;
241239
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
@@ -406,11 +404,11 @@ static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, i
406404

407405
#if __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A || defined(GGML_USE_MUSA)
408406
return __dp4a(a, b, c);
409-
#else // __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A
407+
#else
410408
const int8_t * a8 = (const int8_t *) &a;
411409
const int8_t * b8 = (const int8_t *) &b;
412410
return c + a8[0]*b8[0] + a8[1]*b8[1] + a8[2]*b8[2] + a8[3]*b8[3];
413-
#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A
411+
#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_DP4A || defined(GGML_USE_MUSA)
414412

415413
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
416414
}

ggml/src/ggml-cuda/mmq.cu

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -137,10 +137,6 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
137137
return true;
138138
}
139139

140-
#if defined(GGML_USE_MUSA)
141-
return true;
142-
#endif // defined(GGML_USE_MUSA)
143-
144140
if (ggml_cuda_highest_compiled_arch(cc) < GGML_CUDA_CC_DP4A) {
145141
return false;
146142
}

0 commit comments

Comments
 (0)