Skip to content

Commit 68ca77a

Browse files
committed
chore : Fix the error when compiling rocm build on windows using cmake
(#9666) Fix the compilation error "call to undeclared function '_mm256_dpbusd_epi32'". The function _mm256_setzero_si256 is defined in avxintrin.h, while _mm256_dpbusd_epi32 is defined in avx512vlvnniintrin.h. Therefore, __AVX__, __AVX512VNNI__, and __AVX512VL__ all need to be defined before that code path is used. According to #7743, -DGGML_OPENMP=OFF also has to be passed to CMake, so it is added to the build documentation.
1 parent 1842922 commit 68ca77a

File tree

4 files changed

+4
-4
lines changed

4 files changed

+4
-4
lines changed

docs/build.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ You can download it from your Linux distro's package manager or from here: [ROCm
259259
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
260260
```bash
261261
set PATH=%HIP_PATH%\bin;%PATH%
262-
cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DGGML_HIP=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
262+
cmake -S . -B build -G Ninja -DGGML_OPENMP=OFF -DAMDGPU_TARGETS=gfx1100 -DGGML_HIP=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
263263
cmake --build build
264264
```
265265
Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100`, which corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors).

ggml/src/ggml-cpu/ggml-cpu-aarch64.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ static inline __m256i sum_i16_pairs_int32x8(const __m256i x) {
161161
}
162162

163163
static inline __m256i mul_sum_us8_pairs_int32x8(const __m256i ax, const __m256i sy) {
164-
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
164+
#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__)
165165
const __m256i zero = _mm256_setzero_si256();
166166
return _mm256_dpbusd_epi32(zero, ax, sy);
167167
#else

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
103103
}
104104

105105
static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
106-
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
106+
#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__)
107107
const __m256i zero = _mm256_setzero_si256();
108108
const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
109109
return _mm256_cvtepi32_ps(summed_pairs);

ggml/src/ggml-cpu/llamafile/sgemm.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ class tinyBLAS_Q0_AVX {
992992

993993
inline __m256 updot(__m256i u, __m256i s) {
994994
__m256i res;
995-
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
995+
#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__)
996996
res = _mm256_dpbusd_epi32(_mm256_setzero_si256(), u, s);
997997
#else
998998
res = _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_maddubs_epi16(u, s));

0 commit comments

Comments
 (0)