Skip to content

Commit e9da620

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents 0dacadb + 3dff8fe commit e9da620

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

exllama_ext/hip_compat.cuh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#ifndef _hip_compat_cuh
22
#define _hip_compat_cuh
33

4-
// Workaround for a bug in hipamd, backported from upstream.
4+
// Workaround for a bug in hipamd, backported from upstream; this is fixed in ROCm 5.6.
55
__device__ __forceinline__ __half __compat_hrcp(__half x) {
66
return __half_raw{
77
static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half_raw>(x).data))};
@@ -15,7 +15,7 @@ __device__ __forceinline__ __half2 __compat_h2rcp(__half2 x) {
1515
#define hrcp __compat_hrcp
1616
#define h2rcp __compat_h2rcp
1717

18-
// Workaround for hipify_python using rocblas instead of hipblas.
18+
// Automatic conversion of hipblasHgemm doesn't convert half to hipblasHalf.
1919
__host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t handle,
2020
hipblasOperation_t transA,
2121
hipblasOperation_t transB,
@@ -37,7 +37,9 @@ __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t
3737
reinterpret_cast<const hipblasHalf *>(beta),
3838
reinterpret_cast<hipblasHalf *>(CP), ldc);
3939
}
40+
#define hipblasHgemm __compat_hipblasHgemm
4041

42+
// Previous versions of PyTorch were converting to rocBLAS instead of hipBLAS.
4143
#define rocblas_handle hipblasHandle_t
4244
#define rocblas_operation_none HIPBLAS_OP_N
4345
#define rocblas_get_stream hipblasGetStream

0 commit comments

Comments
 (0)