1
1
#ifndef _hip_compat_cuh
2
2
#define _hip_compat_cuh
3
3
4
- // Workaround for a bug in hipamd, backported from upstream.
4
+ // Workaround for a bug in hipamd, backported from upstream; this is fixed in ROCm 5.6.
5
5
__device__ __forceinline__ __half __compat_hrcp (__half x) {
6
6
return __half_raw{
7
7
static_cast <_Float16>(__builtin_amdgcn_rcph (static_cast <__half_raw>(x).data ))};
@@ -15,7 +15,7 @@ __device__ __forceinline__ __half2 __compat_h2rcp(__half2 x) {
15
15
// Textual aliases: after these defines, every later use of hrcp / h2rcp
// is replaced by the preprocessor with the corresponding __compat_ wrapper.
#define hrcp __compat_hrcp
16
16
#define h2rcp __compat_h2rcp
17
17
18
- // Workaround for hipify_python using rocblas instead of hipblas .
18
+ // Automatic conversion of hipblasHgemm doesn't convert half to hipblasHalf.
19
19
__host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm (hipblasHandle_t handle,
20
20
hipblasOperation_t transA,
21
21
hipblasOperation_t transB,
@@ -37,7 +37,9 @@ __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t
37
37
reinterpret_cast <const hipblasHalf *>(beta),
38
38
reinterpret_cast <hipblasHalf *>(CP), ldc);
39
39
}
40
+ #define hipblasHgemm __compat_hipblasHgemm
40
41
42
+ // Previous versions of PyTorch were converting to rocBLAS instead of hipBLAS.
41
43
// Preprocessor aliases that rewrite rocBLAS-style identifiers to hipBLAS
// names: the handle type, the "no transpose" operation constant, and the
// stream getter.
#define rocblas_handle hipblasHandle_t
42
44
#define rocblas_operation_none HIPBLAS_OP_N
43
45
#define rocblas_get_stream hipblasGetStream
0 commit comments