Skip to content

Commit f521cd2

Browse files
committed
[rocmlibs] Add support for gfx1150
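Added support for gfx1150 to the Tensile, rocBLAS, rocPRIM and PowerInfer patches, along with a new clr patch that removes the single-ISA check in Device::create(). Also installs the PowerInfer Python packages inside a venv and switches the test script to pipx for huggingface_hub.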
1 parent a478317 commit f521cd2

11 files changed

+96
-85
lines changed

bin/patches/clr.patch

+16
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,19 @@ index 72f406210..6004cf325 100644
1111
PATHS
1212
/opt/rocm/
1313
${ROCM_INSTALL_PATH}
14+
diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp
15+
index 751161a58..3271157b7 100644
16+
--- a/rocclr/device/rocm/rocdevice.cpp
17+
+++ b/rocclr/device/rocm/rocdevice.cpp
18+
@@ -679,11 +679,6 @@ bool Device::create() {
19+
pciDeviceId_);
20+
return false;
21+
}
22+
- if (agent_isas.count != 1) {
23+
- LogPrintfError("HSA device %s (PCI ID %x) has %u ISAs but can only support a single ISA",
24+
- agent_name, pciDeviceId_, agent_isas.count);
25+
- return false;
26+
- }
27+
28+
uint32_t isa_name_length = 0;
29+
if (HSA_STATUS_SUCCESS !=

bin/patches/patch-control-file_20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ rocprofiler: rocprofiler-combined-no-aql-ok-fix-cov6.patch
1111
babelstream: babelstream-usm.patch
1212
llvm-project: ATD_ASO_full.patch
1313
UMT: umt.patch
14+
clr : clr.patch

bin/patches/patch-control-file_21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ rocprofiler: rocprofiler-combined-no-aql-ok-fix-cov6.patch
1111
babelstream: babelstream-usm.patch
1212
llvm-project: ATD_ASO_full.patch
1313
UMT: umt.patch
14+
clr : clr.patch

bin/rocmlibs/build_powerinfer.sh

+3
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ if [ "$1" == "install" ] ; then
118118
pushd $_repo_dir
119119
cd gguf-py
120120
echo "Installing gguf python package"
121+
python3 -m venv $AOMP_INSTALL_DIR/../venv
122+
source $AOMP_INSTALL_DIR/../venv/bin/activate
121123
pip install .
122124
if [ $? != 0 ] ; then
123125
echo "ERROR pip install failed for PowerInfer/gguf-py package"
@@ -130,6 +132,7 @@ if [ "$1" == "install" ] ; then
130132
echo "ERROR pip install failed for PowerInfer/powerinfer-py package"
131133
exit 1
132134
fi
135+
deactivate
133136
popd
134137
removepatch $_repo_dir
135138
else

bin/rocmlibs/patches/patch-control-file_20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Tensile: tensile_aca95d17.patch
2+
clr : clr.patch
23
rocBLAS: rocblas.patch
34
rocPRIM: rocprim.patch
45
rocSPARSE: rocsparse.patch

bin/rocmlibs/patches/patch-control-file_21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Tensile: tensile_aca95d17.patch
2+
clr : clr.patch
23
rocBLAS: rocblas.patch
34
rocPRIM: rocprim.patch
45
rocSPARSE: rocsparse.patch

bin/rocmlibs/patches/powerinfer.patch

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index 4cf28d5..2cf69a1 100644
77
# in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
88
# and select the line that matches the current nixpkgs version of rocBLAS.
99
- "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
10-
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx90c;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103"
10+
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx90c;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
1111
];
1212
};
1313
apps.llama-server = {

bin/rocmlibs/patches/rocblas.patch

+11-6
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index f70de1cf..5eedfcc3 100644
2+
index 2cb3b303..ad442656 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
55
@@ -112,7 +112,7 @@ if (NOT BUILD_ADDRESS_SANITIZER)
66
set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
77
set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
88
set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
99
- set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201")
10-
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
10+
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
1111
else()
1212
set( TARGET_LIST_ROCM_5.6 "gfx908:xnack+;gfx90a:xnack+")
1313
set( TARGET_LIST_ROCM_5.7 "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+")
1414
diff --git a/library/src/handle.cpp b/library/src/handle.cpp
15-
index ea1fa1a7..cb14874c 100644
15+
index ea1fa1a7..7d2a05c3 100644
1616
--- a/library/src/handle.cpp
1717
+++ b/library/src/handle.cpp
1818
@@ -106,6 +106,10 @@ static Processor getActiveArch(int deviceId)
@@ -26,19 +26,23 @@ index ea1fa1a7..cb14874c 100644
2626
else if(deviceString.find("gfx940") != std::string::npos)
2727
{
2828
return Processor::gfx940;
29-
@@ -146,6 +150,10 @@ static Processor getActiveArch(int deviceId)
29+
@@ -146,6 +150,14 @@ static Processor getActiveArch(int deviceId)
3030
{
3131
return Processor::gfx1102;
3232
}
3333
+ else if(deviceString.find("gfx1103") != std::string::npos)
3434
+ {
3535
+ return Processor::gfx1103;
36+
+ }
37+
+ else if(deviceString.find("gfx1150") != std::string::npos)
38+
+ {
39+
+ return Processor::gfx1150;
3640
+ }
3741
else if(deviceString.find("gfx1151") != std::string::npos)
3842
{
3943
return Processor::gfx1151;
4044
diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp
41-
index 94d18c7b..c47cefe8 100644
45+
index 70844136..6532c78e 100644
4246
--- a/library/src/include/handle.hpp
4347
+++ b/library/src/include/handle.hpp
4448
@@ -82,6 +82,7 @@ enum class Processor : int
@@ -49,11 +53,12 @@ index 94d18c7b..c47cefe8 100644
4953
gfx940 = 940,
5054
gfx941 = 941,
5155
gfx942 = 942,
52-
@@ -96,6 +97,7 @@ enum class Processor : int
56+
@@ -96,6 +97,8 @@ enum class Processor : int
5357
gfx1100 = 1100,
5458
gfx1101 = 1101,
5559
gfx1102 = 1102,
5660
+ gfx1103 = 1103,
61+
+ gfx1150 = 1150,
5762
gfx1151 = 1151,
5863
gfx1200 = 1200,
5964
gfx1201 = 1201

bin/rocmlibs/patches/rocprim.patch

+2-67
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,13 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index a5b9b127..0153b24d 100644
2+
index 462a5928..6caba409 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
55
@@ -99,7 +99,7 @@ if(NOT USE_HIP_CPU)
66
)
77
else()
88
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
99
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
10-
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx90c;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151;gfx1200;gfx1201"
10+
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"
1111
)
1212
endif()
1313

14-
diff --git a/rocprim/include/rocprim/device/config_types.hpp b/rocprim/include/rocprim/device/config_types.hpp
15-
index 58729b1d..22f7272a 100644
16-
--- a/rocprim/include/rocprim/device/config_types.hpp
17-
+++ b/rocprim/include/rocprim/device/config_types.hpp
18-
@@ -169,9 +169,11 @@ enum class target_arch : unsigned int
19-
gfx906 = 906,
20-
gfx908 = 908,
21-
gfx90a = 910,
22-
+ gfx90c = 912,
23-
gfx1030 = 1030,
24-
gfx1100 = 1100,
25-
gfx1102 = 1102,
26-
+ gfx1103 = 1103,
27-
unknown = std::numeric_limits<unsigned int>::max(),
28-
};
29-
#endif // DOXYGEN_SHOULD_SKIP_THIS
30-
@@ -204,16 +206,18 @@ constexpr bool prefix_equals(const char* lhs, const char* rhs, std::size_t n)
31-
constexpr target_arch get_target_arch_from_name(const char* const arch_name, const std::size_t n)
32-
{
33-
constexpr const char* target_names[]
34-
- = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx1030", "gfx1100", "gfx1102"};
35-
+ = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx90c", "gfx1030", "gfx1100", "gfx1102", "gfx1103"};
36-
constexpr target_arch target_architectures[] = {
37-
target_arch::gfx803,
38-
target_arch::gfx900,
39-
target_arch::gfx906,
40-
target_arch::gfx908,
41-
target_arch::gfx90a,
42-
+ target_arch::gfx90c,
43-
target_arch::gfx1030,
44-
target_arch::gfx1100,
45-
target_arch::gfx1102,
46-
+ target_arch::gfx1103,
47-
};
48-
static_assert(sizeof(target_names) / sizeof(target_names[0])
49-
== sizeof(target_architectures) / sizeof(target_architectures[0]),
50-
@@ -266,12 +270,16 @@ auto dispatch_target_arch(const target_arch target_arch)
51-
return Config::template architecture_config<target_arch::gfx908>::params;
52-
case target_arch::gfx90a:
53-
return Config::template architecture_config<target_arch::gfx90a>::params;
54-
+ case target_arch::gfx90c:
55-
+ return Config::template architecture_config<target_arch::gfx90c>::params;
56-
case target_arch::gfx1030:
57-
return Config::template architecture_config<target_arch::gfx1030>::params;
58-
case target_arch::gfx1100:
59-
return Config::template architecture_config<target_arch::gfx1100>::params;
60-
case target_arch::gfx1102:
61-
return Config::template architecture_config<target_arch::gfx1102>::params;
62-
+ case target_arch::gfx1103:
63-
+ return Config::template architecture_config<target_arch::gfx1103>::params;
64-
case target_arch::invalid:
65-
assert(false && "Invalid target architecture selected at runtime.");
66-
}
67-
diff --git a/scripts/autotune/create_optimization.py b/scripts/autotune/create_optimization.py
68-
index 130bdb3c..171de162 100755
69-
--- a/scripts/autotune/create_optimization.py
70-
+++ b/scripts/autotune/create_optimization.py
71-
@@ -41,7 +41,7 @@ from collections import defaultdict
72-
from typing import Dict, List, Callable, Optional, Tuple
73-
from jinja2 import Environment, PackageLoader, select_autoescape
74-
75-
-TARGET_ARCHITECTURES = ['gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', 'gfx1030', 'gfx1100', 'gfx1102']
76-
+TARGET_ARCHITECTURES = ['gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', 'gfx90c', 'gfx1030', 'gfx1100', 'gfx1102', 'gfx1103']
77-
# C++ typename used for optional types
78-
EMPTY_TYPENAME = "empty_type"

bin/rocmlibs/patches/tensile_aca95d17.patch

+57-9
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
2-
index 548b31f2..de4c2dd5 100644
2+
index 548b31f2..09adb3ef 100644
33
--- a/Tensile/AsmCaps.py
44
+++ b/Tensile/AsmCaps.py
5-
@@ -771,6 +771,50 @@ CACHED_ASM_CAPS = \
5+
@@ -771,6 +771,94 @@ CACHED_ASM_CAPS = \
66
'v_mov_b64': False,
77
'v_pk_fma_f16': True,
88
'v_pk_fmac_f16': False},
@@ -49,23 +49,69 @@ index 548b31f2..de4c2dd5 100644
4949
+ 'v_mad_mix_f32': False,
5050
+ 'v_mov_b64': False,
5151
+ 'v_pk_fma_f16': True,
52+
+ 'v_pk_fmac_f16': False},
53+
+ (11, 5, 0): {'HasAddLshl': True,
54+
+ 'HasAtomicAdd': True,
55+
+ 'HasDirectToLdsDest': False,
56+
+ 'HasDirectToLdsNoDest': False,
57+
+ 'HasExplicitCO': True,
58+
+ 'HasExplicitNC': True,
59+
+ 'HasGLCModifier': True,
60+
+ 'HasNTModifier': False,
61+
+ 'HasLshlOr': True,
62+
+ 'HasMFMA': False,
63+
+ 'HasMFMA_b8': False,
64+
+ 'HasMFMA_bf16_1k': False,
65+
+ 'HasMFMA_bf16_original': False,
66+
+ 'HasMFMA_constSrc': False,
67+
+ 'HasMFMA_f64': False,
68+
+ 'HasMFMA_f8': False,
69+
+ 'HasMFMA_i8_908': False,
70+
+ 'HasMFMA_i8_940': False,
71+
+ 'HasMFMA_vgpr': False,
72+
+ 'HasMFMA_xf32': False,
73+
+ 'HasSMulHi': True,
74+
+ 'HasWMMA': True,
75+
+ 'KernargPreloading': False,
76+
+ 'MaxLgkmcnt': 15,
77+
+ 'MaxVmcnt': 63,
78+
+ 'SupportedISA': True,
79+
+ 'SupportedSource': True,
80+
+ 'VOP3v_dot4_i32_i8': False,
81+
+ 'v_dot2_f32_f16': True,
82+
+ 'v_dot2c_f32_f16': True,
83+
+ 'v_dot4_i32_i8': False,
84+
+ 'v_dot4c_i32_i8': False,
85+
+ 'v_fma_f16': True,
86+
+ 'v_fma_f32': True,
87+
+ 'v_fma_f64': True,
88+
+ 'v_fma_mix_f32': True,
89+
+ 'v_fmac_f16': False,
90+
+ 'v_fmac_f32': True,
91+
+ 'v_mac_f16': False,
92+
+ 'v_mac_f32': False,
93+
+ 'v_mad_mix_f32': False,
94+
+ 'v_mov_b64': False,
95+
+ 'v_pk_fma_f16': True,
5296
+ 'v_pk_fmac_f16': False},
5397
(11, 5, 1): {'HasAddLshl': True,
5498
'HasAtomicAdd': True,
5599
'HasDirectToLdsDest': False,
56100
diff --git a/Tensile/Common.py b/Tensile/Common.py
57-
index 66f2caa2..4a77df59 100644
101+
index 66f2caa2..46c2d274 100644
58102
--- a/Tensile/Common.py
59103
+++ b/Tensile/Common.py
60-
@@ -253,7 +253,7 @@ globalParameters["SupportedISA"] = [(8,0,3),
104+
@@ -253,8 +253,8 @@ globalParameters["SupportedISA"] = [(8,0,3),
61105
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
62106
(9,4,0), (9,4,1), (9,4,2),
63107
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
64108
- (11,0,0), (11,0,1), (11,0,2),
109+
- (11,5,1),
65110
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
66-
(11,5,1),
111+
+ (11, 5, 0), (11,5,1),
67112
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
68113

114+
globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed
69115
@@ -316,7 +316,7 @@ globalParameters["SeparateArchitectures"] = False # write Tensile library metada
70116

71117
globalParameters["LazyLibraryLoading"] = False # Load library and code object files when needed instead of at startup
@@ -75,7 +121,7 @@ index 66f2caa2..4a77df59 100644
75121

76122
globalParameters["ExperimentalLogicDir"] = "/experimental/"
77123

78-
@@ -328,13 +328,13 @@ architectureMap = {
124+
@@ -328,13 +328,14 @@ architectureMap = {
79125
'all':'_','gfx000':'none', 'gfx803':'r9nano', 'gfx900':'vega10', 'gfx900:xnack-':'vega10',
80126
'gfx906':'vega20', 'gfx906:xnack+':'vega20', 'gfx906:xnack-':'vega20',
81127
'gfx908':'arcturus','gfx908:xnack+':'arcturus', 'gfx908:xnack-':'arcturus',
@@ -88,10 +134,11 @@ index 66f2caa2..4a77df59 100644
88134
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
89135
- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
90136
+ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix',
137+
+ 'gfx1150':'gfx1150',
91138
'gfx1151':'gfx1151',
92139
'gfx1200':'gfx1200',
93140
'gfx1201':'gfx1201'
94-
@@ -2461,7 +2461,7 @@ def assignGlobalParameters( config ):
141+
@@ -2461,7 +2462,7 @@ def assignGlobalParameters( config ):
95142
if os.name == "nt":
96143
globalParameters["CurrentISA"] = (9,0,6)
97144
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
@@ -292,7 +339,7 @@ index 6e22a2c7..09345113 100644
292339
Build Tensile client executable; used for stand alone benchmarking (default).
293340
\-\-client-config
294341
diff --git a/pytest.ini b/pytest.ini
295-
index 13c43039..23a53d35 100644
342+
index 13c43039..70ed7a3c 100644
296343
--- a/pytest.ini
297344
+++ b/pytest.ini
298345
@@ -92,6 +92,7 @@ markers =
@@ -317,9 +364,10 @@ index 13c43039..23a53d35 100644
317364
skip-gfx940: architecture
318365
skip-gfx941: architecture
319366
skip-gfx942: architecture
320-
@@ -125,4 +128,5 @@ markers =
367+
@@ -125,4 +128,6 @@ markers =
321368
skip-gfx1100: architecture
322369
skip-gfx1101: architecture
323370
skip-gfx1102: architecture
324371
+ skip-gfx1103: architecture
372+
+ skip-gfx1150: architecture
325373
skip-gfx1151: architecture

bin/rocmlibs/test_powerinfer.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ pushd $MODEL_DIR
2626
pushd $MODEL_DIR
2727

2828
# Install huggingface-cli to download the PowerInfer GGUF models
29-
pip install -U "huggingface_hub[cli]"
30-
export PATH=$PATH:$HOME/.local/bin
29+
pipx install -U "huggingface_hub[cli]"
30+
export PATH=$PATH:$HOME/.local/share/pipx/venvs/huggingface-hub/bin
3131

3232
# Use huggingface-cli to download the PowerInfer GGUF version of LLaMA(ReLU)-2-7B model
3333
huggingface-cli download --resume-download --local-dir ReluLLaMA-7B --local-dir-use-symlinks False PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF

0 commit comments

Comments
 (0)