Skip to content

Commit 160e661

Browse files
authored
Revert "[rocmlibs] Add support for gfx1150 (#1335)"
This reverts commit 250353c.
1 parent 250353c commit 160e661

11 files changed, with 88 additions (+88) and 96 deletions (-96).

bin/aomp_common_vars

+1-1
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,7 @@ _sep=""
305305
# _sep=";"
306306
#done
307307

308-
ROCMLIBS_GFXLIST=${ROCMLIBS_GFXLIST:-"gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"}
308+
ROCMLIBS_GFXLIST=${ROCMLIBS_GFXLIST:-"gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"}
309309
_gfxlist="$ROCMLIBS_GFXLIST"
310310

311311
# Calculate the number of threads to use for make

bin/patches/clr.patch

-16
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,3 @@ index 72f406210..6004cf325 100644
1111
PATHS
1212
/opt/rocm/
1313
${ROCM_INSTALL_PATH}
14-
diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp
15-
index 751161a58..3271157b7 100644
16-
--- a/rocclr/device/rocm/rocdevice.cpp
17-
+++ b/rocclr/device/rocm/rocdevice.cpp
18-
@@ -679,11 +679,6 @@ bool Device::create() {
19-
pciDeviceId_);
20-
return false;
21-
}
22-
- if (agent_isas.count != 1) {
23-
- LogPrintfError("HSA device %s (PCI ID %x) has %u ISAs but can only support a single ISA",
24-
- agent_name, pciDeviceId_, agent_isas.count);
25-
- return false;
26-
- }
27-
28-
uint32_t isa_name_length = 0;
29-
if (HSA_STATUS_SUCCESS !=

bin/patches/patch-control-file_20.0.txt

-1
Original file line number | Diff line number | Diff line change
@@ -11,4 +11,3 @@ rocprofiler: rocprofiler-combined-no-aql-ok-fix-cov6.patch
1111
babelstream: babelstream-usm.patch
1212
llvm-project: ATD_ASO_full.patch
1313
UMT: umt.patch
14-
clr : clr.patch

bin/patches/patch-control-file_21.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ GenASis: genasis.patch
55
GenASiS_Basics: genasis_basics.patch
66
hipamd: hipamd-rpath.patch
77
bolt: bolt.patch
8-
clr: clr.patch
8+
clr:
99
rocr-runtime: rocr-runtime-combined-numa-remove-gfx940-gfx941-revert-add-gfx9-4-generic-support.patch
1010
rocprofiler: rocprofiler-combined-no-aql-ok-fix-cov6.patch
1111
babelstream: babelstream-usm.patch

bin/rocmlibs/build_powerinfer.sh

-3
Original file line number | Diff line number | Diff line change
@@ -118,8 +118,6 @@ if [ "$1" == "install" ] ; then
118118
pushd $_repo_dir
119119
cd gguf-py
120120
echo "Installing gguf python package"
121-
python3 -m venv $AOMP_INSTALL_DIR/../venv
122-
source $AOMP_INSTALL_DIR/../venv/bin/activate
123121
pip install .
124122
if [ $? != 0 ] ; then
125123
echo "ERROR pip install failed for PowerInfer/gguf-py package"
@@ -132,7 +130,6 @@ if [ "$1" == "install" ] ; then
132130
echo "ERROR pip install failed for PowerInfer/powerinfer-py package"
133131
exit 1
134132
fi
135-
deactivate
136133
popd
137134
removepatch $_repo_dir
138135
else

bin/rocmlibs/patches/patch-control-file_20.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,4 @@ rocPRIM: rocprim.patch
44
rocSPARSE: rocsparse.patch
55
rocSOLVER: rocsolver.patch
66
hipBLAS: hipblas.patch
7-
PowerInfer: powerinfer.patch
7+
PowerInfer: powerinfer.patch

bin/rocmlibs/patches/powerinfer.patch

+1-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ index 4cf28d5..2cf69a1 100644
77
# in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
88
# and select the line that matches the current nixpkgs version of rocBLAS.
99
- "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
10-
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx90c;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
10+
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx90c;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103"
1111
];
1212
};
1313
apps.llama-server = {

bin/rocmlibs/patches/rocblas.patch

+6-11
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,18 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index 2cb3b303..ad442656 100644
2+
index f70de1cf..5eedfcc3 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
55
@@ -112,7 +112,7 @@ if (NOT BUILD_ADDRESS_SANITIZER)
66
set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
77
set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
88
set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
99
- set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201")
10-
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
10+
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
1111
else()
1212
set( TARGET_LIST_ROCM_5.6 "gfx908:xnack+;gfx90a:xnack+")
1313
set( TARGET_LIST_ROCM_5.7 "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+")
1414
diff --git a/library/src/handle.cpp b/library/src/handle.cpp
15-
index ea1fa1a7..7d2a05c3 100644
15+
index ea1fa1a7..cb14874c 100644
1616
--- a/library/src/handle.cpp
1717
+++ b/library/src/handle.cpp
1818
@@ -106,6 +106,10 @@ static Processor getActiveArch(int deviceId)
@@ -26,23 +26,19 @@ index ea1fa1a7..7d2a05c3 100644
2626
else if(deviceString.find("gfx940") != std::string::npos)
2727
{
2828
return Processor::gfx940;
29-
@@ -146,6 +150,14 @@ static Processor getActiveArch(int deviceId)
29+
@@ -146,6 +150,10 @@ static Processor getActiveArch(int deviceId)
3030
{
3131
return Processor::gfx1102;
3232
}
3333
+ else if(deviceString.find("gfx1103") != std::string::npos)
3434
+ {
3535
+ return Processor::gfx1103;
36-
+ }
37-
+ else if(deviceString.find("gfx1150") != std::string::npos)
38-
+ {
39-
+ return Processor::gfx1150;
4036
+ }
4137
else if(deviceString.find("gfx1151") != std::string::npos)
4238
{
4339
return Processor::gfx1151;
4440
diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp
45-
index 70844136..6532c78e 100644
41+
index 94d18c7b..c47cefe8 100644
4642
--- a/library/src/include/handle.hpp
4743
+++ b/library/src/include/handle.hpp
4844
@@ -82,6 +82,7 @@ enum class Processor : int
@@ -53,12 +49,11 @@ index 70844136..6532c78e 100644
5349
gfx940 = 940,
5450
gfx941 = 941,
5551
gfx942 = 942,
56-
@@ -96,6 +97,8 @@ enum class Processor : int
52+
@@ -96,6 +97,7 @@ enum class Processor : int
5753
gfx1100 = 1100,
5854
gfx1101 = 1101,
5955
gfx1102 = 1102,
6056
+ gfx1103 = 1103,
61-
+ gfx1150 = 1150,
6257
gfx1151 = 1151,
6358
gfx1200 = 1200,
6459
gfx1201 = 1201

bin/rocmlibs/patches/rocprim.patch

+67-2
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,78 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index 462a5928..6caba409 100644
2+
index a5b9b127..0153b24d 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
55
@@ -99,7 +99,7 @@ if(NOT USE_HIP_CPU)
66
)
77
else()
88
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
99
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
10-
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"
10+
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx90c;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151;gfx1200;gfx1201"
1111
)
1212
endif()
1313

14+
diff --git a/rocprim/include/rocprim/device/config_types.hpp b/rocprim/include/rocprim/device/config_types.hpp
15+
index 58729b1d..22f7272a 100644
16+
--- a/rocprim/include/rocprim/device/config_types.hpp
17+
+++ b/rocprim/include/rocprim/device/config_types.hpp
18+
@@ -169,9 +169,11 @@ enum class target_arch : unsigned int
19+
gfx906 = 906,
20+
gfx908 = 908,
21+
gfx90a = 910,
22+
+ gfx90c = 912,
23+
gfx1030 = 1030,
24+
gfx1100 = 1100,
25+
gfx1102 = 1102,
26+
+ gfx1103 = 1103,
27+
unknown = std::numeric_limits<unsigned int>::max(),
28+
};
29+
#endif // DOXYGEN_SHOULD_SKIP_THIS
30+
@@ -204,16 +206,18 @@ constexpr bool prefix_equals(const char* lhs, const char* rhs, std::size_t n)
31+
constexpr target_arch get_target_arch_from_name(const char* const arch_name, const std::size_t n)
32+
{
33+
constexpr const char* target_names[]
34+
- = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx1030", "gfx1100", "gfx1102"};
35+
+ = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx90c", "gfx1030", "gfx1100", "gfx1102", "gfx1103"};
36+
constexpr target_arch target_architectures[] = {
37+
target_arch::gfx803,
38+
target_arch::gfx900,
39+
target_arch::gfx906,
40+
target_arch::gfx908,
41+
target_arch::gfx90a,
42+
+ target_arch::gfx90c,
43+
target_arch::gfx1030,
44+
target_arch::gfx1100,
45+
target_arch::gfx1102,
46+
+ target_arch::gfx1103,
47+
};
48+
static_assert(sizeof(target_names) / sizeof(target_names[0])
49+
== sizeof(target_architectures) / sizeof(target_architectures[0]),
50+
@@ -266,12 +270,16 @@ auto dispatch_target_arch(const target_arch target_arch)
51+
return Config::template architecture_config<target_arch::gfx908>::params;
52+
case target_arch::gfx90a:
53+
return Config::template architecture_config<target_arch::gfx90a>::params;
54+
+ case target_arch::gfx90c:
55+
+ return Config::template architecture_config<target_arch::gfx90c>::params;
56+
case target_arch::gfx1030:
57+
return Config::template architecture_config<target_arch::gfx1030>::params;
58+
case target_arch::gfx1100:
59+
return Config::template architecture_config<target_arch::gfx1100>::params;
60+
case target_arch::gfx1102:
61+
return Config::template architecture_config<target_arch::gfx1102>::params;
62+
+ case target_arch::gfx1103:
63+
+ return Config::template architecture_config<target_arch::gfx1103>::params;
64+
case target_arch::invalid:
65+
assert(false && "Invalid target architecture selected at runtime.");
66+
}
67+
diff --git a/scripts/autotune/create_optimization.py b/scripts/autotune/create_optimization.py
68+
index 130bdb3c..171de162 100755
69+
--- a/scripts/autotune/create_optimization.py
70+
+++ b/scripts/autotune/create_optimization.py
71+
@@ -41,7 +41,7 @@ from collections import defaultdict
72+
from typing import Dict, List, Callable, Optional, Tuple
73+
from jinja2 import Environment, PackageLoader, select_autoescape
74+
75+
-TARGET_ARCHITECTURES = ['gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', 'gfx1030', 'gfx1100', 'gfx1102']
76+
+TARGET_ARCHITECTURES = ['gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', 'gfx90c', 'gfx1030', 'gfx1100', 'gfx1102', 'gfx1103']
77+
# C++ typename used for optional types
78+
EMPTY_TYPENAME = "empty_type"

bin/rocmlibs/patches/tensile_aca95d17.patch

+9-57
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
2-
index 548b31f2..09adb3ef 100644
2+
index 548b31f2..de4c2dd5 100644
33
--- a/Tensile/AsmCaps.py
44
+++ b/Tensile/AsmCaps.py
5-
@@ -771,6 +771,94 @@ CACHED_ASM_CAPS = \
5+
@@ -771,6 +771,50 @@ CACHED_ASM_CAPS = \
66
'v_mov_b64': False,
77
'v_pk_fma_f16': True,
88
'v_pk_fmac_f16': False},
@@ -49,69 +49,23 @@ index 548b31f2..09adb3ef 100644
4949
+ 'v_mad_mix_f32': False,
5050
+ 'v_mov_b64': False,
5151
+ 'v_pk_fma_f16': True,
52-
+ 'v_pk_fmac_f16': False},
53-
+ (11, 5, 0): {'HasAddLshl': True,
54-
+ 'HasAtomicAdd': True,
55-
+ 'HasDirectToLdsDest': False,
56-
+ 'HasDirectToLdsNoDest': False,
57-
+ 'HasExplicitCO': True,
58-
+ 'HasExplicitNC': True,
59-
+ 'HasGLCModifier': True,
60-
+ 'HasNTModifier': False,
61-
+ 'HasLshlOr': True,
62-
+ 'HasMFMA': False,
63-
+ 'HasMFMA_b8': False,
64-
+ 'HasMFMA_bf16_1k': False,
65-
+ 'HasMFMA_bf16_original': False,
66-
+ 'HasMFMA_constSrc': False,
67-
+ 'HasMFMA_f64': False,
68-
+ 'HasMFMA_f8': False,
69-
+ 'HasMFMA_i8_908': False,
70-
+ 'HasMFMA_i8_940': False,
71-
+ 'HasMFMA_vgpr': False,
72-
+ 'HasMFMA_xf32': False,
73-
+ 'HasSMulHi': True,
74-
+ 'HasWMMA': True,
75-
+ 'KernargPreloading': False,
76-
+ 'MaxLgkmcnt': 15,
77-
+ 'MaxVmcnt': 63,
78-
+ 'SupportedISA': True,
79-
+ 'SupportedSource': True,
80-
+ 'VOP3v_dot4_i32_i8': False,
81-
+ 'v_dot2_f32_f16': True,
82-
+ 'v_dot2c_f32_f16': True,
83-
+ 'v_dot4_i32_i8': False,
84-
+ 'v_dot4c_i32_i8': False,
85-
+ 'v_fma_f16': True,
86-
+ 'v_fma_f32': True,
87-
+ 'v_fma_f64': True,
88-
+ 'v_fma_mix_f32': True,
89-
+ 'v_fmac_f16': False,
90-
+ 'v_fmac_f32': True,
91-
+ 'v_mac_f16': False,
92-
+ 'v_mac_f32': False,
93-
+ 'v_mad_mix_f32': False,
94-
+ 'v_mov_b64': False,
95-
+ 'v_pk_fma_f16': True,
9652
+ 'v_pk_fmac_f16': False},
9753
(11, 5, 1): {'HasAddLshl': True,
9854
'HasAtomicAdd': True,
9955
'HasDirectToLdsDest': False,
10056
diff --git a/Tensile/Common.py b/Tensile/Common.py
101-
index 66f2caa2..46c2d274 100644
57+
index 66f2caa2..4a77df59 100644
10258
--- a/Tensile/Common.py
10359
+++ b/Tensile/Common.py
104-
@@ -253,8 +253,8 @@ globalParameters["SupportedISA"] = [(8,0,3),
60+
@@ -253,7 +253,7 @@ globalParameters["SupportedISA"] = [(8,0,3),
10561
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
10662
(9,4,0), (9,4,1), (9,4,2),
10763
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
10864
- (11,0,0), (11,0,1), (11,0,2),
109-
- (11,5,1),
11065
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
111-
+ (11, 5, 0), (11,5,1),
66+
(11,5,1),
11267
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
11368

114-
globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed
11569
@@ -316,7 +316,7 @@ globalParameters["SeparateArchitectures"] = False # write Tensile library metada
11670

11771
globalParameters["LazyLibraryLoading"] = False # Load library and code object files when needed instead of at startup
@@ -121,7 +75,7 @@ index 66f2caa2..46c2d274 100644
12175

12276
globalParameters["ExperimentalLogicDir"] = "/experimental/"
12377

124-
@@ -328,13 +328,14 @@ architectureMap = {
78+
@@ -328,13 +328,13 @@ architectureMap = {
12579
'all':'_','gfx000':'none', 'gfx803':'r9nano', 'gfx900':'vega10', 'gfx900:xnack-':'vega10',
12680
'gfx906':'vega20', 'gfx906:xnack+':'vega20', 'gfx906:xnack-':'vega20',
12781
'gfx908':'arcturus','gfx908:xnack+':'arcturus', 'gfx908:xnack-':'arcturus',
@@ -134,11 +88,10 @@ index 66f2caa2..46c2d274 100644
13488
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
13589
- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
13690
+ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix',
137-
+ 'gfx1150':'gfx1150',
13891
'gfx1151':'gfx1151',
13992
'gfx1200':'gfx1200',
14093
'gfx1201':'gfx1201'
141-
@@ -2461,7 +2462,7 @@ def assignGlobalParameters( config ):
94+
@@ -2461,7 +2461,7 @@ def assignGlobalParameters( config ):
14295
if os.name == "nt":
14396
globalParameters["CurrentISA"] = (9,0,6)
14497
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
@@ -339,7 +292,7 @@ index 6e22a2c7..09345113 100644
339292
Build Tensile client executable; used for stand alone benchmarking (default).
340293
\-\-client-config
341294
diff --git a/pytest.ini b/pytest.ini
342-
index 13c43039..70ed7a3c 100644
295+
index 13c43039..23a53d35 100644
343296
--- a/pytest.ini
344297
+++ b/pytest.ini
345298
@@ -92,6 +92,7 @@ markers =
@@ -364,10 +317,9 @@ index 13c43039..70ed7a3c 100644
364317
skip-gfx940: architecture
365318
skip-gfx941: architecture
366319
skip-gfx942: architecture
367-
@@ -125,4 +128,6 @@ markers =
320+
@@ -125,4 +128,5 @@ markers =
368321
skip-gfx1100: architecture
369322
skip-gfx1101: architecture
370323
skip-gfx1102: architecture
371324
+ skip-gfx1103: architecture
372-
+ skip-gfx1150: architecture
373325
skip-gfx1151: architecture

bin/rocmlibs/test_powerinfer.sh

+2-2
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,8 @@ pushd $MODEL_DIR
2626
pushd $MODEL_DIR
2727

2828
# Install huggingface-cli to download the PowerInfer GGUF models
29-
pipx install -U "huggingface_hub[cli]"
30-
export PATH=$PATH:$HOME/.local/share/pipx/venvs/huggingface-hub/bin
29+
pip install -U "huggingface_hub[cli]"
30+
export PATH=$PATH:$HOME/.local/bin
3131

3232
# Use huggingface-cli to download the PowerInfer GGUF version of LLaMA(ReLU)-2-7B model
3333
huggingface-cli download --resume-download --local-dir ReluLLaMA-7B --local-dir-use-symlinks False PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF

0 commit comments

Comments
 (0)