Skip to content

[rocmlibs] Add support for gfx1150 #1335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/aomp_common_vars
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ _sep=""
# _sep=";"
#done

ROCMLIBS_GFXLIST=${ROCMLIBS_GFXLIST:-"gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"}
ROCMLIBS_GFXLIST=${ROCMLIBS_GFXLIST:-"gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"}
_gfxlist="$ROCMLIBS_GFXLIST"

# Calculate the number of threads to use for make
Expand Down
16 changes: 16 additions & 0 deletions bin/patches/clr.patch
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,19 @@ index 72f406210..6004cf325 100644
PATHS
/opt/rocm/
${ROCM_INSTALL_PATH}
diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp
index 751161a58..3271157b7 100644
--- a/rocclr/device/rocm/rocdevice.cpp
+++ b/rocclr/device/rocm/rocdevice.cpp
@@ -679,11 +679,6 @@ bool Device::create() {
pciDeviceId_);
return false;
}
- if (agent_isas.count != 1) {
- LogPrintfError("HSA device %s (PCI ID %x) has %u ISAs but can only support a single ISA",
- agent_name, pciDeviceId_, agent_isas.count);
- return false;
- }

uint32_t isa_name_length = 0;
if (HSA_STATUS_SUCCESS !=
1 change: 1 addition & 0 deletions bin/patches/patch-control-file_20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ rocprofiler: rocprofiler-combined-no-aql-ok-fix-cov6.patch
babelstream: babelstream-usm.patch
llvm-project: ATD_ASO_full.patch
UMT: umt.patch
clr: clr.patch
2 changes: 1 addition & 1 deletion bin/patches/patch-control-file_21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ GenASis: genasis.patch
GenASiS_Basics: genasis_basics.patch
hipamd: hipamd-rpath.patch
bolt: bolt.patch
clr:
clr: clr.patch
rocr-runtime: rocr-runtime-combined-numa-remove-gfx940-gfx941-revert-add-gfx9-4-generic-support.patch
rocprofiler: rocprofiler-combined-no-aql-ok-fix-cov6.patch
babelstream: babelstream-usm.patch
Expand Down
3 changes: 3 additions & 0 deletions bin/rocmlibs/build_powerinfer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ if [ "$1" == "install" ] ; then
pushd $_repo_dir
cd gguf-py
echo "Installing gguf python package"
python3 -m venv $AOMP_INSTALL_DIR/../venv
source $AOMP_INSTALL_DIR/../venv/bin/activate
pip install .
if [ $? != 0 ] ; then
echo "ERROR pip install failed for PowerInfer/gguf-py package"
Expand All @@ -130,6 +132,7 @@ if [ "$1" == "install" ] ; then
echo "ERROR pip install failed for PowerInfer/powerinfer-py package"
exit 1
fi
deactivate
popd
removepatch $_repo_dir
else
Expand Down
2 changes: 1 addition & 1 deletion bin/rocmlibs/patches/patch-control-file_20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ rocPRIM: rocprim.patch
rocSPARSE: rocsparse.patch
rocSOLVER: rocsolver.patch
hipBLAS: hipblas.patch
PowerInfer: powerinfer.patch
PowerInfer: powerinfer.patch
2 changes: 1 addition & 1 deletion bin/rocmlibs/patches/powerinfer.patch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ index 4cf28d5..2cf69a1 100644
# in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
# and select the line that matches the current nixpkgs version of rocBLAS.
- "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx90c;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103"
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx90c;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
];
};
apps.llama-server = {
17 changes: 11 additions & 6 deletions bin/rocmlibs/patches/rocblas.patch
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f70de1cf..5eedfcc3 100644
index 2cb3b303..ad442656 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -112,7 +112,7 @@ if (NOT BUILD_ADDRESS_SANITIZER)
set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102")
- set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201")
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
+ set( TARGET_LIST_ROCM_6.3 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201;gfx1103;gfx90c")
else()
set( TARGET_LIST_ROCM_5.6 "gfx908:xnack+;gfx90a:xnack+")
set( TARGET_LIST_ROCM_5.7 "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+")
diff --git a/library/src/handle.cpp b/library/src/handle.cpp
index ea1fa1a7..cb14874c 100644
index ea1fa1a7..7d2a05c3 100644
--- a/library/src/handle.cpp
+++ b/library/src/handle.cpp
@@ -106,6 +106,10 @@ static Processor getActiveArch(int deviceId)
Expand All @@ -26,19 +26,23 @@ index ea1fa1a7..cb14874c 100644
else if(deviceString.find("gfx940") != std::string::npos)
{
return Processor::gfx940;
@@ -146,6 +150,10 @@ static Processor getActiveArch(int deviceId)
@@ -146,6 +150,14 @@ static Processor getActiveArch(int deviceId)
{
return Processor::gfx1102;
}
+ else if(deviceString.find("gfx1103") != std::string::npos)
+ {
+ return Processor::gfx1103;
+ }
+ else if(deviceString.find("gfx1150") != std::string::npos)
+ {
+ return Processor::gfx1150;
+ }
else if(deviceString.find("gfx1151") != std::string::npos)
{
return Processor::gfx1151;
diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp
index 94d18c7b..c47cefe8 100644
index 70844136..6532c78e 100644
--- a/library/src/include/handle.hpp
+++ b/library/src/include/handle.hpp
@@ -82,6 +82,7 @@ enum class Processor : int
Expand All @@ -49,11 +53,12 @@ index 94d18c7b..c47cefe8 100644
gfx940 = 940,
gfx941 = 941,
gfx942 = 942,
@@ -96,6 +97,7 @@ enum class Processor : int
@@ -96,6 +97,8 @@ enum class Processor : int
gfx1100 = 1100,
gfx1101 = 1101,
gfx1102 = 1102,
+ gfx1103 = 1103,
+ gfx1150 = 1150,
gfx1151 = 1151,
gfx1200 = 1200,
gfx1201 = 1201
Expand Down
69 changes: 2 additions & 67 deletions bin/rocmlibs/patches/rocprim.patch
Original file line number Diff line number Diff line change
@@ -1,78 +1,13 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5b9b127..0153b24d 100644
index 462a5928..6caba409 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -99,7 +99,7 @@ if(NOT USE_HIP_CPU)
)
else()
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
- TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx90c;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151;gfx1200;gfx1201"
+ TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"
)
endif()

diff --git a/rocprim/include/rocprim/device/config_types.hpp b/rocprim/include/rocprim/device/config_types.hpp
index 58729b1d..22f7272a 100644
--- a/rocprim/include/rocprim/device/config_types.hpp
+++ b/rocprim/include/rocprim/device/config_types.hpp
@@ -169,9 +169,11 @@ enum class target_arch : unsigned int
gfx906 = 906,
gfx908 = 908,
gfx90a = 910,
+ gfx90c = 912,
gfx1030 = 1030,
gfx1100 = 1100,
gfx1102 = 1102,
+ gfx1103 = 1103,
unknown = std::numeric_limits<unsigned int>::max(),
};
#endif // DOXYGEN_SHOULD_SKIP_THIS
@@ -204,16 +206,18 @@ constexpr bool prefix_equals(const char* lhs, const char* rhs, std::size_t n)
constexpr target_arch get_target_arch_from_name(const char* const arch_name, const std::size_t n)
{
constexpr const char* target_names[]
- = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx1030", "gfx1100", "gfx1102"};
+ = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx90c", "gfx1030", "gfx1100", "gfx1102", "gfx1103"};
constexpr target_arch target_architectures[] = {
target_arch::gfx803,
target_arch::gfx900,
target_arch::gfx906,
target_arch::gfx908,
target_arch::gfx90a,
+ target_arch::gfx90c,
target_arch::gfx1030,
target_arch::gfx1100,
target_arch::gfx1102,
+ target_arch::gfx1103,
};
static_assert(sizeof(target_names) / sizeof(target_names[0])
== sizeof(target_architectures) / sizeof(target_architectures[0]),
@@ -266,12 +270,16 @@ auto dispatch_target_arch(const target_arch target_arch)
return Config::template architecture_config<target_arch::gfx908>::params;
case target_arch::gfx90a:
return Config::template architecture_config<target_arch::gfx90a>::params;
+ case target_arch::gfx90c:
+ return Config::template architecture_config<target_arch::gfx90c>::params;
case target_arch::gfx1030:
return Config::template architecture_config<target_arch::gfx1030>::params;
case target_arch::gfx1100:
return Config::template architecture_config<target_arch::gfx1100>::params;
case target_arch::gfx1102:
return Config::template architecture_config<target_arch::gfx1102>::params;
+ case target_arch::gfx1103:
+ return Config::template architecture_config<target_arch::gfx1103>::params;
case target_arch::invalid:
assert(false && "Invalid target architecture selected at runtime.");
}
diff --git a/scripts/autotune/create_optimization.py b/scripts/autotune/create_optimization.py
index 130bdb3c..171de162 100755
--- a/scripts/autotune/create_optimization.py
+++ b/scripts/autotune/create_optimization.py
@@ -41,7 +41,7 @@ from collections import defaultdict
from typing import Dict, List, Callable, Optional, Tuple
from jinja2 import Environment, PackageLoader, select_autoescape

-TARGET_ARCHITECTURES = ['gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', 'gfx1030', 'gfx1100', 'gfx1102']
+TARGET_ARCHITECTURES = ['gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', 'gfx90c', 'gfx1030', 'gfx1100', 'gfx1102', 'gfx1103']
# C++ typename used for optional types
EMPTY_TYPENAME = "empty_type"
66 changes: 57 additions & 9 deletions bin/rocmlibs/patches/tensile_aca95d17.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py
index 548b31f2..de4c2dd5 100644
index 548b31f2..09adb3ef 100644
--- a/Tensile/AsmCaps.py
+++ b/Tensile/AsmCaps.py
@@ -771,6 +771,50 @@ CACHED_ASM_CAPS = \
@@ -771,6 +771,94 @@ CACHED_ASM_CAPS = \
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
Expand Down Expand Up @@ -49,23 +49,69 @@ index 548b31f2..de4c2dd5 100644
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
+ (11, 5, 0): {'HasAddLshl': True,
+ 'HasAtomicAdd': True,
+ 'HasDirectToLdsDest': False,
+ 'HasDirectToLdsNoDest': False,
+ 'HasExplicitCO': True,
+ 'HasExplicitNC': True,
+ 'HasGLCModifier': True,
+ 'HasNTModifier': False,
+ 'HasLshlOr': True,
+ 'HasMFMA': False,
+ 'HasMFMA_b8': False,
+ 'HasMFMA_bf16_1k': False,
+ 'HasMFMA_bf16_original': False,
+ 'HasMFMA_constSrc': False,
+ 'HasMFMA_f64': False,
+ 'HasMFMA_f8': False,
+ 'HasMFMA_i8_908': False,
+ 'HasMFMA_i8_940': False,
+ 'HasMFMA_vgpr': False,
+ 'HasMFMA_xf32': False,
+ 'HasSMulHi': True,
+ 'HasWMMA': True,
+ 'KernargPreloading': False,
+ 'MaxLgkmcnt': 15,
+ 'MaxVmcnt': 63,
+ 'SupportedISA': True,
+ 'SupportedSource': True,
+ 'VOP3v_dot4_i32_i8': False,
+ 'v_dot2_f32_f16': True,
+ 'v_dot2c_f32_f16': True,
+ 'v_dot4_i32_i8': False,
+ 'v_dot4c_i32_i8': False,
+ 'v_fma_f16': True,
+ 'v_fma_f32': True,
+ 'v_fma_f64': True,
+ 'v_fma_mix_f32': True,
+ 'v_fmac_f16': False,
+ 'v_fmac_f32': True,
+ 'v_mac_f16': False,
+ 'v_mac_f32': False,
+ 'v_mad_mix_f32': False,
+ 'v_mov_b64': False,
+ 'v_pk_fma_f16': True,
+ 'v_pk_fmac_f16': False},
(11, 5, 1): {'HasAddLshl': True,
'HasAtomicAdd': True,
'HasDirectToLdsDest': False,
diff --git a/Tensile/Common.py b/Tensile/Common.py
index 66f2caa2..4a77df59 100644
index 66f2caa2..46c2d274 100644
--- a/Tensile/Common.py
+++ b/Tensile/Common.py
@@ -253,7 +253,7 @@ globalParameters["SupportedISA"] = [(8,0,3),
@@ -253,8 +253,8 @@ globalParameters["SupportedISA"] = [(8,0,3),
(9,0,0), (9,0,6), (9,0,8), (9,0,10),
(9,4,0), (9,4,1), (9,4,2),
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
- (11,0,0), (11,0,1), (11,0,2),
- (11,5,1),
+ (11,0,0), (11,0,1), (11,0,2), (11,0,3),
(11,5,1),
+ (11, 5, 0), (11,5,1),
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures

globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed
@@ -316,7 +316,7 @@ globalParameters["SeparateArchitectures"] = False # write Tensile library metada

globalParameters["LazyLibraryLoading"] = False # Load library and code object files when needed instead of at startup
Expand All @@ -75,7 +121,7 @@ index 66f2caa2..4a77df59 100644

globalParameters["ExperimentalLogicDir"] = "/experimental/"

@@ -328,13 +328,13 @@ architectureMap = {
@@ -328,13 +328,14 @@ architectureMap = {
'all':'_','gfx000':'none', 'gfx803':'r9nano', 'gfx900':'vega10', 'gfx900:xnack-':'vega10',
'gfx906':'vega20', 'gfx906:xnack+':'vega20', 'gfx906:xnack-':'vega20',
'gfx908':'arcturus','gfx908:xnack+':'arcturus', 'gfx908:xnack-':'arcturus',
Expand All @@ -88,10 +134,11 @@ index 66f2caa2..4a77df59 100644
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
+ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix',
+ 'gfx1150':'gfx1150',
'gfx1151':'gfx1151',
'gfx1200':'gfx1200',
'gfx1201':'gfx1201'
@@ -2461,7 +2461,7 @@ def assignGlobalParameters( config ):
@@ -2461,7 +2462,7 @@ def assignGlobalParameters( config ):
if os.name == "nt":
globalParameters["CurrentISA"] = (9,0,6)
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
Expand Down Expand Up @@ -292,7 +339,7 @@ index 6e22a2c7..09345113 100644
Build Tensile client executable; used for stand alone benchmarking (default).
\-\-client-config
diff --git a/pytest.ini b/pytest.ini
index 13c43039..23a53d35 100644
index 13c43039..70ed7a3c 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -92,6 +92,7 @@ markers =
Expand All @@ -317,9 +364,10 @@ index 13c43039..23a53d35 100644
skip-gfx940: architecture
skip-gfx941: architecture
skip-gfx942: architecture
@@ -125,4 +128,5 @@ markers =
@@ -125,4 +128,6 @@ markers =
skip-gfx1100: architecture
skip-gfx1101: architecture
skip-gfx1102: architecture
+ skip-gfx1103: architecture
+ skip-gfx1150: architecture
skip-gfx1151: architecture
4 changes: 2 additions & 2 deletions bin/rocmlibs/test_powerinfer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ pushd $MODEL_DIR
pushd $MODEL_DIR

# Install huggingface-cli to download the PowerInfer GGUF models
pip install -U "huggingface_hub[cli]"
export PATH=$PATH:$HOME/.local/bin
pipx install -U "huggingface_hub[cli]"
export PATH=$PATH:$HOME/.local/share/pipx/venvs/huggingface-hub/bin

# Use huggingface-cli to download the PowerInfer GGUF version of LLaMA(ReLU)-2-7B model
huggingface-cli download --resume-download --local-dir ReluLLaMA-7B --local-dir-use-symlinks False PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF
Expand Down