Skip to content

Commit 659f5ac

Browse files
authored
[NVPTX] Add support for PTX ISA v8.8 (llvm#136639)
Support PTX ISA version 8.8 (`-mattr=+ptx88`) from CUDA 12.9. The following new targets are also added: SM103 and SM121 (`sm_103`, `sm_103a`, `sm_121`, `sm_121a`). Also, some existing definitions in NVPTX.td were reformatted (whitespace realignment only). See https://docs.nvidia.com/cuda/parallel-thread-execution/#changes-in-ptx-isa-version-8-8
1 parent ebe7fd6 commit 659f5ac

File tree

2 files changed

+51
-27
lines changed

2 files changed

+51
-27
lines changed

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,21 @@ class FeaturePTX<int version>:
3636

3737
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
3838
60, 61, 62, 70, 72, 75, 80, 86, 87,
39-
89, 90, 100, 101, 120] in
39+
89, 90, 100, 101, 103, 120, 121] in
4040
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
4141

42-
def SM90a: FeatureSM<"90a", 901>;
42+
// Arch-specific targets. PTX for these is not compatible with any other
43+
// architectures.
44+
def SM90a : FeatureSM<"90a", 901>;
4345
def SM100a: FeatureSM<"100a", 1001>;
4446
def SM101a: FeatureSM<"101a", 1011>;
47+
def SM103a: FeatureSM<"103a", 1031>;
4548
def SM120a: FeatureSM<"120a", 1201>;
49+
def SM121a: FeatureSM<"121a", 1211>;
4650

4751
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
4852
70, 71, 72, 73, 74, 75, 76, 77, 78,
49-
80, 81, 82, 83, 84, 85, 86, 87] in
53+
80, 81, 82, 83, 84, 85, 86, 87, 88] in
5054
def PTX#version: FeaturePTX<version>;
5155

5256
//===----------------------------------------------------------------------===//
@@ -56,33 +60,37 @@ foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
5660
class Proc<string Name, list<SubtargetFeature> Features>
5761
: Processor<Name, NoItineraries, Features>;
5862

59-
def : Proc<"sm_20", [SM20, PTX32]>;
60-
def : Proc<"sm_21", [SM21, PTX32]>;
61-
def : Proc<"sm_30", [SM30]>;
62-
def : Proc<"sm_32", [SM32, PTX40]>;
63-
def : Proc<"sm_35", [SM35, PTX32]>;
64-
def : Proc<"sm_37", [SM37, PTX41]>;
65-
def : Proc<"sm_50", [SM50, PTX40]>;
66-
def : Proc<"sm_52", [SM52, PTX41]>;
67-
def : Proc<"sm_53", [SM53, PTX42]>;
68-
def : Proc<"sm_60", [SM60, PTX50]>;
69-
def : Proc<"sm_61", [SM61, PTX50]>;
70-
def : Proc<"sm_62", [SM62, PTX50]>;
71-
def : Proc<"sm_70", [SM70, PTX60]>;
72-
def : Proc<"sm_72", [SM72, PTX61]>;
73-
def : Proc<"sm_75", [SM75, PTX63]>;
74-
def : Proc<"sm_80", [SM80, PTX70]>;
75-
def : Proc<"sm_86", [SM86, PTX71]>;
76-
def : Proc<"sm_87", [SM87, PTX74]>;
77-
def : Proc<"sm_89", [SM89, PTX78]>;
78-
def : Proc<"sm_90", [SM90, PTX78]>;
79-
def : Proc<"sm_90a", [SM90a, PTX80]>;
80-
def : Proc<"sm_100", [SM100, PTX86]>;
63+
def : Proc<"sm_20", [SM20, PTX32]>;
64+
def : Proc<"sm_21", [SM21, PTX32]>;
65+
def : Proc<"sm_30", [SM30]>;
66+
def : Proc<"sm_32", [SM32, PTX40]>;
67+
def : Proc<"sm_35", [SM35, PTX32]>;
68+
def : Proc<"sm_37", [SM37, PTX41]>;
69+
def : Proc<"sm_50", [SM50, PTX40]>;
70+
def : Proc<"sm_52", [SM52, PTX41]>;
71+
def : Proc<"sm_53", [SM53, PTX42]>;
72+
def : Proc<"sm_60", [SM60, PTX50]>;
73+
def : Proc<"sm_61", [SM61, PTX50]>;
74+
def : Proc<"sm_62", [SM62, PTX50]>;
75+
def : Proc<"sm_70", [SM70, PTX60]>;
76+
def : Proc<"sm_72", [SM72, PTX61]>;
77+
def : Proc<"sm_75", [SM75, PTX63]>;
78+
def : Proc<"sm_80", [SM80, PTX70]>;
79+
def : Proc<"sm_86", [SM86, PTX71]>;
80+
def : Proc<"sm_87", [SM87, PTX74]>;
81+
def : Proc<"sm_89", [SM89, PTX78]>;
82+
def : Proc<"sm_90", [SM90, PTX78]>;
83+
def : Proc<"sm_90a", [SM90a, PTX80]>;
84+
def : Proc<"sm_100", [SM100, PTX86]>;
8185
def : Proc<"sm_100a", [SM100a, PTX86]>;
82-
def : Proc<"sm_101", [SM101, PTX86]>;
86+
def : Proc<"sm_101", [SM101, PTX86]>;
8387
def : Proc<"sm_101a", [SM101a, PTX86]>;
84-
def : Proc<"sm_120", [SM120, PTX87]>;
88+
def : Proc<"sm_103", [SM103, PTX88]>;
89+
def : Proc<"sm_103a", [SM103a, PTX88]>;
90+
def : Proc<"sm_120", [SM120, PTX87]>;
8591
def : Proc<"sm_120a", [SM120a, PTX87]>;
92+
def : Proc<"sm_121", [SM121, PTX88]>;
93+
def : Proc<"sm_121a", [SM121a, PTX88]>;
8694

8795
def NVPTXInstrInfo : InstrInfo {
8896
}

llvm/test/CodeGen/NVPTX/sm-version.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@
2020
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a
2121
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101 | FileCheck %s --check-prefix=SM101
2222
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a
23+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103 | FileCheck %s --check-prefix=SM103
24+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a
2325
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120 | FileCheck %s --check-prefix=SM120
2426
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a
27+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121 | FileCheck %s --check-prefix=SM121
28+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a
2529

2630
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
2731
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_21 | FileCheck %s --check-prefix=SM21
@@ -45,8 +49,12 @@
4549
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a
4650
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101 | FileCheck %s --check-prefix=SM101
4751
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a
52+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103 | FileCheck %s --check-prefix=SM103
53+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a
4854
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120 | FileCheck %s --check-prefix=SM120
4955
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a
56+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121 | FileCheck %s --check-prefix=SM121
57+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a
5058

5159
; SM20: .version 3.2
5260
; SM21: .version 3.2
@@ -70,8 +78,12 @@
7078
; SM100a: .version 8.6
7179
; SM101: .version 8.6
7280
; SM101a: .version 8.6
81+
; SM103: .version 8.8
82+
; SM103a: .version 8.8
7383
; SM120: .version 8.7
7484
; SM120a: .version 8.7
85+
; SM121: .version 8.8
86+
; SM121a: .version 8.8
7587

7688
; SM20: .target sm_20
7789
; SM21: .target sm_21
@@ -95,5 +107,9 @@
95107
; SM100a: .target sm_100a
96108
; SM101: .target sm_101
97109
; SM101a: .target sm_101a
110+
; SM103: .target sm_103
111+
; SM103a: .target sm_103a
98112
; SM120: .target sm_120
99113
; SM120a: .target sm_120a
114+
; SM121: .target sm_121
115+
; SM121a: .target sm_121a

0 commit comments

Comments
 (0)