Skip to content

Commit 48e8937

Browse files
authored
[Clang] Determine offloading architectures at Toolchain creation (#145799)
Summary: Previously there was an awkward disconnect where some offloading architectures were determined up front and others only later. This patch changes that so all of this information is generated once, at Toolchain creation. There are a few edge cases that will still need to be cleaned up. Namely, we don't yet handle the unusual SPIR-V case that mixes two separate toolchains, and we needed a pre-check to reject unrecognized architectures when inferring the toolchain from `--offload-arch` in OpenMP. It is possible we could also use this information for some host defines if needed.
1 parent 928a7e6 commit 48e8937

File tree

3 files changed

+75
-72
lines changed

3 files changed

+75
-72
lines changed

clang/include/clang/Driver/Driver.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,9 @@ class Driver {
367367
/// stored in it, and will clean them up when torn down.
368368
mutable llvm::StringMap<std::unique_ptr<ToolChain>> ToolChains;
369369

370-
/// Cache of known offloading architectures for the ToolChain already derived.
371-
/// This should only be modified when we first initialize the offloading
372-
/// toolchains.
373-
llvm::DenseMap<const ToolChain *, llvm::DenseSet<llvm::StringRef>> KnownArchs;
370+
/// The associated offloading architectures with each toolchain.
371+
llvm::DenseMap<const ToolChain *, llvm::SmallVector<llvm::StringRef>>
372+
OffloadArchs;
374373

375374
private:
376375
/// TranslateInputArgs - Create a new derived argument list from the input
@@ -535,11 +534,11 @@ class Driver {
535534

536535
/// Returns the set of bound architectures active for this offload kind.
537536
/// If there are no bound architectures we return a set containing only the
538-
/// empty string. The \p SuppressError option is used to suppress errors.
539-
llvm::DenseSet<StringRef>
537+
/// empty string.
538+
llvm::SmallVector<StringRef>
540539
getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
541540
Action::OffloadKind Kind, const ToolChain *TC,
542-
bool SuppressError = false) const;
541+
bool SpecificToolchain = true) const;
543542

544543
/// Check that the file referenced by Value exists. If it doesn't,
545544
/// issue a diagnostic and return false.

clang/lib/Driver/Driver.cpp

Lines changed: 68 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
988988
if (CudaInstallation.isValid())
989989
CudaInstallation.WarnIfUnsupportedVersion();
990990
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
991+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC,
992+
/*SpecificToolchain=*/true);
991993
} else if (IsHIP && !UseLLVMOffload) {
992994
if (auto *OMPTargetArg =
993995
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
@@ -1004,6 +1006,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10041006
getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple,
10051007
C.getDefaultToolChain().getTriple());
10061008
C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP);
1009+
1010+
// TODO: Fix 'amdgcnspirv' handling with the new driver.
1011+
if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
1012+
options::OPT_no_offload_new_driver, false))
1013+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC,
1014+
/*SpecificToolchain=*/true);
10071015
}
10081016

10091017
if (IsCuda || IsHIP)
@@ -1069,40 +1077,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10691077
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
10701078
C.getDefaultToolChain().getTriple());
10711079
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
1080+
OffloadArchs[&TC] =
1081+
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
1082+
/*SpecificToolchain=*/true);
10721083
}
10731084
} else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
10741085
((!IsHIP && !IsCuda) || UseLLVMOffload)) {
10751086
llvm::Triple AMDTriple("amdgcn-amd-amdhsa");
10761087
llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda");
10771088

1078-
// Attempt to deduce the offloading triple from the set of architectures.
1079-
// We can only correctly deduce NVPTX / AMDGPU triples currently.
1080-
for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1081-
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
1082-
C.getDefaultToolChain().getTriple());
1083-
1084-
llvm::DenseSet<StringRef> Archs =
1085-
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, true);
1086-
llvm::DenseSet<StringRef> ArchsForTarget;
1087-
for (StringRef Arch : Archs) {
1089+
for (StringRef A :
1090+
C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) {
1091+
for (StringRef Arch : llvm::split(A, ",")) {
10881092
bool IsNVPTX = IsNVIDIAOffloadArch(
10891093
StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch)));
10901094
bool IsAMDGPU = IsAMDOffloadArch(
10911095
StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch)));
1092-
if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive("native")) {
1096+
if (!IsNVPTX && !IsAMDGPU && !Arch.empty() &&
1097+
!Arch.equals_insensitive("native")) {
10931098
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
10941099
<< Arch;
10951100
return;
10961101
}
1097-
1098-
if (TT.isNVPTX() && IsNVPTX)
1099-
ArchsForTarget.insert(Arch);
1100-
else if (TT.isAMDGPU() && IsAMDGPU)
1101-
ArchsForTarget.insert(Arch);
11021102
}
1103-
if (!ArchsForTarget.empty()) {
1103+
}
1104+
1105+
// Attempt to deduce the offloading triple from the set of architectures.
1106+
// We can only correctly deduce NVPTX / AMDGPU triples currently.
1107+
for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1108+
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
1109+
C.getDefaultToolChain().getTriple());
1110+
1111+
llvm::SmallVector<StringRef> Archs =
1112+
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
1113+
/*SpecificToolchain=*/false);
1114+
if (!Archs.empty()) {
11041115
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
1105-
KnownArchs[&TC] = ArchsForTarget;
1116+
OffloadArchs[&TC] = Archs;
11061117
}
11071118
}
11081119

@@ -1143,9 +1154,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11431154
// going to create will depend on both.
11441155
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
11451156
for (const auto &TT : UniqueSYCLTriplesVec) {
1146-
auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
1147-
HostTC->getTriple());
1148-
C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL);
1157+
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
1158+
HostTC->getTriple());
1159+
C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL);
1160+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC,
1161+
/*SpecificToolchain=*/true);
11491162
}
11501163
}
11511164

@@ -4703,20 +4716,22 @@ static StringRef getCanonicalArchString(Compilation &C,
47034716
const llvm::opt::DerivedArgList &Args,
47044717
StringRef ArchStr,
47054718
const llvm::Triple &Triple,
4706-
bool SuppressError = false) {
4719+
bool SpecificToolchain) {
47074720
// Lookup the CUDA / HIP architecture string. Only report an error if we were
47084721
// expecting the triple to be only NVPTX / AMDGPU.
47094722
OffloadArch Arch =
47104723
StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
4711-
if (!SuppressError && Triple.isNVPTX() &&
4724+
if (Triple.isNVPTX() &&
47124725
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
4713-
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4714-
<< "CUDA" << ArchStr;
4726+
if (SpecificToolchain)
4727+
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4728+
<< "CUDA" << ArchStr;
47154729
return StringRef();
4716-
} else if (!SuppressError && Triple.isAMDGPU() &&
4730+
} else if (Triple.isAMDGPU() &&
47174731
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
4718-
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4719-
<< "HIP" << ArchStr;
4732+
if (SpecificToolchain)
4733+
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4734+
<< "HIP" << ArchStr;
47204735
return StringRef();
47214736
}
47224737

@@ -4725,13 +4740,9 @@ static StringRef getCanonicalArchString(Compilation &C,
47254740

47264741
if (IsAMDOffloadArch(Arch)) {
47274742
llvm::StringMap<bool> Features;
4728-
auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
4729-
if (!HIPTriple)
4730-
return StringRef();
4731-
auto Arch = parseTargetID(*HIPTriple, ArchStr, &Features);
4743+
std::optional<StringRef> Arch = parseTargetID(Triple, ArchStr, &Features);
47324744
if (!Arch) {
47334745
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
4734-
C.setContainsError();
47354746
return StringRef();
47364747
}
47374748
return Args.MakeArgStringRef(getCanonicalTargetID(*Arch, Features));
@@ -4754,10 +4765,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
47544765
return getConflictTargetIDCombination(ArchSet);
47554766
}
47564767

4757-
llvm::DenseSet<StringRef>
4768+
llvm::SmallVector<StringRef>
47584769
Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47594770
Action::OffloadKind Kind, const ToolChain *TC,
4760-
bool SuppressError) const {
4771+
bool SpecificToolchain) const {
47614772
if (!TC)
47624773
TC = &C.getDefaultToolChain();
47634774

@@ -4772,9 +4783,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47724783
: "--no-offload-arch");
47734784
}
47744785

4775-
if (KnownArchs.contains(TC))
4776-
return KnownArchs.lookup(TC);
4777-
47784786
llvm::DenseSet<StringRef> Archs;
47794787
for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
47804788
// Add or remove the seen architectures in order of appearance. If an
@@ -4784,7 +4792,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47844792
if (Arch == "native" || Arch.empty()) {
47854793
auto GPUsOrErr = TC->getSystemGPUArchs(Args);
47864794
if (!GPUsOrErr) {
4787-
if (SuppressError)
4795+
if (!SpecificToolchain)
47884796
llvm::consumeError(GPUsOrErr.takeError());
47894797
else
47904798
TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
@@ -4794,16 +4802,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47944802
}
47954803

47964804
for (auto ArchStr : *GPUsOrErr) {
4797-
Archs.insert(
4805+
StringRef CanonicalStr =
47984806
getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr),
4799-
TC->getTriple(), SuppressError));
4807+
TC->getTriple(), SpecificToolchain);
4808+
if (!CanonicalStr.empty())
4809+
Archs.insert(CanonicalStr);
4810+
else if (SpecificToolchain)
4811+
return llvm::SmallVector<StringRef>();
48004812
}
48014813
} else {
4802-
StringRef ArchStr = getCanonicalArchString(
4803-
C, Args, Arch, TC->getTriple(), SuppressError);
4804-
if (ArchStr.empty())
4805-
return Archs;
4806-
Archs.insert(ArchStr);
4814+
StringRef CanonicalStr = getCanonicalArchString(
4815+
C, Args, Arch, TC->getTriple(), SpecificToolchain);
4816+
if (!CanonicalStr.empty())
4817+
Archs.insert(CanonicalStr);
4818+
else if (SpecificToolchain)
4819+
return llvm::SmallVector<StringRef>();
48074820
}
48084821
}
48094822
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
@@ -4812,27 +4825,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48124825
Archs.clear();
48134826
} else {
48144827
StringRef ArchStr = getCanonicalArchString(
4815-
C, Args, Arch, TC->getTriple(), SuppressError);
4816-
if (ArchStr.empty())
4817-
return Archs;
4828+
C, Args, Arch, TC->getTriple(), SpecificToolchain);
48184829
Archs.erase(ArchStr);
48194830
}
48204831
}
48214832
}
48224833
}
48234834

48244835
if (auto ConflictingArchs =
4825-
getConflictOffloadArchCombination(Archs, TC->getTriple())) {
4836+
getConflictOffloadArchCombination(Archs, TC->getTriple()))
48264837
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
48274838
<< ConflictingArchs->first << ConflictingArchs->second;
4828-
C.setContainsError();
4829-
}
48304839

48314840
// Skip filling defaults if we're just querying what is available.
4832-
if (SuppressError)
4833-
return Archs;
4834-
4835-
if (Archs.empty()) {
4841+
if (SpecificToolchain && Archs.empty()) {
48364842
if (Kind == Action::OFK_Cuda) {
48374843
Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
48384844
} else if (Kind == Action::OFK_HIP) {
@@ -4858,12 +4864,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48584864
}
48594865
}
48604866
}
4861-
} else {
4862-
Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
4863-
Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
48644867
}
4868+
Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
4869+
Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
48654870

4866-
return Archs;
4871+
SmallVector<StringRef> Sorted(Archs.begin(), Archs.end());
4872+
llvm::sort(Sorted);
4873+
return Sorted;
48674874
}
48684875

48694876
Action *Driver::BuildOffloadingActions(Compilation &C,
@@ -4927,10 +4934,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49274934
// Get the product of all bound architectures and toolchains.
49284935
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
49294936
for (const ToolChain *TC : ToolChains) {
4930-
llvm::DenseSet<StringRef> Arches = getOffloadArchs(C, Args, Kind, TC);
4931-
SmallVector<StringRef, 0> Sorted(Arches.begin(), Arches.end());
4932-
llvm::sort(Sorted);
4933-
for (StringRef Arch : Sorted) {
4937+
for (StringRef Arch : OffloadArchs.lookup(TC)) {
49344938
TCAndArchs.push_back(std::make_pair(TC, Arch));
49354939
DeviceActions.push_back(
49364940
C.MakeAction<InputAction>(*InputArg, InputType, CUID));

clang/test/Driver/openmp-offload-gpu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@
307307
// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out
308308

309309
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
310-
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
310+
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
311311
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
312312
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
313313

0 commit comments

Comments
 (0)