Skip to content

[SYCL][Offload] Add SYCLBIN format and dump tool #16873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1b9f3a1
[SYCL][Offload] Add SYCLBIN format and dump tool
steffenlarsen Feb 3, 2025
43c6227
Fix include mistake
steffenlarsen Feb 4, 2025
86e3128
Address comments
steffenlarsen Feb 4, 2025
1685f0f
SPIRV -> SPIR-V
steffenlarsen Feb 4, 2025
a024f11
Ignore unused variable in syclbin-dump
steffenlarsen Feb 4, 2025
716bdab
Disable on Windows until driver option is in place
steffenlarsen Feb 4, 2025
d07b06c
Fix fault revert
steffenlarsen Feb 4, 2025
b598b66
Address comments and improve code
steffenlarsen Feb 6, 2025
a4324ab
Merge remote-tracking branch 'intel/sycl' into steffen/syclbin_linker…
steffenlarsen Feb 6, 2025
be91678
Merge remote-tracking branch 'intel/sycl' into steffen/syclbin_linker…
steffenlarsen Feb 24, 2025
a3b6bf5
Adjust for new SYCLBIN design
steffenlarsen Feb 25, 2025
b01db8f
Fix formatting
steffenlarsen Feb 26, 2025
224db51
Separate byte tables
steffenlarsen Feb 26, 2025
da26032
Merge remote-tracking branch 'intel/sycl' into steffen/syclbin_linker…
steffenlarsen Mar 25, 2025
c5b5060
Merge branch 'sycl' into steffen/syclbin_linker_wrapper_impl
steffenlarsen Mar 26, 2025
e7075bb
Separate loop logic
steffenlarsen Mar 26, 2025
e74fc36
Fix formatting
steffenlarsen Mar 27, 2025
7c5b050
Add unittest
steffenlarsen Mar 28, 2025
4197187
Apply suggestions from code review
steffenlarsen Mar 31, 2025
9ef58cd
Rule of 5
steffenlarsen Mar 31, 2025
0cc7ba7
Fix syclbin-dump
steffenlarsen Mar 31, 2025
2fb9071
Fix syclbin-dump scopedindent assignment operators
steffenlarsen Apr 9, 2025
159428e
Fix RHEL build
steffenlarsen Apr 9, 2025
e58061e
Address some comments
steffenlarsen Apr 16, 2025
c280427
Fix formatting and unused var
steffenlarsen Apr 16, 2025
799dcc6
Refactor to write to ostream
steffenlarsen Apr 23, 2025
84b619f
Merge remote-tracking branch 'intel/sycl' into steffen/syclbin_linker…
steffenlarsen Apr 25, 2025
498779e
Remove noexcept
steffenlarsen Apr 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 105 additions & 15 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Object/SYCLBIN.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
Expand Down Expand Up @@ -151,6 +152,8 @@ static std::optional<llvm::module_split::IRSplitMode> SYCLModuleSplitMode;

static bool UseSYCLPostLinkTool;

static bool OutputSYCLBIN;

static SmallString<128> OffloadImageDumpDir;

using OffloadingImage = OffloadBinary::OffloadingImage;
Expand Down Expand Up @@ -1182,6 +1185,62 @@ static Expected<StringRef> runCompile(StringRef &InputFile,
return *OutputFileOrErr;
}

/// Write an OffloadBinary containing the serialized SYCLBIN resulting from
/// \p ModuleDescs to the ExecutableName file with the .syclbin extension.
static Expected<StringRef>
packageSYCLBIN(SYCLBIN::BundleState State,
const ArrayRef<SYCLBIN::SYCLBINModuleDesc> Modules) {
SYCLBIN::SYCLBINDesc SYCLBIND{State, Modules};
size_t SYCLBINByteSize = 0;
if (Error E = SYCLBIND.getSYCLBINByteSite().moveInto(SYCLBINByteSize))
return std::move(E);

SmallString<0> SYCLBINImage;
SYCLBINImage.reserve(SYCLBINByteSize);
raw_svector_ostream SYCLBINImageOS{SYCLBINImage};
if (Error E = SYCLBIN::write(SYCLBIND, SYCLBINImageOS))
return std::move(E);

OffloadingImage Image{};
Image.TheImageKind = IMG_SYCLBIN;
Image.TheOffloadKind = OFK_SYCL;
Image.Image = MemoryBuffer::getMemBuffer(SYCLBINImage, /*BufferName=*/"",
/*RequiresNullTerminator=*/false);

std::unique_ptr<MemoryBuffer> Binary = MemoryBuffer::getMemBufferCopy(
OffloadBinary::write(Image), Image.Image->getBufferIdentifier());

auto OutFileOrErr =
createOutputFile(sys::path::filename(ExecutableName), "syclbin");
if (!OutFileOrErr)
return OutFileOrErr.takeError();

Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(*OutFileOrErr, Binary->getBufferSize());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
llvm::copy(Binary->getBuffer(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);

return *OutFileOrErr;
}

Error mergeSYCLBIN(ArrayRef<StringRef> Files, const ArgList &Args) {
// Fast path for the general case where there's only one file. In this case we
// do not need to parse it and can instead simply copy it.
if (Files.size() == 1) {
if (std::error_code EC = sys::fs::copy_file(Files[0], ExecutableName))
return createFileError(ExecutableName, EC);
return Error::success();
}
// TODO: Merge SYCLBIN files here and write to ExecutableName output.
// Use the first file as the base and modify.
assert(Files.size() == 1);
return Error::success();
}

// Run wrapping library and clang
static Expected<StringRef>
runWrapperAndCompile(std::vector<module_split::SplitModule> &SplitModules,
Expand Down Expand Up @@ -1962,6 +2021,12 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
// object file.
SmallVector<StringRef> WrappedOutput;

// When creating SYCLBIN files, we need to store the compiled modules for
// combined packaging.
std::mutex SYCLBINModulesMtx;
SYCLBIN::BundleState SYCLBINState = SYCLBIN::BundleState::Input;
SmallVector<SYCLBIN::SYCLBINModuleDesc> SYCLBINModules;

// Initialize the images with any overriding inputs.
if (Args.hasArg(OPT_override_image))
if (Error Err = handleOverrideImages(Args, Images))
Expand Down Expand Up @@ -2067,18 +2132,26 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
}
}

// TODO(NOM7): Remove this call and use community flow for bundle/wrap
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs);
if (!OutputFile)
return OutputFile.takeError();

// SYCL offload kind images are all ready to be sent to host linker.
// TODO: Currently, device code wrapping for SYCL offload happens in a
// separate path inside 'linkDevice' call seen above.
// This will eventually be refactored to use the 'common' wrapping logic
// that is used for other offload kinds.
std::scoped_lock Guard(ImageMtx);
WrappedOutput.push_back(*OutputFile);
if (OutputSYCLBIN) {
SYCLBIN::SYCLBINModuleDesc MD;
MD.ArchString = LinkerArgs.getLastArgValue(OPT_arch_EQ);
MD.SplitModules = std::move(SplitModules);
std::scoped_lock Guard(SYCLBINModulesMtx);
SYCLBINModules.emplace_back(std::move(MD));
} else {
// TODO(NOM7): Remove this call and use community flow for bundle/wrap
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs);
if (!OutputFile)
return OutputFile.takeError();

// SYCL offload kind images are all ready to be sent to host linker.
// TODO: Currently, device code wrapping for SYCL offload happens in a
// separate path inside 'linkDevice' call seen above.
// This will eventually be refactored to use the 'common' wrapping logic
// that is used for other offload kinds.
std::scoped_lock Guard(ImageMtx);
WrappedOutput.push_back(*OutputFile);
}
}
if (HasNonSYCLOffloadKinds) {
// Write any remaining device inputs to an output file.
Expand Down Expand Up @@ -2129,6 +2202,13 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
if (Err)
return std::move(Err);

if (OutputSYCLBIN) {
auto OutputOrErr = sycl::packageSYCLBIN(SYCLBINState, SYCLBINModules);
if (!OutputOrErr)
return OutputOrErr.takeError();
WrappedOutput.push_back(*OutputOrErr);
}

for (auto &[Kind, Input] : Images) {
if (Kind == OFK_SYCL)
continue;
Expand Down Expand Up @@ -2585,6 +2665,11 @@ int main(int Argc, char **Argv) {
"-no-use-sycl-post-link-tool options can't "
"be used together."));

OutputSYCLBIN = Args.hasArg(OPT_syclbin);
if (OutputSYCLBIN && Args.hasArg(OPT_sycl_embed_ir))
reportError(createStringError(
"-sycl-embed_ir and -syclbin can't be used together."));

if (Args.hasArg(OPT_sycl_module_split_mode_EQ)) {
if (UseSYCLPostLinkTool)
reportError(createStringError(
Expand Down Expand Up @@ -2623,9 +2708,14 @@ int main(int Argc, char **Argv) {
if (!FilesOrErr)
reportError(FilesOrErr.takeError());

// Run the host linking job with the rendered arguments.
if (Error Err = runLinker(*FilesOrErr, Args))
reportError(std::move(Err));
if (OutputSYCLBIN) {
if (Error Err = sycl::mergeSYCLBIN(*FilesOrErr, Args))
reportError(std::move(Err));
} else {
// Run the host linking job with the rendered arguments.
if (Error Err = runLinker(*FilesOrErr, Args))
reportError(std::move(Err));
}
}

if (const opt::Arg *Arg = Args.getLastArg(OPT_wrapper_time_trace_eq)) {
Expand Down
5 changes: 5 additions & 0 deletions clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,8 @@ def sycl_dump_device_code_EQ : Joined<["--", "-"], "sycl-dump-device-code=">,
def sycl_allow_device_image_dependencies : Flag<["--", "-"], "sycl-allow-device-image-dependencies">,
Flags<[WrapperOnlyOption, HelpHidden]>,
HelpText<"Allow dependencies between device code images">;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure that this is the best design.

@asudarsa @mdtoguchi Do you have any thoughts about this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything in particular you dislike?

Ping @asudarsa & @mdtoguchi for their thoughts.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this more of an issue with using clang-linker-wrapper to create the SYCLBIN file? If there is a desire to keep clang-linker-wrapper more streamlined (i.e. should just do 'some stuff' then call the linker) maybe a dedicated tool would be better.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@asudarsa & @maksimsab - I imagine you know the future plans of the SYCL integration in the clang-linker-wrapper better than I do. What would be the better design here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have the idea of the whole pipeline with syclbin.
Do you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a major refactoring of clang-linker-wrapper coming up. That should simplify things and help to add this support in a more streamlined way.

Thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my offline discussion with @asudarsa, it sounds like this would be a stopgap solution either way, as much of the SYCL linker wrapper is being moved anyway. I think it makes sense to discuss the integration of SYCLBIN in our tooling, but if it's going to look vastly different it may be a topic for then. From a user perspective, it shouldn't have any difference before and after.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a look at the SYCLBin pipeline and it seems like something that can be easily integrated into the 'new' pipeline where most of the SYCL based compilation flow happens inside a new tool 'clang-sycl-linker. Unfortunately, the new flow is only available upstream and we need a few more changes before syncing it with downstream compiler.

Long story short, this PR can be merged as is with the caveat that there will be some modifications required whenever we sync with upstream compiler.

Thanks

// Options to force the output to be of the SYCLBIN format.
def syclbin : Flag<["--", "-"], "syclbin">,
Flags<[WrapperOnlyOption]>,
HelpText<"Output in the SYCLBIN binary format">;
1 change: 1 addition & 0 deletions llvm/include/llvm/Object/OffloadBinary.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ enum ImageKind : uint16_t {
IMG_Cubin,
IMG_Fatbinary,
IMG_PTX,
IMG_SYCLBIN,
IMG_LAST,
};

Expand Down
148 changes: 148 additions & 0 deletions llvm/include/llvm/Object/SYCLBIN.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
//===- SYCLBIN.h - SYCLBIN binary format support ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_OBJECT_SYCLBIN_H
#define LLVM_OBJECT_SYCLBIN_H

#include "llvm/ADT/SmallString.h"
#include "llvm/SYCLPostLink/ModuleSplitter.h"
#include "llvm/Support/MemoryBuffer.h"
#include <string>

namespace llvm {

namespace object {

// Representation of a SYCLBIN binary object. This is intended for use as an
// image inside a OffloadBinary.
class SYCLBIN {
public:
SYCLBIN(MemoryBufferRef Source) : Data{Source} {}

SYCLBIN(const SYCLBIN &Other) = delete;
SYCLBIN(SYCLBIN &&Other) = default;

SYCLBIN &operator=(const SYCLBIN &Other) = delete;
SYCLBIN &operator=(SYCLBIN &&Other) = default;

MemoryBufferRef getMemoryBufferRef() const { return Data; }

enum class BundleState : uint8_t { Input = 0, Object = 1, Executable = 2 };

struct SYCLBINModuleDesc {
std::string ArchString;
std::vector<module_split::SplitModule> SplitModules;
};

class SYCLBINDesc {
public:
SYCLBINDesc(BundleState State, ArrayRef<SYCLBINModuleDesc> ModuleDescs);

SYCLBINDesc(const SYCLBINDesc &Other) = delete;
SYCLBINDesc(SYCLBINDesc &&Other) = default;

SYCLBINDesc &operator=(const SYCLBINDesc &Other) = delete;
SYCLBINDesc &operator=(SYCLBINDesc &&Other) = default;

size_t getMetadataTableByteSize() const;
Expected<size_t> getBinaryTableByteSize() const;
Expected<size_t> getSYCLBINByteSite() const;

private:
struct ImageDesc {
SmallString<0> Metadata;
SmallString<0> FilePath;
};

struct AbstractModuleDesc {
SmallString<0> Metadata;
SmallVector<ImageDesc, 4> IRModuleDescs;
SmallVector<ImageDesc, 4> NativeDeviceCodeImageDescs;
};

SmallString<0> GlobalMetadata;
SmallVector<AbstractModuleDesc, 4> AbstractModuleDescs;

friend class SYCLBIN;
};

/// The current version of the binary used for backwards compatibility.
static constexpr uint32_t CurrentVersion = 1;

/// Magic number used to identify SYCLBIN files.
static constexpr uint32_t MagicNumber = 0x53594249;

/// Serialize \p Desc to \p OS .
static Error write(const SYCLBIN::SYCLBINDesc &Desc, raw_ostream &OS);

/// Deserialize the contents of \p Source to produce a SYCLBIN object.
static Expected<std::unique_ptr<SYCLBIN>> read(MemoryBufferRef Source);

struct IRModule {
std::unique_ptr<llvm::util::PropertySetRegistry> Metadata;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we call these Properties instead of Metadata? That would match with how the type is used in module splitting/sycl-post-link

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer the name "metadata" in this case, as it should only really include the metadata property set for IR modules. You could argue that abstract modules shouldn't have its data named "metadata", but I think for consistency between the levels of modules/images it makes sense to call it metadata.

Metadata is also the terminology used in the design document and they all live in the same "metadata byte table", so mixing the terminology might make that concept confusing too.

StringRef RawIRBytes;
};
struct NativeDeviceCodeImage {
std::unique_ptr<llvm::util::PropertySetRegistry> Metadata;
StringRef RawDeviceCodeImageBytes;
};

struct AbstractModule {
std::unique_ptr<llvm::util::PropertySetRegistry> Metadata;
SmallVector<IRModule> IRModules;
SmallVector<NativeDeviceCodeImage> NativeDeviceCodeImages;
};

uint32_t Version;
std::unique_ptr<llvm::util::PropertySetRegistry> GlobalMetadata;
SmallVector<AbstractModule, 4> AbstractModules;

private:
MemoryBufferRef Data;

struct alignas(8) FileHeaderType {
uint32_t Magic;
uint32_t Version;
uint32_t AbstractModuleCount;
uint32_t IRModuleCount;
uint32_t NativeDeviceCodeImageCount;
uint64_t MetadataByteTableSize;
uint64_t BinaryByteTableSize;
uint64_t GlobalMetadataOffset;
uint64_t GlobalMetadataSize;
};

struct alignas(8) AbstractModuleHeaderType {
uint64_t MetadataOffset;
uint64_t MetadataSize;
uint32_t IRModuleCount;
uint32_t IRModuleOffset;
uint32_t NativeDeviceCodeImageCount;
uint32_t NativeDeviceCodeImageOffset;
};

struct alignas(8) IRModuleHeaderType {
uint64_t MetadataOffset;
uint64_t MetadataSize;
uint64_t RawIRBytesOffset;
uint64_t RawIRBytesSize;
};

struct alignas(8) NativeDeviceCodeImageHeaderType {
uint64_t MetadataOffset;
uint64_t MetadataSize;
uint64_t BinaryBytesOffset;
uint64_t BinaryBytesSize;
};
};

} // namespace object

} // namespace llvm

#endif
2 changes: 1 addition & 1 deletion llvm/include/llvm/SYCLPostLink/ModuleSplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/SYCLLowerIR/SYCLDeviceRequirements.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/PropertySetIO.h"
Expand All @@ -29,7 +30,6 @@
namespace llvm {

class Function;
class Module;

namespace cl {
class OptionCategory;
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ class PropertyValue {
}
}

const char *data() const { return reinterpret_cast<const char *>(&Val); }

private:
template <typename T> T &getValueRef();
void copy(const PropertyValue &P);
Expand Down Expand Up @@ -219,6 +221,13 @@ class PropertySetRegistry {
static constexpr char PROPERTY_REQD_WORK_GROUP_SIZE[] =
"reqd_work_group_size_uint64_t";

// SYCLBIN specific property sets.
static constexpr char SYCLBIN_GLOBAL_METADATA[] = "SYCLBIN/global metadata";
static constexpr char SYCLBIN_IR_MODULE_METADATA[] =
"SYCLBIN/ir module metadata";
static constexpr char SYCLBIN_NATIVE_DEVICE_CODE_IMAGE_METADATA[] =
"SYCLBIN/native device code image metadata";

/// Function for bulk addition of an entire property set in the given
/// \p Category .
template <typename MapTy> void add(StringRef Category, const MapTy &Props) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Object/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_llvm_component_library(LLVMObject
OffloadBinary.cpp
RecordStreamer.cpp
RelocationResolver.cpp
SYCLBIN.cpp
SymbolicFile.cpp
SymbolSize.cpp
TapiFile.cpp
Expand Down
Loading