diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index ab5a63f990170..f3e99b27f346c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -326,13 +326,13 @@ void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) { } void context_impl::addDeviceGlobalInitializer( - ur_program_handle_t Program, const std::vector &Devs, + ur_program_handle_t Program, devices_range Devs, const RTDeviceBinaryImage *BinImage) { if (BinImage->getDeviceGlobals().empty()) return; std::lock_guard Lock(MDeviceGlobalInitializersMutex); - for (const device &Dev : Devs) { - auto Key = std::make_pair(Program, getSyclObjImpl(Dev)->getHandleRef()); + for (device_impl &Dev : Devs) { + auto Key = std::make_pair(Program, Dev.getHandleRef()); auto [Iter, Inserted] = MDeviceGlobalInitializers.emplace(Key, BinImage); if (Inserted && !Iter->second.MDeviceGlobalsFullyInitialized) ++MDeviceGlobalNotInitializedCnt; diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 4be6ee9ac60f8..0e3f13afaae8a 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -218,7 +218,7 @@ class context_impl : public std::enable_shared_from_this { /// Adds a device global initializer. void addDeviceGlobalInitializer(ur_program_handle_t Program, - const std::vector &Devs, + devices_range Devs, const RTDeviceBinaryImage *BinImage); /// Initializes device globals for a program on the associated queue. diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index f4db11352e390..d797cbc3b2a7c 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -252,12 +252,12 @@ class device_image_impl using SpecConstMapT = std::map>; device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, - std::vector Devices, bundle_state State, + devices_range Devices, bundle_state State, std::shared_ptr> KernelIDs, ur_program_handle_t Program, uint8_t Origins, private_tag) : MBinImage(BinImage), MContext(std::move(Context)), - MDevices(std::move(Devices)), MState(State), MProgram(Program), - MKernelIDs(std::move(KernelIDs)), + MDevices(Devices.to>()), MState(State), + MProgram(Program), MKernelIDs(std::move(KernelIDs)), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(Origins) { updateSpecConstSymMap(); if (BinImage && (MOrigins & ImageOriginSYCLBIN)) { @@ -281,7 +281,7 @@ class device_image_impl device_image_impl( const RTDeviceBinaryImage *BinImage, const context &Context, - std::vector &&Devices, bundle_state State, + devices_range Devices, bundle_state State, std::shared_ptr> KernelIDs, ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, const std::vector &SpecConstsBlob, uint8_t Origins, @@ -290,8 +290,9 @@ class device_image_impl KernelNameToArgMaskMap &&EliminatedKernelArgMasks, std::unique_ptr &&MergedImageStorage, private_tag) : MBinImage(BinImage), MContext(std::move(Context)), - MDevices(std::move(Devices)), MState(State), MProgram(Program), - MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, + MDevices(Devices.to>()), MState(State), + MProgram(Program), MKernelIDs(std::move(KernelIDs)), + MKernelNames{std::move(KernelNames)}, MEliminatedKernelArgMasks{std::move(EliminatedKernelArgMasks)}, MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), @@ -300,13 +301,14 @@ class device_image_impl MMergedImageStorage(std::move(MergedImageStorage)) {} device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, - const std::vector &Devices, bundle_state State, + devices_range Devices, bundle_state State, ur_program_handle_t Program, syclex::source_language Lang, KernelNameSetT &&KernelNames, KernelNameToArgMaskMap &&EliminatedKernelArgMasks, private_tag) : MBinImage(BinImage), MContext(std::move(Context)), - MDevices(std::move(Devices)), MState(State), MProgram(Program), + MDevices(Devices.to>()), MState(State), + MProgram(Program), MKernelIDs(std::make_shared>()), MKernelNames{std::move(KernelNames)}, MEliminatedKernelArgMasks{std::move(EliminatedKernelArgMasks)}, @@ -318,15 +320,16 @@ class device_image_impl device_image_impl( const RTDeviceBinaryImage *BinImage, const context &Context, - const std::vector &Devices, bundle_state State, + devices_range Devices, bundle_state State, std::shared_ptr> &&KernelIDs, syclex::source_language Lang, KernelNameSetT &&KernelNames, MangledKernelNameMapT &&MangledKernelNames, std::string &&Prefix, std::shared_ptr &&DeviceGlobalRegistry, private_tag) : MBinImage(BinImage), MContext(std::move(Context)), - MDevices(std::move(Devices)), MState(State), MProgram(nullptr), - MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, + MDevices(Devices.to>()), MState(State), + MProgram(nullptr), MKernelIDs(std::move(KernelIDs)), + MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), MRTCBinInfo(KernelCompilerBinaryInfo{ @@ -336,12 +339,11 @@ class device_image_impl } device_image_impl(const std::string &Src, context Context, - const std::vector &Devices, - syclex::source_language Lang, + devices_range Devices, syclex::source_language Lang, include_pairs_t &&IncludePairsVec, private_tag) : MBinImage(Src), MContext(std::move(Context)), - MDevices(std::move(Devices)), MState(bundle_state::ext_oneapi_source), - MProgram(nullptr), + MDevices(Devices.to>()), + MState(bundle_state::ext_oneapi_source), MProgram(nullptr), MKernelIDs(std::make_shared>()), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), @@ -351,11 +353,11 @@ class device_image_impl } device_image_impl(const std::vector &Bytes, const context &Context, - const std::vector &Devices, - syclex::source_language Lang, private_tag) + devices_range Devices, syclex::source_language Lang, + private_tag) : MBinImage(Bytes), MContext(std::move(Context)), - MDevices(std::move(Devices)), MState(bundle_state::ext_oneapi_source), - MProgram(nullptr), + MDevices(Devices.to>()), + MState(bundle_state::ext_oneapi_source), MProgram(nullptr), MKernelIDs(std::make_shared>()), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), @@ -363,13 +365,14 @@ class device_image_impl updateSpecConstSymMap(); } - device_image_impl(const context &Context, const std::vector &Devices, + device_image_impl(const context &Context, devices_range Devices, bundle_state State, ur_program_handle_t Program, syclex::source_language Lang, KernelNameSetT &&KernelNames, private_tag) : MBinImage(static_cast(nullptr)), - MContext(std::move(Context)), MDevices(std::move(Devices)), - MState(State), MProgram(Program), + MContext(std::move(Context)), + MDevices(Devices.to>()), MState(State), + MProgram(Program), MKernelIDs(std::make_shared>()), MKernelNames{std::move(KernelNames)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), @@ -391,8 +394,8 @@ class device_image_impl const device &DeviceCand) const noexcept { // If the device is in the device list and the kernel ID is in the kernel // bundle, return true. - for (const device &Device : MDevices) - if (Device == DeviceCand) + for (device_impl &Device : get_devices()) + if (&Device == &*getSyclObjImpl(DeviceCand)) return has_kernel(KernelIDCand); // Otherwise, if the device candidate is a sub-device it is also valid if @@ -541,12 +544,13 @@ class device_image_impl void set_state(bundle_state NewState) noexcept { MState = NewState; } - const std::vector &get_devices() const noexcept { return MDevices; } + devices_range get_devices() const noexcept { return MDevices; } bool compatible_with_device(const device &Dev) const { - return std::any_of( - MDevices.begin(), MDevices.end(), - [&Dev](const device &DevCand) { return Dev == DevCand; }); + return std::any_of(MDevices.begin(), MDevices.end(), + [Dev = &*getSyclObjImpl(Dev)](device_impl *DevCand) { + return Dev == DevCand; + }); } const ur_program_handle_t &get_ur_program_ref() const noexcept { @@ -703,7 +707,7 @@ class device_image_impl } std::vector> buildFromSource( - const std::vector &Devices, + devices_range Devices, const std::vector &BuildOptions, std::string *LogPtr, const std::vector &RegisteredKernelNames, @@ -715,13 +719,12 @@ class device_image_impl sycl::detail::context_impl &ContextImpl = *getSyclObjImpl(MContext); - for (const auto &SyclDev : Devices) { - device_impl &DevImpl = *getSyclObjImpl(SyclDev); - if (!ContextImpl.hasDevice(DevImpl)) { + for (device_impl &Dev : Devices) { + if (!ContextImpl.hasDevice(Dev)) { throw sycl::exception(make_error_code(errc::invalid), "device not part of kernel_bundle context"); } - if (!DevImpl.extOneapiCanBuild(MRTCBinInfo->MLanguage)) { + if (!Dev.extOneapiCanBuild(MRTCBinInfo->MLanguage)) { // This error cannot not be exercised in the current implementation, as // compatibility with a source language depends on the backend's // capabilities and all devices in one context share the same backend in @@ -736,10 +739,7 @@ class device_image_impl return createSYCLImages(Devices, bundle_state::executable, BuildOptions, LogPtr, RegisteredKernelNames, OutDeviceBins); - std::vector DeviceVec; - DeviceVec.reserve(Devices.size()); - for (const auto &SyclDev : Devices) - DeviceVec.push_back(getSyclObjImpl(SyclDev)->getHandleRef()); + auto DeviceVec = Devices.to>(); ur_program_handle_t UrProgram = nullptr; // SourceStrPtr will be null when source is Spir-V bytes. @@ -788,7 +788,7 @@ class device_image_impl } std::vector> compileFromSource( - const std::vector &Devices, + devices_range Devices, const std::vector &CompileOptions, std::string *LogPtr, const std::vector &RegisteredKernelNames, @@ -806,13 +806,12 @@ class device_image_impl sycl::detail::context_impl &ContextImpl = *getSyclObjImpl(MContext); - for (const auto &SyclDev : Devices) { - detail::device_impl &DevImpl = *getSyclObjImpl(SyclDev); - if (!ContextImpl.hasDevice(DevImpl)) { + for (device_impl &Dev : Devices) { + if (!ContextImpl.hasDevice(Dev)) { throw sycl::exception(make_error_code(errc::invalid), "device not part of kernel_bundle context"); } - if (!DevImpl.extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + if (!Dev.extOneapiCanCompile(MRTCBinInfo->MLanguage)) { // This error cannot not be exercised in the current implementation, as // compatibility with a source language depends on the backend's // capabilities and all devices in one context share the same backend in @@ -877,7 +876,7 @@ class device_image_impl } bool extKernelCompilerFetchFromCache( - const std::vector Devices, + devices_range Devices, const std::vector &BuildOptions, const std::string &SourceStr, ur_program_handle_t &UrProgram) const { sycl::detail::context_impl &ContextImpl = *getSyclObjImpl(MContext); @@ -885,10 +884,7 @@ class device_image_impl std::string UserArgs = syclex::detail::userArgsAsString(BuildOptions); - std::vector DeviceHandles; - std::transform( - Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles), - [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); + auto DeviceHandles = Devices.to>(); std::vector Binaries; std::vector Lengths; @@ -991,7 +987,7 @@ class device_image_impl } std::vector> createSYCLImages( - const std::vector &Devices, bundle_state State, + devices_range Devices, bundle_state State, const std::vector &Options, std::string *LogPtr, const std::vector &RegisteredKernelNames, @@ -1055,13 +1051,12 @@ class device_image_impl const RTDeviceBinaryImage &NewImageRef = *NewImage; // Filter the devices that support the image requirements. - std::vector SupportingDevs = Devices; - auto NewSupportingDevsEnd = - std::remove_if(SupportingDevs.begin(), SupportingDevs.end(), - [&NewImageRef](const sycl::device &SDev) { - return !doesDevSupportDeviceRequirements( - *detail::getSyclObjImpl(SDev), NewImageRef); - }); + auto SupportingDevs = Devices.to>(); + auto NewSupportingDevsEnd = std::remove_if( + SupportingDevs.begin(), SupportingDevs.end(), + [&NewImageRef](device_impl *Dev) { + return !doesDevSupportDeviceRequirements(*Dev, NewImageRef); + }); // If there are no devices that support the image, we skip it. if (NewSupportingDevsEnd == SupportingDevs.begin()) @@ -1145,7 +1140,7 @@ class device_image_impl // Mark the image as input so the program manager will bring it into // the right state. auto DevImgImpl = device_image_impl::create( - NewImage, MContext, std::move(SupportingDevs), bundle_state::input, + NewImage, MContext, SupportingDevs, bundle_state::input, std::move(KernelIDs), MRTCBinInfo->MLanguage, std::move(KernelNames), std::move(MangledKernelNames), std::string{Prefix}, std::move(DGRegs)); @@ -1156,9 +1151,9 @@ class device_image_impl // TODO: Consider making a collectDeviceImageDeps variant that takes a // set reference and inserts into that instead. std::set ImgDeps; - for (const device &Device : DevImgImpl->get_devices()) { + for (device_impl &Device : DevImgImpl->get_devices()) { std::set DevImgDeps = - PM.collectDeviceImageDeps(*NewImage, *getSyclObjImpl(Device), + PM.collectDeviceImageDeps(*NewImage, Device, /*ErrorOnUnresolvableImport=*/State == bundle_state::executable); ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); @@ -1170,7 +1165,7 @@ class device_image_impl 1 + ((State == bundle_state::executable) * ImgDeps.size())); NewImageAndDeps.push_back( createSyclObjFromImpl(std::move(DevImgImpl))); - const std::vector &SupportingDevsRef = + devices_range SupportingDevsRef = getSyclObjImpl(NewImageAndDeps[0])->get_devices(); if (State == bundle_state::executable) { // If target is executable we bundle the image and dependencies together @@ -1200,7 +1195,7 @@ class device_image_impl } ur_program_handle_t - createProgramFromSource(const std::vector Devices, + createProgramFromSource(devices_range Devices, const std::vector &Options, std::string *LogPtr) const { sycl::detail::context_impl &ContextImpl = *getSyclObjImpl(MContext); @@ -1213,12 +1208,11 @@ class device_image_impl const auto &SourceStr = std::get(MBinImage); std::vector IPVersionVec(Devices.size()); std::transform(Devices.begin(), Devices.end(), IPVersionVec.begin(), - [&](const sycl::device &SyclDev) { + [&](device_impl &Dev) { uint32_t ipVersion = 0; Adapter.call( - getSyclObjImpl(SyclDev)->getHandleRef(), - UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); + Dev.getHandleRef(), UR_DEVICE_INFO_IP_VERSION, + sizeof(uint32_t), &ipVersion, nullptr); return ipVersion; }); return syclex::detail::OpenCLC_to_SPIRV(SourceStr, IPVersionVec, @@ -1273,7 +1267,7 @@ class device_image_impl const RTDeviceBinaryImage *, SYCLBINBinaries> MBinImage = static_cast(nullptr); context MContext; - std::vector MDevices; + std::vector MDevices; bundle_state MState; // Native program handler which this device image represents ur_program_handle_t MProgram = nullptr; diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 9c9a198a1f4d0..1b86d170267db 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -2297,7 +2297,9 @@ struct devices_deref_impl { using devices_iterator = variadic_iterator>::const_iterator, - std::vector::const_iterator, device_impl *>; + std::vector::const_iterator, + std::vector::const_iterator, + device_impl *>; class devices_range : public iterator_range { private: @@ -2305,8 +2307,21 @@ class devices_range : public iterator_range { public: using Base::Base; - devices_range(const device &Dev) - : devices_range(&*getSyclObjImpl(Dev), (&*getSyclObjImpl(Dev) + 1), 1) {} + template + decltype(std::declval().to()) to() const { + return this->Base::to(); + } + + template + std::enable_if_t>, + Container> + to() const { + std::vector DeviceHandles; + DeviceHandles.reserve(size()); + std::transform(begin(), end(), std::back_inserter(DeviceHandles), + [](device_impl &Dev) { return Dev.getHandleRef(); }); + return DeviceHandles; + } }; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/helpers.hpp b/sycl/source/detail/helpers.hpp index d79440a912677..3f002bf41b8e2 100644 --- a/sycl/source/detail/helpers.hpp +++ b/sycl/source/detail/helpers.hpp @@ -102,10 +102,19 @@ template class iterator_range { iterator_range(IterTy Begin, IterTy End, size_t Size) : Begin(Begin), End(End), Size(Size) {} + iterator_range() + : iterator_range(static_cast(nullptr), + static_cast(nullptr), 0) {} + template iterator_range(const ContainerTy &Container) : iterator_range(Container.begin(), Container.end(), Container.size()) {} + iterator_range(value_type &Obj) : iterator_range(&Obj, &Obj + 1, 1) {} + + iterator_range(const sycl_type &Obj) + : iterator_range(&*getSyclObjImpl(Obj), (&*getSyclObjImpl(Obj) + 1), 1) {} + iterator begin() const { return Begin; } iterator end() const { return End; } size_t size() const { return Size; } diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 2e1ddfe5e018d..e01ff15b4b8d4 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -1138,6 +1138,29 @@ class kernel_bundle_impl DeviceGlobalMap MDeviceGlobals{/*OwnerControlledCleanup=*/false}; }; +inline bool is_compatible(const std::vector &KernelIDs, + device_impl &Dev) { + if (KernelIDs.empty()) + return true; + // One kernel may be contained in several binary images depending on the + // number of targets. This kernel is compatible with the device if there is + // at least one image (containing this kernel) whose aspects are supported by + // the device and whose target matches the device. + for (const auto &KernelID : KernelIDs) { + std::set BinImages = + detail::ProgramManager::getInstance().getRawDeviceImages({KernelID}); + + if (std::none_of(BinImages.begin(), BinImages.end(), + [&](const detail::RTDeviceBinaryImage *Img) { + return doesDevSupportDeviceRequirements(Dev, *Img) && + doesImageTargetMatchDevice(*Img, Dev); + })) + return false; + } + + return true; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 059c170df4bb1..a952532542fe2 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -71,15 +71,14 @@ ProgramManager &ProgramManager::getInstance() { } static ur_program_handle_t -createBinaryProgram(context_impl &Context, const std::vector &Devices, +createBinaryProgram(context_impl &Context, devices_range Devices, const uint8_t **Binaries, size_t *Lengths, const std::vector &Metadata) { + assert(!Devices.empty() && "No devices provided for program creation"); + adapter_impl &Adapter = Context.getAdapter(); ur_program_handle_t Program; - std::vector DeviceHandles; - std::transform( - Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles), - [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); + auto DeviceHandles = Devices.to>(); ur_result_t BinaryStatus = UR_RESULT_SUCCESS; ur_program_properties_t Properties = {}; Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; @@ -87,7 +86,6 @@ createBinaryProgram(context_impl &Context, const std::vector &Devices, Properties.count = Metadata.size(); Properties.pMetadatas = Metadata.data(); - assert(Devices.size() > 0 && "No devices provided for program creation"); Adapter.call( Context.getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), Lengths, Binaries, &Properties, &Program); @@ -188,12 +186,9 @@ static bool isDeviceBinaryTypeSupported(context_impl &ContextImpl, ur_program_handle_t ProgramManager::createURProgram(const RTDeviceBinaryImage &Img, context_impl &ContextImpl, - const std::vector &Devices) { + devices_range Devices) { if constexpr (DbgProgMgr > 0) { - std::vector URDevices; - std::transform( - Devices.begin(), Devices.end(), std::back_inserter(URDevices), - [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); + auto URDevices = Devices.to>(); std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", " << ContextImpl.get() << ", " << VecToString(URDevices) << ")\n"; } @@ -336,8 +331,7 @@ appendCompileOptionsForGRFSizeProperties(std::string &CompileOpts, static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, - const std::vector &Devs, - adapter_impl &) { + devices_range Devs, adapter_impl &) { // Build options are overridden if environment variables are present. // Environment variables are not changed during program lifecycle so it // is reasonable to use static here to read them only once. @@ -367,8 +361,7 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage); - const platform_impl &PlatformImpl = - detail::getSyclObjImpl(Devs[0])->getPlatformImpl(); + const platform_impl &PlatformImpl = Devs.front().getPlatformImpl(); // Add optimization flags. auto str = getUint32PropAsOptStr(Img, "optLevel"); @@ -381,8 +374,8 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, optLevelStr[0] != '\0') { // Making sure all devices have the same platform. assert(!Devs.empty() && - std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) { - return Dev.get_platform() == Devs[0].get_platform(); + std::all_of(Devs.begin(), Devs.end(), [&](device_impl &Dev) { + return &Dev.getPlatformImpl() == &PlatformImpl; })); const char *backend_option = nullptr; // Empty string is returned in backend_option when no appropriate backend @@ -397,7 +390,7 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, bool IsIntelGPU = (PlatformImpl.getBackend() == backend::ext_oneapi_level_zero || PlatformImpl.getBackend() == backend::opencl) && - std::all_of(Devs.begin(), Devs.end(), [](const device &Dev) { + std::all_of(Devs.begin(), Devs.end(), [](device_impl &Dev) { return Dev.is_gpu() && Dev.get_info() == 0x8086; }); @@ -427,7 +420,7 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, auto Device = OptValue.substr(0, ColonPos); std::string BackendStrToAdd; bool IsPVC = - std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) { + std::all_of(Devs.begin(), Devs.end(), [&](device_impl &Dev) { return IsIntelGPU && (Dev.get_info() & 0xFF00) == 0x0B00; @@ -480,7 +473,7 @@ static void appendLinkEnvironmentVariablesThatAppend(std::string &LinkOpts) { static void applyOptionsFromImage(std::string &CompileOpts, std::string &LinkOpts, const RTDeviceBinaryImage &Img, - const std::vector &Devices, + devices_range Devices, adapter_impl &Adapter) { appendCompileOptionsFromImage(CompileOpts, Img, Devices, Adapter); appendLinkOptionsFromImage(LinkOpts, Img); @@ -515,7 +508,7 @@ static void applyOptionsFromEnvironment(std::string &CompileOpts, std::pair ProgramManager::getOrCreateURProgram( const RTDeviceBinaryImage &MainImg, const std::vector &AllImages, - context_impl &ContextImpl, const std::vector &Devices, + context_impl &ContextImpl, devices_range Devices, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts) { ur_program_handle_t NativePrg; @@ -909,13 +902,14 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( std::back_inserter(AllImages)); return getBuiltURProgram(std::move(AllImages), ContextImpl, - {createSyclObjFromImpl(RootOrSubDevImpl)}); + {RootOrSubDevImpl}); } -ur_program_handle_t ProgramManager::getBuiltURProgram( - const BinImgWithDeps &ImgWithDeps, context_impl &ContextImpl, - const std::vector &Devs, const DevImgPlainWithDeps *DevImgWithDeps, - const SerializedObj &SpecConsts) { +ur_program_handle_t +ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, + context_impl &ContextImpl, devices_range Devs, + const DevImgPlainWithDeps *DevImgWithDeps, + const SerializedObj &SpecConsts) { std::string CompileOpts; std::string LinkOpts; applyOptionsFromEnvironment(CompileOpts, LinkOpts); @@ -983,9 +977,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( } } - std::vector URDevices; - for (auto &Dev : Devs) - URDevices.push_back(getSyclObjImpl(Dev).get()->getHandleRef()); + auto URDevices = Devs.to>(); ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, @@ -1030,9 +1022,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( std::set URDevicesSet; std::transform(Devs.begin(), Devs.end(), std::inserter(URDevicesSet, URDevicesSet.begin()), - [](const device &Dev) { - return getSyclObjImpl(Dev).get()->getHandleRef(); - }); + [](device_impl &Dev) { return Dev.getHandleRef(); }); auto CacheKey = std::make_pair(std::make_pair(SpecConsts, ImgId), URDevicesSet); @@ -2503,17 +2493,17 @@ device_image_plain ProgramManager::getDeviceImageFromBinaryImage( KernelIDs = m_BinImg2KernelIDs[BinImage]; } - DeviceImageImplPtr Impl = device_image_impl::create( - BinImage, Ctx, std::vector{Dev}, ImgState, KernelIDs, - /*PIProgram=*/nullptr, ImageOriginSYCLOffline); + DeviceImageImplPtr Impl = + device_image_impl::create(BinImage, Ctx, Dev, ImgState, KernelIDs, + /*PIProgram=*/nullptr, ImageOriginSYCLOffline); return createSyclObjFromImpl(std::move(Impl)); } std::vector ProgramManager::getSYCLDeviceImagesWithCompatibleState( - const context &Ctx, const std::vector &Devs, - bundle_state TargetState, const std::vector &KernelIDs) { + const context &Ctx, devices_range Devs, bundle_state TargetState, + const std::vector &KernelIDs) { // Collect unique raw device images taking into account kernel ids passed // TODO: Can we avoid repacking? @@ -2521,8 +2511,8 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( if (!KernelIDs.empty()) { for (const auto &KID : KernelIDs) { bool isCompatibleWithAtLeastOneDev = - std::any_of(Devs.begin(), Devs.end(), [&KID](const auto &Dev) { - return sycl::is_compatible({KID}, Dev); + std::any_of(Devs.begin(), Devs.end(), [&KID](device_impl &Dev) { + return detail::is_compatible({KID}, Dev); }); if (!isCompatibleWithAtLeastOneDev) throw sycl::exception( @@ -2568,9 +2558,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( std::unordered_map ImageInfoMap; - for (const sycl::device &Dev : Devs) { - - device_impl &DevImpl = *getSyclObjImpl(Dev); + for (device_impl &Dev : Devs) { // Track the highest image state for each requested kernel. using StateImagesPairT = std::pair>; @@ -2582,8 +2570,8 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( KernelImageMap.insert({KernelID, {}}); for (const RTDeviceBinaryImage *BinImage : BinImages) { - if (!compatibleWithDevice(BinImage, DevImpl) || - !doesDevSupportDeviceRequirements(DevImpl, *BinImage)) + if (!compatibleWithDevice(BinImage, Dev) || + !doesDevSupportDeviceRequirements(Dev, *BinImage)) continue; auto InsertRes = ImageInfoMap.insert({BinImage, {}}); @@ -2596,7 +2584,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage]; } ImgInfo.Deps = - collectDeviceImageDeps(*BinImage, {DevImpl}, + collectDeviceImageDeps(*BinImage, Dev, /*ErrorOnUnresolvableImport=*/TargetState == bundle_state::executable); } @@ -2686,9 +2674,10 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( return SYCLDeviceImages; } -device_image_plain ProgramManager::createDependencyImage( - const context &Ctx, const std::vector &Devs, - const RTDeviceBinaryImage *DepImage, bundle_state DepState) { +device_image_plain +ProgramManager::createDependencyImage(const context &Ctx, devices_range Devs, + const RTDeviceBinaryImage *DepImage, + bundle_state DepState) { std::shared_ptr> DepKernelIDs; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); @@ -2773,8 +2762,7 @@ void ProgramManager::bringSYCLDeviceImagesToState( } std::vector -ProgramManager::getSYCLDeviceImages(const context &Ctx, - const std::vector &Devs, +ProgramManager::getSYCLDeviceImages(const context &Ctx, devices_range Devs, bundle_state TargetState) { // Collect device images with compatible state std::vector DeviceImages = @@ -2784,9 +2772,10 @@ ProgramManager::getSYCLDeviceImages(const context &Ctx, return DeviceImages; } -std::vector ProgramManager::getSYCLDeviceImages( - const context &Ctx, const std::vector &Devs, - const DevImgSelectorImpl &Selector, bundle_state TargetState) { +std::vector +ProgramManager::getSYCLDeviceImages(const context &Ctx, devices_range Devs, + const DevImgSelectorImpl &Selector, + bundle_state TargetState) { // Collect device images with compatible state std::vector DeviceImages = getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState); @@ -2806,9 +2795,10 @@ std::vector ProgramManager::getSYCLDeviceImages( return DeviceImages; } -std::vector ProgramManager::getSYCLDeviceImages( - const context &Ctx, const std::vector &Devs, - const std::vector &KernelIDs, bundle_state TargetState) { +std::vector +ProgramManager::getSYCLDeviceImages(const context &Ctx, devices_range Devs, + const std::vector &KernelIDs, + bundle_state TargetState) { // Fast path for when no kernel IDs are requested if (KernelIDs.empty()) return {}; @@ -2836,8 +2826,7 @@ std::vector ProgramManager::getSYCLDeviceImages( DevImgPlainWithDeps ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, - const std::vector &Devs, - const property_list &PropList) { + devices_range Devs, const property_list &PropList) { { auto NoAllowedPropertiesCheck = [](int) { return false; }; detail::PropertyValidator::checkPropsAndThrow( @@ -2850,10 +2839,8 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, // TODO: Probably we could have cached compiled device images. // TODO: Handle zero sized Device list. - std::vector URDevices; - URDevices.reserve(Devs.size()); - for (const device &Dev : Devs) - URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); + + auto URDevices = Devs.to>(); std::vector CompiledImages; CompiledImages.reserve(ImgWithDeps.size()); @@ -2877,9 +2864,8 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, std::optional RTCInfo = InputImpl.getRTCInfo(); DeviceImageImplPtr ObjectImpl = device_image_impl::create( - InputImpl.get_bin_image_ref(), InputImpl.get_context(), - std::vector{Devs}, bundle_state::object, - InputImpl.get_kernel_ids_ptr(), Prog, + InputImpl.get_bin_image_ref(), InputImpl.get_context(), Devs, + bundle_state::object, InputImpl.get_kernel_ids_ptr(), Prog, InputImpl.get_spec_const_data_ref(), InputImpl.get_spec_const_blob_ref(), InputImpl.getOriginMask(), std::move(RTCInfo), std::move(KernelNames), @@ -2975,8 +2961,7 @@ mergeImageData(const std::vector &Imgs, std::vector ProgramManager::link(const std::vector &Imgs, - const std::vector &Devs, - const property_list &PropList) { + devices_range Devs, const property_list &PropList) { { auto NoAllowedPropertiesCheck = [](int) { return false; }; detail::PropertyValidator::checkPropsAndThrow( @@ -2988,10 +2973,8 @@ ProgramManager::link(const std::vector &Imgs, for (const device_image_plain &Img : Imgs) URPrograms.push_back(getSyclObjImpl(Img)->get_ur_program_ref()); - std::vector URDevices; - URDevices.reserve(Devs.size()); - for (const device &Dev : Devs) - URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); + auto URDevices = Devs.to>(); + // FIXME: Linker options are picked from the first object, but is that safe? std::string LinkOptionsStr; applyLinkOptionsFromEnvironment(LinkOptionsStr); @@ -3082,11 +3065,10 @@ ProgramManager::link(const std::vector &Imgs, auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); DeviceImageImplPtr ExecutableImpl = device_image_impl::create( - NewBinImg, Context, std::vector{Devs}, bundle_state::executable, - std::move(KernelIDs), LinkedProg, std::move(NewSpecConstMap), - std::move(NewSpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), - std::move(MergedKernelNames), std::move(MergedEliminatedKernelArgMasks), - std::move(MergedImageStorage)); + NewBinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), + LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), + CombinedOrigins, std::move(MergedRTCInfo), std::move(MergedKernelNames), + std::move(MergedEliminatedKernelArgMasks), std::move(MergedImageStorage)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3099,8 +3081,7 @@ ProgramManager::link(const std::vector &Imgs, // Supports caching of a program built for multiple devices device_image_plain ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, - const std::vector &Devs, - const property_list &PropList) { + devices_range Devs, const property_list &PropList) { { auto NoAllowedPropertiesCheck = [](int) { return false; }; detail::PropertyValidator::checkPropsAndThrow( @@ -3166,11 +3147,11 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); DeviceImageImplPtr ExecImpl = device_image_impl::create( - ResultBinImg, Context, std::vector{Devs}, - bundle_state::executable, std::move(KernelIDs), ResProgram, - std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, - std::move(MergedRTCInfo), std::move(MergedKernelNames), - std::move(MergedEliminatedKernelArgMasks), std::move(MergedImageStorage)); + ResultBinImg, Context, Devs, bundle_state::executable, + std::move(KernelIDs), ResProgram, std::move(SpecConstMap), + std::move(SpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), + std::move(MergedKernelNames), std::move(MergedEliminatedKernelArgMasks), + std::move(MergedImageStorage)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index bd60661949e87..1f9fd8d92c3e5 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -73,6 +73,7 @@ static constexpr uint32_t inline ITTSpecConstId = 0xFF747469; class context_impl; class device_impl; +class devices_range; class queue_impl; class event_impl; // DeviceLibExt is shared between sycl runtime and sycl-post-link tool. @@ -144,7 +145,7 @@ class ProgramManager { ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, context_impl &ContextImpl, - const std::vector &Devices); + devices_range Devices); /// Creates a UR program using either a cached device code binary if present /// in the persistent cache or from the supplied device image otherwise. /// \param Img The device image used to create the program. @@ -167,7 +168,7 @@ class ProgramManager { std::pair getOrCreateURProgram( const RTDeviceBinaryImage &Img, const std::vector &AllImages, - context_impl &ContextImpl, const std::vector &Devices, + context_impl &ContextImpl, devices_range Devices, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts); /// Builds or retrieves from cache a program defining the kernel with given /// name. @@ -193,7 +194,7 @@ class ProgramManager { /// the program should be built with. ur_program_handle_t getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, - context_impl &ContextImpl, const std::vector &Devs, + context_impl &ContextImpl, devices_range Devs, const DevImgPlainWithDeps *DevImgWithDeps = nullptr, const SerializedObj &SpecConsts = {}); @@ -292,12 +293,12 @@ class ProgramManager { // The function returns a vector of SYCL device images that are compiled with // the required state and at least one device from the passed list of devices. std::vector getSYCLDeviceImagesWithCompatibleState( - const context &Ctx, const std::vector &Devs, - bundle_state TargetState, const std::vector &KernelIDs = {}); + const context &Ctx, devices_range Devs, bundle_state TargetState, + const std::vector &KernelIDs = {}); // Creates a new dependency image for a given dependency binary image. device_image_plain createDependencyImage(const context &Ctx, - const std::vector &Devs, + devices_range Devs, const RTDeviceBinaryImage *DepImage, bundle_state DepState); @@ -312,15 +313,15 @@ class ProgramManager { // The function returns a vector of SYCL device images in required state, // which are compatible with at least one of the device from Devs. - std::vector - getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, - bundle_state State); + std::vector getSYCLDeviceImages(const context &Ctx, + devices_range Devs, + bundle_state State); // The function returns a vector of SYCL device images, for which Selector // callable returns true, in required state, which are compatible with at // least one of the device from Devs. std::vector - getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, + getSYCLDeviceImages(const context &Ctx, devices_range Devs, const DevImgSelectorImpl &Selector, bundle_state TargetState); @@ -328,27 +329,26 @@ class ProgramManager { // least one kernel from kernel ids vector in required state, which are // compatible with at least one of the device from Devs. std::vector - getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, + getSYCLDeviceImages(const context &Ctx, devices_range Devs, const std::vector &KernelIDs, bundle_state TargetState); // Produces new device image by convering input device image to the object // state DevImgPlainWithDeps compile(const DevImgPlainWithDeps &ImgWithDeps, - const std::vector &Devs, + devices_range Devs, const property_list &PropList); // Produces set of device images by convering input device images to object // the executable state std::vector - link(const std::vector &Imgs, - const std::vector &Devs, const property_list &PropList); + link(const std::vector &Imgs, devices_range Devs, + const property_list &PropList); // Produces new device image by converting input device image to the // executable state device_image_plain build(const DevImgPlainWithDeps &ImgWithDeps, - const std::vector &Devs, - const property_list &PropList); + devices_range Devs, const property_list &PropList); std::tuple getOrCreateKernel(const context &Context, KernelNameStrRefT KernelName, diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index fcc380c9a537b..d77c569c9d3dc 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -366,26 +366,7 @@ std::vector get_kernel_ids() { } bool is_compatible(const std::vector &KernelIDs, const device &Dev) { - if (KernelIDs.empty()) - return true; - // One kernel may be contained in several binary images depending on the - // number of targets. This kernel is compatible with the device if there is - // at least one image (containing this kernel) whose aspects are supported by - // the device and whose target matches the device. - detail::device_impl &DevImpl = *getSyclObjImpl(Dev); - for (const auto &KernelID : KernelIDs) { - std::set BinImages = - detail::ProgramManager::getInstance().getRawDeviceImages({KernelID}); - - if (std::none_of(BinImages.begin(), BinImages.end(), - [&](const detail::RTDeviceBinaryImage *Img) { - return doesDevSupportDeviceRequirements(DevImpl, *Img) && - doesImageTargetMatchDevice(*Img, DevImpl); - })) - return false; - } - - return true; + return detail::is_compatible(KernelIDs, *getSyclObjImpl(Dev)); } /////////////////////////