diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 32121db8306e0..2c5903a033301 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -105,6 +105,7 @@ def Aspectext_intel_xe_cores_per_cluster : Aspect<"ext_intel_xe_cores_per_cluste def Aspectext_intel_eus_per_xe_core : Aspect<"ext_intel_eus_per_xe_core">; def Aspectext_intel_max_lanes_per_hw_thread : Aspect<"ext_intel_max_lanes_per_hw_thread">; def Aspectext_oneapi_ipc_physical_memory : Aspect<"ext_oneapi_ipc_physical_memory">; +def Aspectext_oneapi_register_host_memory : Aspect<"ext_oneapi_register_host_memory">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -191,7 +192,8 @@ def : TargetInfo<"__TestAspectList", Aspectext_intel_xe_cores_per_cluster, Aspectext_intel_eus_per_xe_core, Aspectext_intel_max_lanes_per_hw_thread, - Aspectext_oneapi_ipc_physical_memory], + Aspectext_oneapi_ipc_physical_memory, + Aspectext_oneapi_register_host_memory], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp new file mode 100644 index 0000000000000..85fb45b78d59c --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/register_host_memory.hpp @@ -0,0 +1,82 @@ +//==--- register_host_memory.hpp - SYCL host memory registration extension -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include // for __SYCL_EXPORT +#include + +#include // for size_t +#include // for uint32_t +#include + +namespace sycl { +inline namespace _V1 { +class context; + +namespace ext::oneapi::experimental { + +// Indicates that device code will only read from the registered range. Device +// writes to a range registered with this property are undefined behavior. +struct read_only_key : detail::compile_time_property_key< + detail::PropKind::RegisterHostMemoryReadOnly> { + using value_t = property_value; +}; + +inline constexpr read_only_key::value_t read_only; + +namespace detail { +// Implementation-internal flags describing a host memory registration. They +// are translated to UR flags in the runtime library. +enum register_host_memory_flags : uint32_t { + register_host_memory_flag_read_only = 1u << 0, +}; + +// Non-templated implementation entry points, defined in the SYCL runtime +// library. Flags is a bitwise OR of register_host_memory_flags values. +__SYCL_EXPORT void register_host_memory(void *Ptr, size_t NumBytes, + const context &Ctxt, uint32_t Flags); +__SYCL_EXPORT void unregister_host_memory(void *Ptr, const context &Ctxt); + +// Lowers a compile-time property list to the runtime flags word. +template uint32_t getRegisterHostMemoryFlags() { + uint32_t Flags = 0; + if constexpr (std::decay_t::template has_property< + read_only_key>()) + Flags |= register_host_memory_flag_read_only; + return Flags; +} +} // namespace detail + +/// Registers the existing host memory range \p ptr of \p numBytes bytes with +/// \p ctxt so that it behaves like a USM host allocation. See +/// sycl_ext_oneapi_register_host_memory for the full semantics. +/// +/// \p ptr and \p numBytes must both be aligned to the host page size, \p ptr +/// must not be null, \p numBytes must not be zero, and every device in \p ctxt +/// must have aspect::ext_oneapi_register_host_memory. +template +std::enable_if_t>> +register_host_memory(void *ptr, size_t numBytes, const context &ctxt, + Properties props = {}) { + (void)props; + detail::register_host_memory( + ptr, numBytes, ctxt, detail::getRegisterHostMemoryFlags()); +} + +/// Unregisters a host memory range previously registered with +/// register_host_memory. \p ptr must be the exact base pointer that was passed +/// to register_host_memory with the same \p ctxt, and the registration must +/// still be in effect. This does not free or unmap the underlying host memory. +inline void unregister_host_memory(void *ptr, const context &ctxt) { + detail::unregister_host_memory(ptr, ctxt); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 3f6dde389b795..edb4e79be86a5 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -193,8 +193,9 @@ enum PropKind : uint32_t { ZeroInit = 48, FastLink = 49, PhysicalMemoryEnableIPC = 50, + RegisterHostMemoryReadOnly = 51, // PropKindSize must always be the last value. - PropKindSize = 51, + PropKindSize = 52, }; template struct PropertyToKind { diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 69ff4e02ee761..960493719e83f 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -91,3 +91,4 @@ __SYCL_ASPECT(ext_intel_xe_cores_per_cluster, 93) __SYCL_ASPECT(ext_intel_eus_per_xe_core, 94) __SYCL_ASPECT(ext_intel_max_lanes_per_hw_thread, 95) __SYCL_ASPECT(ext_oneapi_ipc_physical_memory, 96) +__SYCL_ASPECT(ext_oneapi_register_host_memory, 97) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index bf3cd66cc102a..664fecf9f1184 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -127,6 +127,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 8ccd666880158..280a6d053c788 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1391,6 +1391,11 @@ class device_impl { UR_DEVICE_INFO_EXTERNAL_MEMORY_IMPORT_SUPPORT_EXP>() .value_or(0); } + CASE(ext_oneapi_register_host_memory) { + return get_info_impl_nocheck< + UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP>() + .value_or(0); + } CASE(ext_oneapi_external_semaphore_import) { return get_info_impl_nocheck< UR_DEVICE_INFO_EXTERNAL_SEMAPHORE_IMPORT_SUPPORT_EXP>() diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index b09099e1721d2..5dd0829a2584e 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -209,4 +209,5 @@ MAP(UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_IS_INTEGRATED_GPU, ur_bool_t) MAP(UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_IPC_PHYSICAL_MEMORY_SUPPORT_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP, ur_bool_t) // clang-format on diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index d4ae709903748..0d688c51d7e45 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -14,14 +14,23 @@ #include #include #include +#include #include #include #include #include +#include #include +#include #include +#ifdef _WIN32 +#include +#else +#include +#endif + #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting // traces using the trace framework @@ -609,6 +618,105 @@ void release_from_device_copy(const void *Ptr, const queue &Queue) { release_from_usm_device_copy(Ptr, Queue.get_context()); } +// Host memory registration APIs, see sycl_ext_oneapi_register_host_memory. + +namespace detail { + +// Throws errc::feature_not_supported unless every device in the context +// reports aspect::ext_oneapi_register_host_memory. +static void checkRegisterHostMemorySupport(const context &Ctxt) { + detail::context_impl &CtxtImpl = *detail::getSyclObjImpl(Ctxt); + for (detail::device_impl &Dev : CtxtImpl.getDevices()) { + if (!Dev.has(aspect::ext_oneapi_register_host_memory)) + throw sycl::exception( + make_error_code(errc::feature_not_supported), + "At least one device in the context does not support registering " + "host memory (aspect::ext_oneapi_register_host_memory)."); + } +} + +// Maps a failed UR result from the host memory registration APIs to a +// sycl::exception with the error code mandated by the extension specification. +// Invalid argument conditions map to errc::invalid; anything else is a backend +// error. +static void throwRegisterHostMemoryError(ur_result_t Err, const char *What) { + errc Code; + switch (Err) { + case UR_RESULT_ERROR_INVALID_NULL_POINTER: + case UR_RESULT_ERROR_INVALID_VALUE: + case UR_RESULT_ERROR_INVALID_ARGUMENT: + Code = errc::invalid; + break; + default: + Code = errc::runtime; + break; + } + throw detail::set_ur_error(sycl::exception(make_error_code(Code), What), Err); +} + +void register_host_memory(void *Ptr, size_t NumBytes, const context &Ctxt, + uint32_t Flags) { + if (Ptr == nullptr) + throw sycl::exception(make_error_code(errc::invalid), + "register_host_memory: pointer must not be null."); + if (NumBytes == 0) + throw sycl::exception(make_error_code(errc::invalid), + "register_host_memory: size must not be zero."); + static const size_t PageSize = []() { +#ifdef _WIN32 + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + }(); + if (reinterpret_cast(Ptr) % PageSize != 0) + throw sycl::exception( + make_error_code(errc::invalid), + "register_host_memory: pointer must be aligned to the host page size."); + if (NumBytes % PageSize != 0) + throw sycl::exception( + make_error_code(errc::invalid), + "register_host_memory: size must be a multiple of the host page size."); + if (NumBytes > + std::numeric_limits::max() - reinterpret_cast(Ptr)) + throw sycl::exception(make_error_code(errc::invalid), + "register_host_memory: range is not representable in " + "the host address space."); + checkRegisterHostMemorySupport(Ctxt); + + ur_exp_usm_host_alloc_register_properties_t Props = { + UR_STRUCTURE_TYPE_EXP_USM_HOST_ALLOC_REGISTER_PROPERTIES, + /*pNext=*/nullptr, + /*flags=*/0}; + if (Flags & register_host_memory_flag_read_only) + Props.flags |= UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY; + + auto [urCtx, Adapter] = get_ur_handles(Ctxt); + ur_result_t Err = + Adapter->call_nocheck( + urCtx, Ptr, NumBytes, &Props); + if (Err != UR_RESULT_SUCCESS) + throwRegisterHostMemoryError(Err, "register_host_memory failed."); +} + +void unregister_host_memory(void *Ptr, const context &Ctxt) { + if (Ptr == nullptr) + throw sycl::exception(make_error_code(errc::invalid), + "unregister_host_memory: pointer must not be null."); + checkRegisterHostMemorySupport(Ctxt); + + auto [urCtx, Adapter] = get_ur_handles(Ctxt); + ur_result_t Err = + Adapter->call_nocheck( + urCtx, Ptr); + if (Err != UR_RESULT_SUCCESS) + throwRegisterHostMemoryError(Err, "unregister_host_memory failed."); +} + +} // namespace detail + void *malloc_device(size_t numBytes, const device &syclDevice, const property_list &propList) { sycl::context ctxt = syclDevice.get_platform().khr_get_default_context(); diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 35db9c3a97c89..364423910ae26 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -77,6 +77,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_KERNEL_ARG_PROPERTIES 1 #define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1 #define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1 +#define SYCL_EXT_ONEAPI_REGISTER_HOST_MEMORY 1 #define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1 #define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1 #define SYCL_EXT_INTEL_CACHE_CONFIG 1 diff --git a/sycl/test-e2e/USM/register_host_memory.cpp b/sycl/test-e2e/USM/register_host_memory.cpp new file mode 100644 index 0000000000000..4f0ca89a16697 --- /dev/null +++ b/sycl/test-e2e/USM/register_host_memory.cpp @@ -0,0 +1,177 @@ +// REQUIRES: aspect-ext_oneapi_register_host_memory +// REQUIRES: level_zero_v2_adapter + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// End-to-end test for sycl_ext_oneapi_register_host_memory. Registers a +// page-aligned host allocation and exercises: +// - using the registered pointer directly in device code, +// - explicit USM copies to and from the registered memory, +// - get_pointer_type reporting usm::alloc::host while registered, +// - error handling for null pointer and zero size, +// - registering with the read_only property and reading from it in device +// code (device writes to a read_only range are undefined behavior and are +// therefore not exercised). + +#include +#include +#include + +#include +#include +#include + +#if defined(_WIN32) +#include +#include +#else +#include +#endif + +namespace syclexp = sycl::ext::oneapi::experimental; + +static size_t getHostPageSize() { +#if defined(_WIN32) + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif +} + +static void *allocatePageAligned(size_t Alignment, size_t Size) { +#if defined(_WIN32) + return _aligned_malloc(Size, Alignment); +#else + return aligned_alloc(Alignment, Size); +#endif +} + +static void freePageAligned(void *Ptr) { +#if defined(_WIN32) + _aligned_free(Ptr); +#else + free(Ptr); +#endif +} + +int main() { + sycl::queue Q; + sycl::context Ctxt = Q.get_context(); + + const size_t PageSize = getHostPageSize(); + const size_t NumElems = 1024; + // Round the byte size up to a multiple of the page size as required. + size_t NumBytes = NumElems * sizeof(int); + NumBytes = (NumBytes + PageSize - 1) & ~(PageSize - 1); + + int *Data = static_cast(allocatePageAligned(PageSize, NumBytes)); + assert(Data != nullptr && "host allocation failed"); + + // Error handling: null pointer and zero size must throw errc::invalid. + { + bool Threw = false; + try { + syclexp::register_host_memory(nullptr, NumBytes, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "null pointer should throw errc::invalid"); + + Threw = false; + try { + syclexp::register_host_memory(Data, 0, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "zero size should throw errc::invalid"); + + // An unaligned pointer must throw errc::invalid. + Threw = false; + try { + syclexp::register_host_memory(reinterpret_cast(Data) + 64, + NumBytes, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "unaligned pointer should throw errc::invalid"); + + // A size that is not a multiple of the page size must throw errc::invalid. + Threw = false; + try { + syclexp::register_host_memory(Data, NumBytes + 1, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "unaligned size should throw errc::invalid"); + + // A range whose end address would overflow the host address space must + // throw errc::invalid. + Threw = false; + void *TopPage = + reinterpret_cast(static_cast(-1) & ~(PageSize - 1)); + try { + syclexp::register_host_memory(TopPage, PageSize, Ctxt); + } catch (const sycl::exception &E) { + Threw = (E.code() == sycl::errc::invalid); + } + assert(Threw && "non-representable range should throw errc::invalid"); + } + + syclexp::register_host_memory(Data, NumBytes, Ctxt); + + // While registered, the pointer behaves like a USM host allocation. + assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host); + // Interior pointers are also reported as host allocations. + assert(sycl::get_pointer_type(Data + 1, Ctxt) == sycl::usm::alloc::host); + + // The registered pointer can be referenced directly from device code. + Q.parallel_for(NumElems, [=](sycl::id<1> I) { + Data[I] = static_cast(I.get(0)) * 2; + }).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(Data[I] == static_cast(I) * 2); + + // Explicit copies to and from the registered memory. + std::vector HostSrc(NumElems); + for (size_t I = 0; I < NumElems; ++I) + HostSrc[I] = static_cast(I) + 7; + + // Copy from unregistered host memory into the registered range. + Q.memcpy(Data, HostSrc.data(), NumElems * sizeof(int)).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(Data[I] == static_cast(I) + 7); + + // Copy from the registered range back out to plain host memory. + std::vector HostDst(NumElems, 0); + Q.memcpy(HostDst.data(), Data, NumElems * sizeof(int)).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(HostDst[I] == static_cast(I) + 7); + + syclexp::unregister_host_memory(Data, Ctxt); + + // Register the same range with the read_only property and have device code + // read (but never write) it, writing results to a separate allocation. + for (size_t I = 0; I < NumElems; ++I) + Data[I] = static_cast(I) + 1; + + syclexp::register_host_memory(Data, NumBytes, Ctxt, + syclexp::properties{syclexp::read_only}); + assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host); + + int *Out = sycl::malloc_host(NumElems, Q); + assert(Out != nullptr && "host allocation failed"); + Q.parallel_for(NumElems, [=](sycl::id<1> I) { Out[I] = Data[I] * 2; }).wait(); + for (size_t I = 0; I < NumElems; ++I) + assert(Out[I] == (static_cast(I) + 1) * 2); + + sycl::free(Out, Q); + syclexp::unregister_host_memory(Data, Ctxt); + + // The application still owns and must free the host memory. + freePageAligned(Data); + + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6f63c8165f746..f311bbb0e49ee 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3100,6 +3100,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD1Ev _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD2Ev _ZN4sycl3_V13ext6oneapi12experimental6detail17build_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ _ZN4sycl3_V13ext6oneapi12experimental6detail19compile_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ +_ZN4sycl3_V13ext6oneapi12experimental6detail20register_host_memoryEPvmRKNS0_7contextEj _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKNS3_14raw_kernel_argEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKvm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE @@ -3109,6 +3110,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1Ev _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ERKSt10shared_ptrINS4_22dynamic_parameter_implEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2EmPKv _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2Ev +_ZN4sycl3_V13ext6oneapi12experimental6detail22unregister_host_memoryEPvRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph12finalizeImplEv _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph6updateERKNS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph6updateERKNS3_4nodeE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 078c703d1d3aa..90e2803f77200 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4302,6 +4302,7 @@ ?reduGetMaxWGSize@detail@_V1@sycl@@YA_KAEAVhandler@23@_K@Z ?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAVhandler@23@_K@Z ?registerDynamicParameter@handler@_V1@sycl@@AEAAXPEAVdynamic_parameter_impl@detail@experimental@oneapi@ext@23@H@Z +?register_host_memory@detail@experimental@oneapi@ext@_V1@sycl@@YAXPEAX_KAEBVcontext@56@I@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVdevice@45@AEBVcontext@45@@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_mem@12345@AEBVqueue@45@@Z ?release_external_semaphore@experimental@oneapi@ext@_V1@sycl@@YAXUexternal_semaphore@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4386,6 +4387,7 @@ ?unmap@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?unmap_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVdevice@45@AEBVcontext@45@@Z ?unmap_external_linear_memory@experimental@oneapi@ext@_V1@sycl@@YAXPEAXAEBVdevice@45@AEBVcontext@45@@Z +?unregister_host_memory@detail@experimental@oneapi@ext@_V1@sycl@@YAXPEAXAEBVcontext@56@@Z ?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 63730d56ed088..9102a5e45668a 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -25,6 +25,7 @@ add_sycl_unittest(ExtensionsTests OBJECT DeviceInfo.cpp RootGroup.cpp USMPrefetch.cpp + RegisterHostMemory.cpp ) add_subdirectory(BindlessImages) diff --git a/sycl/unittests/Extensions/RegisterHostMemory.cpp b/sycl/unittests/Extensions/RegisterHostMemory.cpp new file mode 100644 index 0000000000000..af255e1c75c00 --- /dev/null +++ b/sycl/unittests/Extensions/RegisterHostMemory.cpp @@ -0,0 +1,236 @@ +//==------------------------ RegisterHostMemory.cpp ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Test that sycl_ext_oneapi_register_host_memory validates its arguments, +// honors the device aspect, and calls the UR host memory registration APIs +// with the correct arguments. + +#include + +#include +#include +#include +#include +#include + +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +using namespace sycl; +namespace syclexp = sycl::ext::oneapi::experimental; + +namespace { + +static size_t getHostPageSize() { +#ifdef _WIN32 + SYSTEM_INFO Info; + GetSystemInfo(&Info); + return static_cast(Info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif +} + +// Whether the mock device should advertise support for host memory +// registration via aspect::ext_oneapi_register_host_memory. +thread_local bool DeviceSupportsRegister = true; + +// Captured arguments of the most recent UR register/unregister call. +thread_local void *LastRegisterPtr = nullptr; +thread_local size_t LastRegisterSize = 0; +thread_local void *LastUnregisterPtr = nullptr; +thread_local int RegisterCallCount = 0; +thread_local int UnregisterCallCount = 0; + +// Registration flags captured from the most recent register call. +thread_local ur_exp_usm_host_alloc_register_flags_t LastRegisterFlags = 0; + +// Result code the register/unregister mock should return, to exercise the +// UR-result-to-errc mapping in the runtime. +thread_local ur_result_t RegisterResult = UR_RESULT_SUCCESS; +thread_local ur_result_t UnregisterResult = UR_RESULT_SUCCESS; + +ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto Params = *static_cast(pParams); + if (*Params.ppropName == UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP) { + if (*Params.ppPropValue) + *static_cast(*Params.ppPropValue) = DeviceSupportsRegister; + if (*Params.ppPropSizeRet) + **Params.ppPropSizeRet = sizeof(ur_bool_t); + return UR_RESULT_SUCCESS; + } + return sycl::unittest::MockAdapter::mock_urDeviceGetInfo(pParams); +} + +ur_result_t redefinedHostAllocRegister(void *pParams) { + auto Params = + *static_cast(pParams); + LastRegisterPtr = *Params.ppHostMem; + LastRegisterSize = *Params.psize; + LastRegisterFlags = *Params.ppProperties ? (*Params.ppProperties)->flags : 0; + ++RegisterCallCount; + return RegisterResult; +} + +ur_result_t redefinedHostAllocUnregister(void *pParams) { + auto Params = + *static_cast(pParams); + LastUnregisterPtr = *Params.ppHostMem; + ++UnregisterCallCount; + return UnregisterResult; +} + +class RegisterHostMemoryTests : public ::testing::Test { +public: + RegisterHostMemoryTests() : Mock{}, Ctxt{platform().get_devices()[0]} {} + +protected: + void SetUp() override { + DeviceSupportsRegister = true; + LastRegisterPtr = nullptr; + LastRegisterSize = 0; + LastUnregisterPtr = nullptr; + RegisterCallCount = 0; + UnregisterCallCount = 0; + RegisterResult = UR_RESULT_SUCCESS; + UnregisterResult = UR_RESULT_SUCCESS; + LastRegisterFlags = 0; + mock::getCallbacks().set_replace_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_replace_callback("urUSMHostAllocRegisterExp", + &redefinedHostAllocRegister); + mock::getCallbacks().set_replace_callback("urUSMHostAllocUnregisterExp", + &redefinedHostAllocUnregister); + } + + unittest::UrMock<> Mock; + context Ctxt; +}; + +// A successful registration forwards the exact pointer and size to UR and a +// matching unregistration forwards the same pointer. +TEST_F(RegisterHostMemoryTests, RegisterAndUnregisterForwardArguments) { + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); + + syclexp::register_host_memory(Ptr, PageSize, Ctxt); + EXPECT_EQ(RegisterCallCount, 1); + EXPECT_EQ(LastRegisterPtr, Ptr); + EXPECT_EQ(LastRegisterSize, PageSize); + // No properties passed: no registration flags should be set. + EXPECT_EQ(LastRegisterFlags, 0u); + + syclexp::unregister_host_memory(Ptr, Ctxt); + EXPECT_EQ(UnregisterCallCount, 1); + EXPECT_EQ(LastUnregisterPtr, Ptr); + + detail::OSUtil::alignedFree(Ptr); +} + +// The read_only property is lowered to the UR read-only registration flag. +TEST_F(RegisterHostMemoryTests, ReadOnlyPropertyLowersToFlag) { + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); + + syclexp::register_host_memory(Ptr, PageSize, Ctxt, + syclexp::properties{syclexp::read_only}); + EXPECT_EQ(RegisterCallCount, 1); + EXPECT_TRUE(LastRegisterFlags & + UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY); + + syclexp::unregister_host_memory(Ptr, Ctxt); + EXPECT_EQ(UnregisterCallCount, 1); + + detail::OSUtil::alignedFree(Ptr); +} + +// A null pointer is rejected with errc::invalid before reaching UR. +TEST_F(RegisterHostMemoryTests, NullPointerThrowsInvalid) { + try { + syclexp::register_host_memory(nullptr, 4096, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + EXPECT_EQ(RegisterCallCount, 0); +} + +// A zero size is rejected with errc::invalid before reaching UR. +TEST_F(RegisterHostMemoryTests, ZeroSizeThrowsInvalid) { + int Storage = 0; + try { + syclexp::register_host_memory(&Storage, 0, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + EXPECT_EQ(RegisterCallCount, 0); +} + +// Unregistering a null pointer is rejected with errc::invalid. +TEST_F(RegisterHostMemoryTests, UnregisterNullThrowsInvalid) { + try { + syclexp::unregister_host_memory(nullptr, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + EXPECT_EQ(UnregisterCallCount, 0); +} + +// When no device in the context supports the feature, registration throws +// errc::feature_not_supported and does not reach UR. +TEST_F(RegisterHostMemoryTests, UnsupportedDeviceThrowsFeatureNotSupported) { + DeviceSupportsRegister = false; + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); + try { + syclexp::register_host_memory(Ptr, PageSize, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::feature_not_supported)); + } + EXPECT_EQ(RegisterCallCount, 0); + detail::OSUtil::alignedFree(Ptr); +} + +// The runtime maps a UR INVALID_VALUE result from either registration API to +// errc::invalid. The result is injected via the mock to test the mapping in +// isolation. +TEST_F(RegisterHostMemoryTests, BackendInvalidValueMapsToInvalid) { + const size_t PageSize = getHostPageSize(); + void *Ptr = detail::OSUtil::alignedAlloc(PageSize, PageSize); + ASSERT_NE(Ptr, nullptr); + + RegisterResult = UR_RESULT_ERROR_INVALID_VALUE; + try { + syclexp::register_host_memory(Ptr, PageSize, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + + UnregisterResult = UR_RESULT_ERROR_INVALID_VALUE; + try { + syclexp::unregister_host_memory(Ptr, Ctxt); + FAIL() << "Expected an exception."; + } catch (const sycl::exception &E) { + EXPECT_EQ(E.code(), make_error_code(errc::invalid)); + } + + detail::OSUtil::alignedFree(Ptr); +} + +} // namespace