Skip to content

Commit f89d360

Browse files
committed
[SYCL] Implement register_host_memory extension API
- Add the sycl::ext::oneapi::experimental::register_host_memory and unregister_host_memory free functions defined by sycl_ext_oneapi_register_host_memory. - Implement read_only property. - Add a unit tests to verify argument forwarding to the UR host memory registration APIs. - Add e2e test that registers a page-aligned host allocation and checks that the pointer can be used in a kernel, that explicit copies to and from it work etc. Assisted-By: Claude
1 parent a34863f commit f89d360

13 files changed

Lines changed: 561 additions & 1 deletion

File tree

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//==--- register_host_memory.hpp - SYCL host memory registration extension -==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <sycl/detail/export.hpp> // for __SYCL_EXPORT
12+
#include <sycl/ext/oneapi/properties.hpp>
13+
14+
#include <cstddef> // for size_t
15+
#include <cstdint> // for uint32_t
16+
#include <type_traits>
17+
18+
namespace sycl {
19+
inline namespace _V1 {
20+
class context;
21+
22+
namespace ext::oneapi::experimental {
23+
24+
// Indicates that device code will only read from the registered range. Device
25+
// writes to a range registered with this property are undefined behavior.
26+
struct read_only_key
27+
: detail::compile_time_property_key<detail::PropKind::
28+
RegisterHostMemoryReadOnly> {
29+
using value_t = property_value<read_only_key>;
30+
};
31+
32+
inline constexpr read_only_key::value_t read_only;
33+
34+
namespace detail {
35+
// Implementation-internal flags describing a host memory registration. They
36+
// are translated to UR flags in the runtime library.
37+
enum register_host_memory_flags : uint32_t {
38+
register_host_memory_flag_read_only = 1u << 0,
39+
};
40+
41+
// Non-templated implementation entry points, defined in the SYCL runtime
42+
// library. Flags is a bitwise OR of register_host_memory_flags values.
43+
__SYCL_EXPORT void register_host_memory(void *Ptr, size_t NumBytes,
44+
const context &Ctxt, uint32_t Flags);
45+
__SYCL_EXPORT void unregister_host_memory(void *Ptr, const context &Ctxt);
46+
47+
// Lowers a compile-time property list to the runtime flags word.
48+
template <typename Properties> uint32_t getRegisterHostMemoryFlags() {
49+
uint32_t Flags = 0;
50+
if constexpr (std::decay_t<Properties>::template has_property<read_only_key>())
51+
Flags |= register_host_memory_flag_read_only;
52+
return Flags;
53+
}
54+
} // namespace detail
55+
56+
/// Registers the existing host memory range \p ptr of \p numBytes bytes with
57+
/// \p ctxt so that it behaves like a USM host allocation. See
58+
/// sycl_ext_oneapi_register_host_memory for the full semantics.
59+
///
60+
/// \p ptr and \p numBytes must both be aligned to the host page size, \p ptr
61+
/// must not be null, \p numBytes must not be zero, and every device in \p ctxt
62+
/// must have aspect::ext_oneapi_register_host_memory.
63+
template <typename Properties = empty_properties_t>
64+
std::enable_if_t<is_property_list_v<std::decay_t<Properties>>>
65+
register_host_memory(void *ptr, size_t numBytes, const context &ctxt,
66+
Properties props = {}) {
67+
(void)props;
68+
detail::register_host_memory(
69+
ptr, numBytes, ctxt, detail::getRegisterHostMemoryFlags<Properties>());
70+
}
71+
72+
/// Unregisters a host memory range previously registered with
73+
/// register_host_memory. \p ptr must be the exact base pointer that was passed
74+
/// to register_host_memory with the same \p ctxt, and the registration must
75+
/// still be in effect. This does not free or unmap the underlying host memory.
76+
inline void unregister_host_memory(void *ptr, const context &ctxt) {
77+
detail::unregister_host_memory(ptr, ctxt);
78+
}
79+
80+
} // namespace ext::oneapi::experimental
81+
} // namespace _V1
82+
} // namespace sycl

sycl/include/sycl/ext/oneapi/properties/property.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,9 @@ enum PropKind : uint32_t {
193193
ZeroInit = 48,
194194
FastLink = 49,
195195
PhysicalMemoryEnableIPC = 50,
196+
RegisterHostMemoryReadOnly = 51,
196197
// PropKindSize must always be the last value.
197-
PropKindSize = 51,
198+
PropKindSize = 52,
198199
};
199200

200201
template <typename PropertyT> struct PropertyToKind {

sycl/include/sycl/info/aspects.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,4 @@ __SYCL_ASPECT(ext_intel_xe_cores_per_cluster, 93)
9191
__SYCL_ASPECT(ext_intel_eus_per_xe_core, 94)
9292
__SYCL_ASPECT(ext_intel_max_lanes_per_hw_thread, 95)
9393
__SYCL_ASPECT(ext_oneapi_ipc_physical_memory, 96)
94+
__SYCL_ASPECT(ext_oneapi_register_host_memory, 97)

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.")
126126
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
127127
#include <sycl/ext/oneapi/experimental/profiling_tag.hpp>
128128
#include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp>
129+
#include <sycl/ext/oneapi/experimental/register_host_memory.hpp>
129130
#include <sycl/ext/oneapi/experimental/reduction_properties.hpp>
130131
#include <sycl/ext/oneapi/experimental/root_group.hpp>
131132
#include <sycl/ext/oneapi/experimental/syclbin_kernel_bundle.hpp>

sycl/source/detail/device_impl.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,6 +1391,11 @@ class device_impl {
13911391
UR_DEVICE_INFO_EXTERNAL_MEMORY_IMPORT_SUPPORT_EXP>()
13921392
.value_or(0);
13931393
}
1394+
CASE(ext_oneapi_register_host_memory) {
1395+
return get_info_impl_nocheck<
1396+
UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP>()
1397+
.value_or(0);
1398+
}
13941399
CASE(ext_oneapi_external_semaphore_import) {
13951400
return get_info_impl_nocheck<
13961401
UR_DEVICE_INFO_EXTERNAL_SEMAPHORE_IMPORT_SUPPORT_EXP>()

sycl/source/detail/ur_device_info_ret_types.inc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,4 +209,5 @@ MAP(UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP, ur_bool_t)
209209
MAP(UR_DEVICE_INFO_IS_INTEGRATED_GPU, ur_bool_t)
210210
MAP(UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP, ur_bool_t)
211211
MAP(UR_DEVICE_INFO_IPC_PHYSICAL_MEMORY_SUPPORT_EXP, ur_bool_t)
212+
MAP(UR_DEVICE_INFO_USM_HOST_ALLOC_REGISTER_SUPPORT_EXP, ur_bool_t)
212213
// clang-format on

sycl/source/detail/usm/usm_impl.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <sycl/detail/ur.hpp>
1515
#include <sycl/device.hpp>
1616
#include <sycl/ext/intel/experimental/usm_properties.hpp>
17+
#include <sycl/ext/oneapi/experimental/register_host_memory.hpp>
1718
#include <sycl/ext/oneapi/memcpy2d.hpp>
1819
#include <sycl/usm.hpp>
1920

@@ -609,6 +610,83 @@ void release_from_device_copy(const void *Ptr, const queue &Queue) {
609610
release_from_usm_device_copy(Ptr, Queue.get_context());
610611
}
611612

613+
// Host memory registration APIs, see sycl_ext_oneapi_register_host_memory.
614+
615+
namespace detail {
616+
617+
// Throws errc::feature_not_supported unless every device in the context
618+
// reports aspect::ext_oneapi_register_host_memory.
619+
static void checkRegisterHostMemorySupport(const context &Ctxt) {
620+
detail::context_impl &CtxtImpl = *detail::getSyclObjImpl(Ctxt);
621+
for (detail::device_impl &Dev : CtxtImpl.getDevices()) {
622+
if (!Dev.has(aspect::ext_oneapi_register_host_memory))
623+
throw sycl::exception(
624+
make_error_code(errc::feature_not_supported),
625+
"At least one device in the context does not support registering "
626+
"host memory (aspect::ext_oneapi_register_host_memory).");
627+
}
628+
}
629+
630+
// Maps a failed UR result from the host memory registration APIs to a
631+
// sycl::exception with the error code mandated by the extension specification.
632+
// Invalid argument conditions map to errc::invalid; anything else is a backend
633+
// error.
634+
static void throwRegisterHostMemoryError(ur_result_t Err, const char *What) {
635+
errc Code;
636+
switch (Err) {
637+
case UR_RESULT_ERROR_INVALID_NULL_POINTER:
638+
case UR_RESULT_ERROR_INVALID_VALUE:
639+
case UR_RESULT_ERROR_INVALID_ARGUMENT:
640+
Code = errc::invalid;
641+
break;
642+
default:
643+
Code = errc::runtime;
644+
break;
645+
}
646+
throw detail::set_ur_error(sycl::exception(make_error_code(Code), What), Err);
647+
}
648+
649+
void register_host_memory(void *Ptr, size_t NumBytes, const context &Ctxt,
650+
uint32_t Flags) {
651+
if (Ptr == nullptr)
652+
throw sycl::exception(make_error_code(errc::invalid),
653+
"register_host_memory: pointer must not be null.");
654+
if (NumBytes == 0)
655+
throw sycl::exception(make_error_code(errc::invalid),
656+
"register_host_memory: size must not be zero.");
657+
checkRegisterHostMemorySupport(Ctxt);
658+
659+
ur_exp_usm_host_alloc_register_properties_t Props = {
660+
UR_STRUCTURE_TYPE_EXP_USM_HOST_ALLOC_REGISTER_PROPERTIES,
661+
/*pNext=*/nullptr,
662+
/*flags=*/0};
663+
if (Flags & register_host_memory_flag_read_only)
664+
Props.flags |= UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY;
665+
666+
auto [urCtx, Adapter] = get_ur_handles(Ctxt);
667+
ur_result_t Err =
668+
Adapter->call_nocheck<detail::UrApiKind::urUSMHostAllocRegisterExp>(
669+
urCtx, Ptr, NumBytes, &Props);
670+
if (Err != UR_RESULT_SUCCESS)
671+
throwRegisterHostMemoryError(Err, "register_host_memory failed.");
672+
}
673+
674+
void unregister_host_memory(void *Ptr, const context &Ctxt) {
675+
if (Ptr == nullptr)
676+
throw sycl::exception(make_error_code(errc::invalid),
677+
"unregister_host_memory: pointer must not be null.");
678+
checkRegisterHostMemorySupport(Ctxt);
679+
680+
auto [urCtx, Adapter] = get_ur_handles(Ctxt);
681+
ur_result_t Err =
682+
Adapter->call_nocheck<detail::UrApiKind::urUSMHostAllocUnregisterExp>(
683+
urCtx, Ptr);
684+
if (Err != UR_RESULT_SUCCESS)
685+
throwRegisterHostMemoryError(Err, "unregister_host_memory failed.");
686+
}
687+
688+
} // namespace detail
689+
612690
void *malloc_device(size_t numBytes, const device &syclDevice,
613691
const property_list &propList) {
614692
sycl::context ctxt = syclDevice.get_platform().khr_get_default_context();

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ inline namespace _V1 {
7777
#define SYCL_EXT_ONEAPI_KERNEL_ARG_PROPERTIES 1
7878
#define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1
7979
#define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1
80+
#define SYCL_EXT_ONEAPI_REGISTER_HOST_MEMORY 1
8081
#define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1
8182
#define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1
8283
#define SYCL_EXT_INTEL_CACHE_CONFIG 1

0 commit comments

Comments
 (0)