Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/sycl-linux-run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,18 @@ jobs:
cat /usr/local/lib/igc/IGCTAG.txt
fi

# Best-effort reservation of a few HugeTLB pages so the explicit huge-page
# path of the register_host_memory E2E test is exercised rather than being
# UNSUPPORTED. Failing to set it just means the huge-page test is UNSUPPORTED
# there, so never fail the job.
- name: Reserve HugeTLB pages
if: inputs.tests_selector == 'e2e'
continue-on-error: true
run: |
grep -i Huge /proc/meminfo
echo 2 | sudo tee /proc/sys/vm/nr_hugepages || true
grep -i Huge /proc/meminfo

- name: Run E2E Tests
if: inputs.tests_selector == 'e2e'
uses: ./devops/actions/run-tests/linux/e2e
Expand Down
29 changes: 29 additions & 0 deletions sycl/test-e2e/USM/Inputs/register_host_memory_helpers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Shared helpers for the sycl_ext_oneapi_register_host_memory end-to-end tests.

#pragma once

#include <cstddef>

#if defined(_WIN32)
#include <windows.h>
#else
#include <unistd.h>
#endif

// Returns the host page size. The extension requires registered ranges to be
// aligned to (and a multiple of) the host page size.
inline size_t getHostPageSize() {
#if defined(_WIN32)
SYSTEM_INFO Info;
GetSystemInfo(&Info);
return static_cast<size_t>(Info.dwPageSize);
#else
return static_cast<size_t>(sysconf(_SC_PAGESIZE));
#endif
}

// Rounds NumBytes up to a whole number of host pages, as the extension requires
// the registered size to be a multiple of the host page size.
inline size_t roundUpToPage(size_t NumBytes, size_t PageSize) {
return (NumBytes + PageSize - 1) & ~(PageSize - 1);
}
18 changes: 3 additions & 15 deletions sycl/test-e2e/USM/register_host_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// code (device writes to a read_only range are undefined behavior and are
// therefore not exercised).

#include "Inputs/register_host_memory_helpers.hpp"

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/register_host_memory.hpp>
#include <sycl/usm.hpp>
Expand All @@ -24,23 +26,10 @@

#if defined(_WIN32)
#include <malloc.h>
#include <windows.h>
#else
#include <unistd.h>
#endif

namespace syclexp = sycl::ext::oneapi::experimental;

static size_t getHostPageSize() {
#if defined(_WIN32)
SYSTEM_INFO Info;
GetSystemInfo(&Info);
return static_cast<size_t>(Info.dwPageSize);
#else
return static_cast<size_t>(sysconf(_SC_PAGESIZE));
#endif
}

static void *allocatePageAligned(size_t Alignment, size_t Size) {
#if defined(_WIN32)
return _aligned_malloc(Size, Alignment);
Expand All @@ -64,8 +53,7 @@ int main() {
const size_t PageSize = getHostPageSize();
const size_t NumElems = 1024;
// Round the byte size up to a multiple of the page size as required.
size_t NumBytes = NumElems * sizeof(int);
NumBytes = (NumBytes + PageSize - 1) & ~(PageSize - 1);
const size_t NumBytes = roundUpToPage(NumElems * sizeof(int), PageSize);

int *Data = static_cast<int *>(allocatePageAligned(PageSize, NumBytes));
assert(Data != nullptr && "host allocation failed");
Expand Down
129 changes: 129 additions & 0 deletions sycl/test-e2e/USM/register_host_memory_huge_pages.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// REQUIRES: aspect-ext_oneapi_register_host_memory
// REQUIRES: level_zero_v2_adapter
// REQUIRES: linux
// REQUIRES: hugepages

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out | FileCheck %s

// End-to-end test for sycl_ext_oneapi_register_host_memory with huge-page
// backed host memory. Two huge-page acquisition paths are exercised:
// - explicit huge pages via mmap(MAP_HUGETLB), and
// - transparent huge pages via mmap + madvise(MADV_HUGEPAGE).
//
// Explicit huge pages require the OS to have huge pages reserved (e.g. via
// /proc/sys/vm/nr_hugepages). The test therefore REQUIRES the "hugepages"
// feature (free HugeTLB pages present) so it is UNSUPPORTED rather than
// silently skipped where none are available. A huge page is itself a multiple
// of the host base page size, so a huge-page-aligned range trivially satisfies
// the extension's page-alignment and size requirements.

#include "Inputs/register_host_memory_helpers.hpp"

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/register_host_memory.hpp>
#include <sycl/usm.hpp>

#include <cassert>
#include <cstdio>
#include <vector>

#include <sys/mman.h>
#include <unistd.h>

// Older glibc headers may not define MAP_HUGETLB; define it to its well-known
// value so the test still builds. The mmap call will simply fail at runtime if
// the running kernel does not support the flag, and that path is skipped.
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000
#endif

namespace syclexp = sycl::ext::oneapi::experimental;

// Default huge page size on x86-64 Linux. Used to size and align the explicit
// MAP_HUGETLB mapping.
static constexpr size_t HugePageSize = 2 * 1024 * 1024; // 2 MiB

// Runs device + copy exercises over a registered range and verifies results.
// Reused by both huge-page paths.
static void exerciseRegisteredRange(sycl::queue &Q, sycl::context &Ctxt,
int *Data, size_t NumElems) {
syclexp::register_host_memory(Data, NumElems * sizeof(int), Ctxt);

// The pointer behaves like a USM host allocation while registered.
assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host);
// Interior pointers are reported as host allocations too.
assert(sycl::get_pointer_type(Data + NumElems / 2, Ctxt) ==
sycl::usm::alloc::host);

// Use the registered pointer directly from device code.
Q.parallel_for(NumElems, [=](sycl::id<1> I) {
Data[I] = static_cast<int>(I.get(0)) + 11;
}).wait();
for (size_t I = 0; I < NumElems; ++I)
assert(Data[I] == static_cast<int>(I) + 11);

// Explicit copy out of the registered range.
std::vector<int> HostDst(NumElems, 0);
Q.memcpy(HostDst.data(), Data, NumElems * sizeof(int)).wait();
for (size_t I = 0; I < NumElems; ++I)
assert(HostDst[I] == static_cast<int>(I) + 11);

syclexp::unregister_host_memory(Data, Ctxt);
}

// Path 1: explicit huge pages via MAP_HUGETLB. The "hugepages" REQUIRES feature
// guarantees free HugeTLB pages exist, so the mapping is expected to succeed.
static void testExplicitHugePages(sycl::queue &Q, sycl::context &Ctxt) {
// One whole huge page worth of memory.
const size_t NumBytes = HugePageSize;
const size_t NumElems = NumBytes / sizeof(int);

void *Map = mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
assert(Map != MAP_FAILED && "explicit MAP_HUGETLB mmap failed");
int *Data = static_cast<int *>(Map);

exerciseRegisteredRange(Q, Ctxt, Data, NumElems);

assert(munmap(Map, NumBytes) == 0 && "munmap failed");
}

// Path 2: transparent huge pages. madvise(MADV_HUGEPAGE) is a best-effort hint
// and does not change the mapping's address or size, so registration of the
// (base-page-aligned) range is valid regardless of whether the kernel actually
// backs it with a transparent huge page.
static void testTransparentHugePages(sycl::queue &Q, sycl::context &Ctxt) {
const size_t PageSize = getHostPageSize();
// Request a 2 MiB region, aligned to the huge page size so the kernel can
// promote it to a transparent huge page.
const size_t NumBytes = HugePageSize;
const size_t NumElems = NumBytes / sizeof(int);

void *Map = mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert(Map != MAP_FAILED && "anonymous mmap failed");
assert((reinterpret_cast<uintptr_t>(Map) & (PageSize - 1)) == 0 &&
"mmap result is not page aligned");

// Best-effort hint; ignore the result. Even if unsupported, the plain
// anonymous mapping below is still valid registrable host memory.
(void)madvise(Map, NumBytes, MADV_HUGEPAGE);

int *Data = static_cast<int *>(Map);
exerciseRegisteredRange(Q, Ctxt, Data, NumElems);

assert(munmap(Map, NumBytes) == 0 && "munmap failed");
}

int main() {
sycl::queue Q;
sycl::context Ctxt = Q.get_context();

testExplicitHugePages(Q, Ctxt);
testTransparentHugePages(Q, Ctxt);

// CHECK: Done (explicit and transparent huge pages tested).
printf("Done (explicit and transparent huge pages tested).\n");
return 0;
}
60 changes: 60 additions & 0 deletions sycl/test-e2e/USM/register_host_memory_mmap.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// REQUIRES: aspect-ext_oneapi_register_host_memory
// REQUIRES: level_zero_v2_adapter
// REQUIRES: linux

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// End-to-end test for sycl_ext_oneapi_register_host_memory with host memory
// obtained from an anonymous private read/write mmap mapping
// (MAP_PRIVATE | MAP_ANONYMOUS). mmap returns page-aligned memory, so the
// registration's page-alignment requirement is satisfied naturally. The
// registered pointer is used directly in device code and read back through an
// explicit copy.

#include "Inputs/register_host_memory_helpers.hpp"

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/register_host_memory.hpp>
#include <sycl/usm.hpp>

#include <cassert>
#include <vector>

#include <sys/mman.h>
#include <unistd.h>

namespace syclexp = sycl::ext::oneapi::experimental;

int main() {
sycl::queue Q;
sycl::context Ctxt = Q.get_context();
const size_t PageSize = getHostPageSize();

const size_t NumElems = 1024;
const size_t NumBytes = roundUpToPage(NumElems * sizeof(int), PageSize);

void *Map = mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert(Map != MAP_FAILED && "anonymous mmap failed");
int *Data = static_cast<int *>(Map);

syclexp::register_host_memory(Data, NumBytes, Ctxt);

// The pointer behaves like a USM host allocation while registered.
assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host);

Q.parallel_for(NumElems, [=](sycl::id<1> I) {
Data[I] = static_cast<int>(I.get(0)) + 1;
}).wait();

std::vector<int> HostDst(NumElems, 0);
Q.memcpy(HostDst.data(), Data, NumElems * sizeof(int)).wait();
for (size_t I = 0; I < NumElems; ++I)
assert(HostDst[I] == static_cast<int>(I) + 1);

syclexp::unregister_host_memory(Data, Ctxt);
assert(munmap(Map, NumBytes) == 0 && "munmap failed");

return 0;
}
73 changes: 73 additions & 0 deletions sycl/test-e2e/USM/register_host_memory_mmap_file_backed.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// REQUIRES: aspect-ext_oneapi_register_host_memory
// REQUIRES: level_zero_v2_adapter
// REQUIRES: linux

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out %t

// End-to-end test for sycl_ext_oneapi_register_host_memory with host memory
// obtained from a file-backed shared mmap mapping (MAP_SHARED) over a temporary
// file. mmap returns page-aligned memory, so the registration's page-alignment
// requirement is satisfied naturally. The temporary file is created next to
// TmpPrefix (a path within the test's output directory) rather than a global
// location such as /tmp.

#include "Inputs/register_host_memory_helpers.hpp"

#include <sycl/detail/core.hpp>
#include <sycl/ext/oneapi/experimental/register_host_memory.hpp>
#include <sycl/usm.hpp>

#include <cassert>
#include <string>
#include <vector>

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

namespace syclexp = sycl::ext::oneapi::experimental;

int main(int argc, char **argv) {
// A path prefix inside the test's output directory is passed as the first
// argument and is used for the temporary file.
std::string TmpPrefix = argc > 1 ? argv[1] : "reghostmem";

sycl::queue Q;
sycl::context Ctxt = Q.get_context();
const size_t PageSize = getHostPageSize();

const size_t NumElems = 1024;
const size_t NumBytes = roundUpToPage(NumElems * sizeof(int), PageSize);

std::string Tmpl = TmpPrefix + "_reghostmem_XXXXXX";
int Fd = mkstemp(Tmpl.data());
assert(Fd >= 0 && "mkstemp failed");
// Unlink immediately; the open fd keeps the file alive until close.
unlink(Tmpl.c_str());
assert(ftruncate(Fd, static_cast<off_t>(NumBytes)) == 0 &&
"ftruncate failed");

void *Map =
mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE, MAP_SHARED, Fd, 0);
assert(Map != MAP_FAILED && "file-backed mmap failed");
int *Data = static_cast<int *>(Map);

syclexp::register_host_memory(Data, NumBytes, Ctxt);
assert(sycl::get_pointer_type(Data, Ctxt) == sycl::usm::alloc::host);

std::vector<int> HostSrc(NumElems);
for (size_t I = 0; I < NumElems; ++I)
HostSrc[I] = static_cast<int>(I) - 5;
Q.memcpy(Data, HostSrc.data(), NumElems * sizeof(int)).wait();

Q.parallel_for(NumElems, [=](sycl::id<1> I) { Data[I] *= 2; }).wait();
for (size_t I = 0; I < NumElems; ++I)
assert(Data[I] == (static_cast<int>(I) - 5) * 2);

syclexp::unregister_host_memory(Data, Ctxt);
assert(munmap(Map, NumBytes) == 0 && "munmap failed");
close(Fd);

return 0;
}
Loading
Loading