Skip to content

Commit 40c4d21

Browse files
committed
[libc] Efficiently implement 'realloc' for AMDGPU devices
Summary: Now that we have `malloc` we can implement `realloc` efficiently. This uses the known chunk sizes to avoid unnecessary allocations. We just return nullptr for NVPTX. I'd remove the list for the entrypoint but then the libc++ code would stop working. When someone writes the NVPTX support this will be trivial.
1 parent 48e8937 commit 40c4d21

File tree

5 files changed

+89
-11
lines changed

5 files changed

+89
-11
lines changed

libc/src/__support/GPU/allocator.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "src/__support/GPU/utils.h"
2323
#include "src/__support/RPC/rpc_client.h"
2424
#include "src/__support/threads/sleep.h"
25+
#include "src/string/memory_utils/inline_memcpy.h"
2526

2627
namespace LIBC_NAMESPACE_DECL {
2728

@@ -550,5 +551,26 @@ void deallocate(void *ptr) {
550551
release_slab(slab);
551552
}
552553

554+
// Resizes the allocation at 'ptr' so that it can hold at least 'size' bytes.
//
//  - A null 'ptr' behaves like a fresh allocation of 'size' bytes.
//  - A pointer with none of the SLAB_ALIGNMENT bits set is treated as a
//    foreign (non-slab) pointer and nullptr is returned.
//  - If the chunk already backing 'ptr' is large enough, 'ptr' is returned
//    unchanged and nothing is copied.
//  - Otherwise a new chunk is allocated, the old chunk's contents are copied
//    into it, and the old chunk is released.
//
// Returns nullptr if a new chunk is required but cannot be allocated. In that
// case the original allocation is left untouched so the caller can keep using
// it or free it, matching the usual 'realloc' contract.
void *reallocate(void *ptr, uint64_t size) {
  if (ptr == nullptr)
    return gpu::allocate(size);

  // Non-slab allocations are considered foreign pointers so we fail.
  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
    return nullptr;

  // The original slab pointer is the 2MiB boundary using the given pointer.
  Slab *slab = reinterpret_cast<Slab *>(
      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));

  // The existing chunk is already big enough, so it can be reused in place.
  const auto chunk_size = slab->get_chunk_size();
  if (chunk_size >= size)
    return ptr;

  // If we need a new chunk we reallocate and copy it over. On failure we must
  // neither write through the null result nor release the caller's data.
  void *new_ptr = gpu::allocate(size);
  if (new_ptr == nullptr)
    return nullptr;

  // The new chunk is strictly larger than the old one, so copying the whole
  // old chunk always fits.
  inline_memcpy(new_ptr, ptr, chunk_size);
  gpu::deallocate(ptr);
  return new_ptr;
}
574+
553575
} // namespace gpu
554576
} // namespace LIBC_NAMESPACE_DECL

libc/src/__support/GPU/allocator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ namespace gpu {
1717

1818
void *allocate(uint64_t size);
1919
void deallocate(void *ptr);
20+
// Resizes the allocation at 'ptr' to hold at least 'size' bytes, returning
// 'ptr' itself when its existing chunk is already large enough. A null 'ptr'
// behaves like allocate(size); nullptr is returned for non-slab pointers.
void *reallocate(void *ptr, uint64_t size);
2021

2122
} // namespace gpu
2223
} // namespace LIBC_NAMESPACE_DECL

libc/src/stdlib/gpu/realloc.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,13 @@
1616
namespace LIBC_NAMESPACE_DECL {
1717

1818
LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
19-
if (ptr == nullptr)
20-
return gpu::allocate(size);
21-
22-
void *newmem = gpu::allocate(size);
23-
if (newmem == nullptr)
24-
return nullptr;
25-
26-
// This will copy garbage if it goes beyond the old allocation size.
27-
inline_memcpy(newmem, ptr, size);
28-
gpu::deallocate(ptr);
29-
return newmem;
19+
// FIXME: NVIDIA targets currently use the built-in 'malloc' which we cannot
20+
// reason about. We still provide this function for compatibility, but it
// always returns nullptr on NVPTX.
21+
#ifndef LIBC_TARGET_ARCH_IS_NVPTX
22+
return gpu::reallocate(ptr, size);
23+
#else
24+
return nullptr;
25+
#endif
3026
}
3127

3228
} // namespace LIBC_NAMESPACE_DECL

libc/test/integration/src/stdlib/gpu/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
1717
--blocks 1024
1818
)
1919

20+
# Integration test for the GPU 'realloc' entrypoint; the loader is invoked
# with '--threads 256' and '--blocks 1024'.
add_integration_test(
21+
realloc
22+
SUITE
23+
stdlib-gpu-integration-tests
24+
SRCS
25+
realloc.cpp
26+
DEPENDS
27+
libc.src.stdlib.malloc
28+
libc.src.stdlib.free
29+
libc.src.stdlib.realloc
30+
LOADER_ARGS
31+
--threads 256
32+
--blocks 1024
33+
)
34+
2035
add_integration_test(
2136
malloc_stress
2237
SUITE
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#include "test/IntegrationTest/test.h"
2+
3+
#include "src/__support/GPU/utils.h"
4+
#include "src/stdlib/free.h"
5+
#include "src/stdlib/malloc.h"
6+
#include "src/stdlib/realloc.h"
7+
8+
using namespace LIBC_NAMESPACE;
9+
10+
// Exercises 'realloc' on the GPU: the malloc-equivalent null case, in-place
// reuse for same-size and shrinking requests, growth with content
// preservation, and a per-thread divergent size. Every allocation made here
// is released before returning.
TEST_MAIN(int, char **, char **) {
  // realloc(nullptr, size) is equivalent to malloc.
  int *alloc = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(nullptr, 32));
  EXPECT_NE(alloc, nullptr);
  *alloc = 42;
  EXPECT_EQ(*alloc, 42);

  // realloc to same size returns the same pointer.
  void *same = LIBC_NAMESPACE::realloc(alloc, 32);
  EXPECT_NE(same, nullptr);
  EXPECT_EQ(same, static_cast<void *>(alloc));
  EXPECT_EQ(reinterpret_cast<int *>(same)[0], 42);

  // realloc to smaller size returns same pointer.
  void *smaller = LIBC_NAMESPACE::realloc(same, 16);
  EXPECT_NE(smaller, nullptr);
  EXPECT_EQ(smaller, same);
  EXPECT_EQ(reinterpret_cast<int *>(smaller)[0], 42);

  // realloc to larger size returns new pointer and preserves contents.
  int *larger = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(smaller, 128));
  EXPECT_NE(larger, nullptr);
  EXPECT_EQ(larger[0], 42);
  // Release the chunk so the test does not leak one allocation per thread.
  LIBC_NAMESPACE::free(larger);

  // realloc works when called with a divergent size.
  int *div = reinterpret_cast<int *>(
      LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
  EXPECT_NE(div, nullptr);
  div[0] = static_cast<int>(gpu::get_thread_id());
  int *div_realloc = reinterpret_cast<int *>(
      LIBC_NAMESPACE::realloc(div, ((gpu::get_thread_id() + 1) * 32)));
  EXPECT_NE(div_realloc, nullptr);
  EXPECT_EQ(div_realloc[0], static_cast<int>(gpu::get_thread_id()));
  LIBC_NAMESPACE::free(div_realloc);

  return 0;
}

0 commit comments

Comments
 (0)