From 1375385916ab0268feaa22faac167bcc98b56353 Mon Sep 17 00:00:00 2001 From: Thomas Spader Date: Tue, 9 Jun 2026 16:42:01 -0400 Subject: [PATCH] mem: heap allocator Implements a reasonably simple heap allocator. Small (less than ~2KB) allocations are rounded up to a fixed-size bucket. These allocations are satisfied from 4KB pages of memory, called spans, each of which is chunked by its bucket size. These chunks are linked, using an intrusive pointer in the chunk itself, to form a free list. In addition to the granular free list inside a span, the heap stores spans themselves in a free list. Whenever that free list is empty, the heap asks the OS for 64KB of memory, splits it into 4KB spans, and links them. When a bucket doesn't have any more memory to serve a request, it claims a span. When a span becomes empty, it is returned to the free list. An empty span is not eagerly returned to the OS; its memory is released only when the heap itself is destroyed. This means that the footprint of the heap is equal to its high watermark. In return for this tradeoff, the allocator's overhead is strictly a function of (a) this watermark at any given time and (b) the number of allocations and their sizes. Compare this to malloc(), where the *order* of allocations also changes the fragmentation pattern. 
I also added some small things: - I pulled in a small fork of ubench.h that I've been working on, which is not in a releasable state but is usable - I ported the existing sp_glob.h benchmark - I wrote some benchmarks for the heap allocator to ballpark it against glibc, but ended up not using the framework - I wrote a small table renderer for the benchmarks - WIP: I moved the existing hash table benchmark to WIP - Unrelated: removed SP_MEM_ARENA_BLOCK_SIZE - Unrelated: reordered the mem section of the header so that the docs for the heap allocator (and by extension other structs) could be colocated with its and only its header - Remove: sp_mem_os_alloc_zero(), vestigial --- Makefile | 12 +- sp.h | 512 ++++++-- sp/sp_prompt.h | 2 +- test/bench/glob.c | 226 ++-- test/bench/heap.c | 557 +++++++++ test/bench/ubench.h | 1876 ++++++++++++++++++++++++++++ test/mem.c | 1 + test/mem/heap.c | 696 +++++++++++ test/mem/mem.h | 2 +- test/tools/table.h | 182 +++ {test => tools/wip}/bench/ht.c | 0 {test => tools/wip}/bench/stb_ds.h | 0 12 files changed, 3870 insertions(+), 196 deletions(-) create mode 100644 test/bench/heap.c create mode 100644 test/bench/ubench.h create mode 100644 test/mem/heap.c create mode 100644 test/tools/table.h rename {test => tools/wip}/bench/ht.c (100%) rename {test => tools/wip}/bench/stb_ds.h (100%) diff --git a/Makefile b/Makefile index 4582377..d9b4d8e 100644 --- a/Makefile +++ b/Makefile @@ -42,8 +42,10 @@ endif CFLAGS = $(CFLAGS_LANG) -g -Werror=return-type -fsanitize=undefined,alignment -fno-sanitize-recover=all $(CFLAGS_PLATFORM) CFLAGS_TEST = -DSP_IMPLEMENTATION -DSP_TEST_IMPLEMENTATION -I. -Itest/tools -Itest +CFLAGS_BENCH = $(CFLAGS_LANG) -g -Werror=return-type -O2 -DSP_IMPLEMENTATION -DUBENCH_ENABLE_PERF_COUNTERS -I. 
-Itest/bench -Itest/tools TESTS = amalg app array asset etc cv env format fmon fs glob ht io math process ps rb str thread time mem prompt leak +BENCHES = glob heap EXAMPLES = app array format hash_table io zero_copy ls palette prompt prompt_fancy signal wc TRIPLES = \ x86_64-linux-none x86_64-linux-gnu x86_64-linux-musl \ @@ -54,16 +56,19 @@ TRIPLES = \ TEST_DIR = $(BUILD_DIR)/test EXAMPLE_DIR = $(BUILD_DIR)/example +BENCH_DIR = $(BUILD_DIR)/bench TEST_BINARIES = $(addsuffix $(EXE),$(addprefix $(TEST_DIR)/,$(TESTS))) EXAMPLE_BINARIES = $(addsuffix $(EXE),$(addprefix $(EXAMPLE_DIR)/,$(EXAMPLES))) +BENCH_BINARIES = $(addsuffix $(EXE),$(addprefix $(BENCH_DIR)/,$(BENCHES))) SP_HEADERS = sp.h $(wildcard sp/*.h) TEST_SOURCES = $(wildcard test/*/*.c) $(wildcard test/*/*.h) $(wildcard test/*/*/*.c) $(wildcard test/*/*/*.h) -.PHONY: all clean tests examples smoke big c cpp gcc tcc $(TRIPLES) +.PHONY: all clean tests examples bench smoke big c cpp gcc tcc $(TRIPLES) all: examples tests tests: $(TEST_BINARIES) examples: $(EXAMPLE_BINARIES) +bench: $(BENCH_BINARIES) $(EXAMPLE_DIR)/%$(EXE): example/%.c $(SP_HEADERS) | $(EXAMPLE_DIR) $(CC) $(CFLAGS) -I. 
-o $@ $< @@ -71,6 +76,9 @@ $(EXAMPLE_DIR)/%$(EXE): example/%.c $(SP_HEADERS) | $(EXAMPLE_DIR) $(TEST_DIR)/%$(EXE): test/%.c $(SP_HEADERS) $(TEST_SOURCES) | $(TEST_DIR) $(CC) $(CFLAGS) $(CFLAGS_TEST) -o $@ $< +$(BENCH_DIR)/%$(EXE): test/bench/%.c $(SP_HEADERS) test/bench/ubench.h test/tools/table.h | $(BENCH_DIR) + $(CC) $(CFLAGS_BENCH) -o $@ $< + $(TRIPLES): +$(MAKE) TRIPLE=$@ examples tests @@ -83,7 +91,7 @@ wasm: +$(MAKE) wasm32-wasi wasm32-freestanding +$(MAKE) MODE=cpp wasm32-wasi wasm32-freestanding -$(BUILD_DIR) $(EXAMPLE_DIR) $(TEST_DIR): +$(BUILD_DIR) $(EXAMPLE_DIR) $(TEST_DIR) $(BENCH_DIR): mkdir -p $@ clean: diff --git a/sp.h b/sp.h index b5bb726..f20f908 100644 --- a/sp.h +++ b/sp.h @@ -67,7 +67,6 @@ sp_utf8 encode, decode, validation, iteration SP_RT_NUM_SPIN_LOCK - SP_MEM_ARENA_BLOCK_SIZE SP_PS_MAX_ARGS SP_PS_MAX_ENV @@ -498,9 +497,13 @@ #define SP_MEM_ALIGNMENT 16 #define SP_ALIGNED SP_ALIGN(SP_MEM_ALIGNMENT) -#define sp_align_up(ptr, align) ((void*)(((uintptr_t)(ptr) + ((uintptr_t)(align) - 1)) & ~((uintptr_t)(align) - 1))) #define sp_align_offset(val, align) ((((val) + ((u64)(align) - 1)) & ~((u64)(align) - 1))) +#define sp_uptr(ptr) ((uintptr_t)(ptr)) +#define sp_align_mask(align) (sp_uptr(align) - 1) +#define sp_align_down(ptr, align) (sp_uptr(ptr) & ~sp_align_mask(align)) +#define sp_align_up(ptr, align) sp_align_down(sp_uptr(ptr) + sp_align_mask(align), align) + #define sp_try(expr) \ do { \ sp_err_t _sp_result = (expr); \ @@ -1427,17 +1430,6 @@ typedef s32 (*sp_entry_fn_t)(s32, const c8**); #endif -// ██████████ ███████████ ███████████ ███████ ███████████ -// ░░███░░░░░█░░███░░░░░███ ░░███░░░░░███ ███░░░░░███ ░░███░░░░░███ -// ░███ █ ░ ░███ ░███ ░███ ░███ ███ ░░███ ░███ ░███ -// ░██████ ░██████████ ░██████████ ░███ ░███ ░██████████ -// ░███░░█ ░███░░░░░███ ░███░░░░░███ ░███ ░███ ░███░░░░░███ -// ░███ ░ █ ░███ ░███ ░███ ░███ ░░███ ███ ░███ ░███ -// ██████████ █████ █████ █████ █████ ░░░███████░ █████ █████ -// ░░░░░░░░░░ ░░░░░ ░░░░░ ░░░░░ ░░░░░ 
░░░░░░░ ░░░░░ ░░░░░ -// @error - - // ██████ ██████ ██████████ ██████ ██████ ███████ ███████████ █████ █████ // ░░██████ ██████ ░░███░░░░░█░░██████ ██████ ███░░░░░███ ░░███░░░░░███ ░░███ ░░███ // ░███░█████░███ ░███ █ ░ ░███░█████░███ ███ ░░███ ░███ ░███ ░░███ ███ @@ -1447,10 +1439,10 @@ typedef s32 (*sp_entry_fn_t)(s32, const c8**); // █████ █████ ██████████ █████ █████ ░░░███████░ █████ █████ █████ // ░░░░░ ░░░░░ ░░░░░░░░░░ ░░░░░ ░░░░░ ░░░░░░░ ░░░░░ ░░░░░ ░░░░░ // @memory -#ifndef SP_MEM_ARENA_BLOCK_SIZE - #define SP_MEM_ARENA_BLOCK_SIZE 4096 -#endif +/////////////// +// ALLOCATOR // +/////////////// typedef enum { SP_ALLOCATOR_MODE_ALLOC, SP_ALLOCATOR_MODE_FREE, @@ -1468,6 +1460,66 @@ typedef struct sp_allocator_t { void* user_data; } sp_mem_t; +SP_API void* sp_mem_allocator_alloc(sp_mem_t arena, u64 size); +SP_API void* sp_mem_allocator_realloc(sp_mem_t arena, void* ptr, u64 size); +SP_API void sp_mem_allocator_free(sp_mem_t arena, void* buffer); +SP_API void* sp_alloc(sp_mem_t mem, u64 size); +SP_API void* sp_realloc(sp_mem_t mem, void* memory, u64 size); +SP_API void sp_free(sp_mem_t mem, void* memory); +SP_API sp_mem_t sp_mem_get_scratch(); + +////////// +// CORE // +////////// +SP_API void sp_mem_copy(void* dest, const void* source, u64 num_bytes); +SP_API void sp_mem_move(void* dest, const void* source, u64 num_bytes); +SP_API bool sp_mem_is_equal(const void* a, const void* b, u64 len); +SP_API void sp_mem_fill(void* buffer, u64 bsize, void* fill, u64 fsize); +SP_API void sp_mem_fill_u8(void* buffer, u64 buffer_size, u8 fill); +SP_API void sp_mem_zero(void* buffer, u64 buffer_size); +#define sp_sys_alloc_n(T, n) (T*)sp_sys_alloc((n) * sizeof(T)) +#define sp_sys_alloc_type(T) sp_sys_alloc_n(T, 1) +#define sp_mem_allocator_alloc_n(a, T, n) (T*)sp_mem_allocator_alloc(a, (n) * sizeof(T)) +#define sp_mem_allocator_alloc_type(a, T) sp_mem_allocator_alloc_n(a, T, 1) +#define sp_mem_arena_alloc_n(a, T, n) (T*)sp_mem_arena_alloc((a), (n) * sizeof(T)) +#define 
sp_mem_arena_alloc_type(a, T) sp_mem_arena_alloc_n(a, T, 1) +#define sp_alloc_n(a, T, n) (T*)sp_alloc(a, (n) * sizeof(T)) +#define sp_alloc_type(a, T) sp_alloc_n(a, T, 1) + +///////////////////// +// FIXED ALLOCATOR // +///////////////////// +typedef struct { + u8* buffer; + u64 capacity; + u64 bytes_used; + u8 alignment; +} sp_mem_fixed_t; + +SP_API sp_mem_fixed_t sp_mem_fixed(void* buffer, u64 capacity); +SP_API sp_mem_fixed_t sp_mem_fixed_ex(void* buffer, u64 capacity, u8 alignment); +SP_API sp_mem_t sp_mem_fixed_as_allocator(sp_mem_fixed_t* fixed); +SP_API void sp_mem_fixed_clear(sp_mem_fixed_t* fixed); +SP_API u64 sp_mem_fixed_bytes_used(sp_mem_fixed_t* fixed); +SP_API void* sp_mem_fixed_on_alloc(void* ud, sp_mem_alloc_mode_t mode, u64 size, void* old); + +//////////////////// +// PAGE ALLOCATOR // +//////////////////// +typedef struct SP_ALIGNED { + u64 size; +} sp_mem_os_header_t; + +SP_API void* sp_mem_os_alloc(u64 size); +SP_API void* sp_mem_os_realloc(void* ptr, u64 size); +SP_API void sp_mem_os_free(void* ptr); +SP_API void* sp_mem_os_on_alloc(void* ud, sp_mem_alloc_mode_t mode, u64 size, void* ptr); +SP_API sp_mem_os_header_t* sp_mem_os_get_header(void* ptr); +SP_API sp_mem_t sp_mem_os_new(); + +///////////////////// +// ARENA ALLOCATOR // +///////////////////// typedef enum { SP_MEM_ARENA_MODE_DEFAULT, SP_MEM_ARENA_MODE_NO_REALLOC, @@ -1489,10 +1541,6 @@ typedef struct { u8 alignment; } sp_mem_arena_t; -typedef struct SP_ALIGNED { - u64 size; -} sp_mem_os_header_t; - typedef struct { sp_mem_arena_t* arena; sp_mem_arena_block_t* block; @@ -1500,35 +1548,9 @@ typedef struct { sp_mem_t mem; } sp_mem_arena_marker_t; -typedef struct { - u8* buffer; - u64 capacity; - u64 bytes_used; - u8 alignment; -} sp_mem_fixed_t; - -SP_API void sp_mem_copy(void* dest, const void* source, u64 num_bytes); -SP_API void sp_mem_move(void* dest, const void* source, u64 num_bytes); -SP_API bool sp_mem_is_equal(const void* a, const void* b, u64 len); -SP_API void 
sp_mem_fill(void* buffer, u64 bsize, void* fill, u64 fsize); -SP_API void sp_mem_fill_u8(void* buffer, u64 buffer_size, u8 fill); -SP_API void sp_mem_zero(void* buffer, u64 buffer_size); -SP_API void* sp_mem_allocator_alloc(sp_mem_t arena, u64 size); -SP_API void* sp_mem_allocator_realloc(sp_mem_t arena, void* ptr, u64 size); -SP_API void sp_mem_allocator_free(sp_mem_t arena, void* buffer); -SP_API void* sp_mem_os_alloc(u64 size); -SP_API void* sp_mem_os_alloc_zero(u64 size); -SP_API void* sp_mem_os_realloc(void* ptr, u64 size); -SP_API void sp_mem_os_free(void* ptr); -SP_API void* sp_mem_os_on_alloc(void* ud, sp_mem_alloc_mode_t mode, u64 size, void* ptr); -SP_API sp_mem_os_header_t* sp_mem_os_get_header(void* ptr); -SP_API sp_mem_t sp_mem_os_new(); -SP_API sp_mem_t sp_mem_arena_as_allocator(sp_mem_arena_t* arena); -SP_API void* sp_alloc(sp_mem_t mem, u64 size); -SP_API void* sp_realloc(sp_mem_t mem, void* memory, u64 size); -SP_API void sp_free(sp_mem_t mem, void* memory); SP_API sp_mem_arena_t* sp_mem_arena_new(sp_mem_t mem); SP_API sp_mem_arena_t* sp_mem_arena_new_ex(sp_mem_t mem, u64 block_size, sp_mem_arena_mode_t mode, u8 alignment); +SP_API sp_mem_t sp_mem_arena_as_allocator(sp_mem_arena_t* arena); SP_API void sp_mem_arena_clear(sp_mem_arena_t* arena); SP_API void sp_mem_arena_destroy(sp_mem_arena_t* arena); SP_API void* sp_mem_arena_on_alloc(void* ptr, sp_mem_alloc_mode_t mode, u64 n, void* old); @@ -1539,29 +1561,129 @@ SP_API u64 sp_mem_arena_bytes_used(sp_mem_arena_t* arena); SP_API void* sp_mem_arena_alloc(sp_mem_arena_t* arena, u64 size); SP_API void* sp_mem_arena_realloc(sp_mem_arena_t* arena, void* ptr, u64 size); SP_API void sp_mem_arena_free(sp_mem_arena_t* arena, void* ptr); -SP_API sp_mem_fixed_t sp_mem_fixed(void* buffer, u64 capacity); -SP_API sp_mem_fixed_t sp_mem_fixed_ex(void* buffer, u64 capacity, u8 alignment); -SP_API sp_mem_t sp_mem_fixed_as_allocator(sp_mem_fixed_t* fixed); -SP_API void sp_mem_fixed_clear(sp_mem_fixed_t* fixed); 
-SP_API u64 sp_mem_fixed_bytes_used(sp_mem_fixed_t* fixed); -SP_API void* sp_mem_fixed_on_alloc(void* ud, sp_mem_alloc_mode_t mode, u64 size, void* old); -SP_API sp_mem_t sp_mem_get_scratch(); SP_API sp_mem_arena_t* sp_mem_get_scratch_arena(); SP_API sp_mem_arena_t* sp_mem_get_scratch_arena_for(sp_mem_t mem); SP_API sp_mem_arena_marker_t sp_mem_begin_scratch(); SP_API sp_mem_arena_marker_t sp_mem_begin_scratch_for(sp_mem_t mem); SP_API void sp_mem_end_scratch(sp_mem_arena_marker_t marker); -#define sp_sys_alloc_n(T, n) (T*)sp_sys_alloc((n) * sizeof(T)) -#define sp_sys_alloc_type(T) sp_sys_alloc_n(T, 1) -#define sp_mem_allocator_alloc_n(a, T, n) (T*)sp_mem_allocator_alloc(a, (n) * sizeof(T)) -#define sp_mem_allocator_alloc_type(a, T) sp_mem_allocator_alloc_n(a, T, 1) -#define sp_mem_arena_alloc_n(a, T, n) (T*)sp_mem_arena_alloc((a), (n) * sizeof(T)) -#define sp_mem_arena_alloc_type(a, T) sp_mem_arena_alloc_n(a, T, 1) -#define sp_alloc_n(a, T, n) (T*)sp_alloc(a, (n) * sizeof(T)) -#define sp_alloc_type(a, T) sp_alloc_n(a, T, 1) +/* + HEAP ALLOCATOR + + sp_mem_heap_t is a general purpose heap allocator, like malloc(). This means + that it's suitable for most allocations that programs want to do, but not + necessarily optimal or ergonomic. + + Small allocations are rounded up to a bucket size and handed out from + bucket-sized chunks carved out of 4KB pages, called spans. Spans are in + turn carved out of 64KB segments, which are what the heap actually + requests from the OS; empty spans go onto a free list for reuse by any + bucket, and segments are only returned to the OS when the heap is + destroyed. Large allocations aren't carved out from anything; each large + allocation makes a syscall. + + ## Design + + The diagram below is a simplified visual of how that 4KB is laid out. First, + there's a header. Then, the remaining memory is split into bucket sized + chunks. 
The span keeps a free list of chunks by using the first pointer-sized bytes + of each free chunk (8 bytes on 64-bit targets) as the link to the next free chunk. + + │ 48B │ 512B │ 512B │ 512B │ + │ header │ chunks[0] │ chunks[1] │ chunks[2] │ + ┌───────────┬────────┬──────────┬────────┬──────────┬───────────────────┐ + │ free_head │ next │ junk │ NULL │ junk │ user bytes │ + └─────┬─────┴─┬───┬──┴──────────┴─┬──────┴──────────┴───────────────────┘ + │ ▲ │ ▲ + └───────┘ └───────────────┘ + + Allocation simply calculates the bucket for the request, finds the span that + holds the chunks for that bucket size, and returns the first such chunk from + the span's free list. + + │ header │ chunks[0] │ chunks[1] │ chunks[2] │ + ┌───────────┬───────────────────┬────────┬──────────┬───────────────────┐ + │ free_head │ user bytes │ NULL │ junk │ user bytes │ + └─────┬─────┴───────────────────┴─┬──────┴──────────┴───────────────────┘ + │ ▲ + └───────────────────────────┘ + + Deallocation is identical, but in reverse; it sets the span's free list head + to the newly freed chunk and links it to the rest of the free list. + + ## Performance Characteristics + + Allocations are always rounded up to the nearest bucket. If you ask + for, say, 50 bytes, the allocator will use a 64-byte chunk. This is, + obviously, wasteful. Past the first step (16 to 32), buckets are spaced roughly 1.2x to 1.5x + apart and sized so that a whole number of chunks packs a span almost + exactly; the worst rounding waste is bounded by the gap to the next + bucket, and the worst packing waste by the span's leftover tail. + + In exchange for this inefficiency, you get better utilization. A freed + chunk is immediately reusable, always. malloc() can fragment across a + program's lifetime no matter what. This heap allocator does not; overhead + is purely a function of your size distribution, not how long the program + has been running. 
+ */ +#define SP_MEM_HEAP_NUM_BUCKETS 16 +#define SP_MEM_HEAP_SEGMENT_SIZE 65536 +#define SP_MEM_HEAP_SPAN_SIZE 4096 +#define SP_MEM_HEAP_MAX_SMALL 2016 +#define SP_MEM_HEAP_SPAN_MAGIC 0x53504D48u +#define SP_MEM_HEAP_LARGE_MAGIC 0x53504C47u + +typedef struct SP_ALIGNED sp_mem_heap_span_t { + u32 magic; + u32 bucket; + u32 in_use; + void* free_head; + struct sp_mem_heap_span_t* prev; + struct sp_mem_heap_span_t* next; + struct sp_mem_heap_t* heap; +} sp_mem_heap_span_t; + +typedef struct sp_mem_heap_segment_t { + struct sp_mem_heap_segment_t* next; +} sp_mem_heap_segment_t; + +typedef struct SP_ALIGNED sp_mem_heap_large_t { + u32 magic; + u32 pad; + u64 size; + u64 capacity; + struct sp_mem_heap_large_t* prev; + struct sp_mem_heap_large_t* next; + struct sp_mem_heap_t* heap; +} sp_mem_heap_large_t; +typedef struct { + sp_mem_heap_span_t* partial; + sp_mem_heap_span_t* full; +} sp_mem_heap_bucket_t; + +typedef struct sp_mem_heap_t { + sp_mem_heap_bucket_t buckets [SP_MEM_HEAP_NUM_BUCKETS]; + sp_mem_heap_large_t* larges; + sp_mem_heap_segment_t* segments; + sp_mem_heap_span_t* recycled; + u64 bytes_used; + u64 bytes_reserved; + u64 peak_reserved; +} sp_mem_heap_t; + +SP_API sp_mem_heap_t* sp_mem_heap_new(); +SP_API void sp_mem_heap_destroy(sp_mem_heap_t* heap); +SP_API sp_mem_t sp_mem_heap_as_allocator(sp_mem_heap_t* heap); +SP_API void* sp_mem_heap_on_alloc(void* ud, sp_mem_alloc_mode_t mode, u64 size, void* ptr); +SP_API void* sp_mem_heap_alloc(sp_mem_heap_t* heap, u64 size); +SP_API void* sp_mem_heap_realloc(sp_mem_heap_t* heap, void* ptr, u64 size); +SP_API void sp_mem_heap_free(sp_mem_heap_t* heap, void* ptr); +SP_API sp_mem_heap_span_t* sp_mem_heap_find_span(sp_mem_heap_t* heap, void* ptr); +/////////// +// SLICE // +/////////// typedef struct { sp_mem_slice_t slice; u64 index; @@ -3624,10 +3746,14 @@ SP_IMP sp_hash_t sp_hash_str(sp_str_t str); // @memory SP_IMP sp_mem_arena_block_t* sp_mem_arena_block_new(sp_mem_arena_t* arena, u64 capacity); -SP_IMP void* 
sp_mem_arena_align_block(sp_mem_arena_block_t* block, u8 alignment); +SP_IMP void* sp_mem_arena_align_block(sp_mem_arena_block_t* block, u8 alignment); SP_IMP sp_mem_arena_block_t* sp_mem_arena_get_block(sp_mem_arena_t* arena, u64 alloc_size); -SP_IMP void* sp_mem_arena_alloc_with_header(sp_mem_arena_t* arena, u64 size); -SP_IMP void* sp_mem_arena_alloc_no_header(sp_mem_arena_t* arena, u64 size); +SP_IMP void* sp_mem_arena_alloc_with_header(sp_mem_arena_t* arena, u64 size); +SP_IMP void* sp_mem_arena_alloc_no_header(sp_mem_arena_t* arena, u64 size); +SP_IMP u32 sp_mem_heap_bucket_of(u64 size); +SP_IMP u64 sp_mem_heap_bucket_size(u32 bucket); +SP_IMP void sp_mem_heap_track_reserve(sp_mem_heap_t* heap, u64 bytes); +SP_IMP void sp_mem_heap_span_release(sp_mem_heap_t* heap, sp_mem_heap_span_t* span); // @string SP_IMP bool sp_utf8_is_cont(u8 b); @@ -4055,6 +4181,7 @@ s32 errno; #define SP_SYSCALL_NUM_DUP3 292 #define SP_SYSCALL_NUM_PIPE2 293 #define SP_SYSCALL_NUM_INOTIFY_INIT1 294 + #define SP_SYSCALL_NUM_PERF_EVENT_OPEN 298 #elif defined(SP_ARM64) #define SP_SYSCALL_NUM_GETCWD 17 @@ -4100,6 +4227,7 @@ s32 errno; #define SP_SYSCALL_NUM_CLONE 220 #define SP_SYSCALL_NUM_EXECVE 221 #define SP_SYSCALL_NUM_MMAP 222 + #define SP_SYSCALL_NUM_PERF_EVENT_OPEN 241 #define SP_SYSCALL_NUM_WAIT4 260 #define SP_SYSCALL_NUM_SENDFILE 71 #define SP_SYSCALL_NUM_COPY_FILE_RANGE 285 @@ -8064,7 +8192,7 @@ sp_mem_arena_block_t* sp_mem_arena_block_new(sp_mem_arena_t* arena, u64 capacity } sp_mem_arena_t* sp_mem_arena_new(sp_mem_t mem) { - return sp_mem_arena_new_ex(mem, SP_MEM_ARENA_BLOCK_SIZE, SP_MEM_ARENA_MODE_DEFAULT, SP_MEM_ALIGNMENT); + return sp_mem_arena_new_ex(mem, 4096, SP_MEM_ARENA_MODE_DEFAULT, SP_MEM_ALIGNMENT); } sp_mem_arena_t* sp_mem_arena_new_ex(sp_mem_t mem, u64 block_size, sp_mem_arena_mode_t mode, u8 alignment) { @@ -8365,10 +8493,6 @@ void* sp_mem_os_alloc(u64 size) { return h + 1; } -void* sp_mem_os_alloc_zero(u64 size) { - return sp_mem_os_alloc(size); -} - void 
sp_mem_os_free(void* ptr) { if (!ptr) return; sp_mem_os_header_t* h = sp_mem_os_get_header(ptr); @@ -14246,7 +14370,7 @@ sp_mem_os_header_t* sp_mem_os_get_header(void* ptr) { void* sp_mem_os_on_alloc(void* user_data, sp_mem_alloc_mode_t mode, u64 size, void* ptr) { (void)user_data; switch (mode) { - case SP_ALLOCATOR_MODE_ALLOC: return sp_mem_os_alloc_zero(size); + case SP_ALLOCATOR_MODE_ALLOC: return sp_mem_os_alloc(size); case SP_ALLOCATOR_MODE_RESIZE: return sp_mem_os_realloc(ptr, size); case SP_ALLOCATOR_MODE_FREE: sp_mem_os_free(ptr); return SP_NULLPTR; default: return SP_NULLPTR; @@ -14260,6 +14384,252 @@ sp_mem_t sp_mem_os_new() { return allocator; } +//////////////////// +// HEAP ALLOCATOR // +//////////////////// +u32 sp_mem_heap_bucket_of(u64 size) { + sp_for(it, SP_MEM_HEAP_NUM_BUCKETS) { + if (size <= sp_mem_heap_bucket_size(it)) return it; + } + return SP_MEM_HEAP_NUM_BUCKETS; +} + +u64 sp_mem_heap_bucket_size(u32 bucket) { + static const u16 sizes [SP_MEM_HEAP_NUM_BUCKETS] = { + 16, 32, 48, 64, 96, 128, 192, 256, 336, 448, 576, 672, 800, 1008, 1344, SP_MEM_HEAP_MAX_SMALL + }; + return sizes[bucket]; +} + +#define sp_mem_heap_list_push(head, node) do { \ + (node)->prev = SP_NULLPTR; \ + (node)->next = *(head); \ + if (*(head)) (*(head))->prev = (node); \ + *(head) = (node); \ + } while (0) + +#define sp_mem_heap_list_unlink(head, node) do { \ + if ((node)->prev) (node)->prev->next = (node)->next; \ + else *(head) = (node)->next; \ + if ((node)->next) (node)->next->prev = (node)->prev; \ + (node)->prev = SP_NULLPTR; \ + (node)->next = SP_NULLPTR; \ + } while (0) + +void sp_mem_heap_track_reserve(sp_mem_heap_t* heap, u64 bytes) { + heap->bytes_reserved += bytes; + heap->peak_reserved = sp_max(heap->peak_reserved, heap->bytes_reserved); +} + +sp_mem_heap_t* sp_mem_heap_new() { + sp_mem_heap_t* heap = sp_sys_alloc_type(sp_mem_heap_t); + if (!heap) return SP_NULLPTR; + sp_mem_heap_track_reserve(heap, sp_align_offset(sizeof(sp_mem_heap_t), 
SP_MEM_HEAP_SPAN_SIZE)); + return heap; +} + +void sp_mem_heap_destroy(sp_mem_heap_t* heap) { + if (!heap) return; + + sp_mem_heap_large_t* large = heap->larges; + while (large) { + sp_mem_heap_large_t* next = large->next; + sp_sys_free(large, large->capacity); + large = next; + } + + sp_mem_heap_segment_t* segment = heap->segments; + while (segment) { + sp_mem_heap_segment_t* next = segment->next; + sp_sys_free(segment, SP_MEM_HEAP_SEGMENT_SIZE); + segment = next; + } + + sp_sys_free(heap, sizeof(*heap)); +} + +sp_mem_heap_span_t* sp_mem_heap_find_span(sp_mem_heap_t* heap, void* ptr) { + if (!heap || !ptr) return SP_NULLPTR; + + sp_mem_heap_span_t* span = sp_ptr_cast(sp_mem_heap_span_t*, sp_align_down(ptr, SP_MEM_HEAP_SPAN_SIZE)); + if (span->magic != SP_MEM_HEAP_SPAN_MAGIC) return SP_NULLPTR; + if (span->heap != heap) return SP_NULLPTR; + return span; +} + +static sp_mem_heap_span_t* sp_mem_heap_span_new(sp_mem_heap_t* heap, u32 bucket) { + sp_mem_heap_span_t* span = heap->recycled; + if (span) { + sp_mem_heap_list_unlink(&heap->recycled, span); + } + else { + sp_mem_heap_segment_t* segment = sp_ptr_cast(sp_mem_heap_segment_t*, sp_sys_alloc(SP_MEM_HEAP_SEGMENT_SIZE)); + if (!segment) return SP_NULLPTR; + sp_assert(sp_align_down(segment, SP_MEM_HEAP_SPAN_SIZE) == sp_uptr(segment)); + segment->next = heap->segments; + heap->segments = segment; + sp_mem_heap_track_reserve(heap, SP_MEM_HEAP_SEGMENT_SIZE); + + span = sp_ptr_cast(sp_mem_heap_span_t*, (u8*)segment + SP_MEM_HEAP_SPAN_SIZE); + sp_for_range(it, 2, SP_MEM_HEAP_SEGMENT_SIZE / SP_MEM_HEAP_SPAN_SIZE) { + sp_mem_heap_span_t* slot = sp_ptr_cast(sp_mem_heap_span_t*, (u8*)segment + (it * SP_MEM_HEAP_SPAN_SIZE)); + sp_mem_heap_list_push(&heap->recycled, slot); + } + } + + u64 bucket_size = sp_mem_heap_bucket_size(bucket); + u8* base = (u8*)span + sizeof(sp_mem_heap_span_t); + u8* end = (u8*)span + SP_MEM_HEAP_SPAN_SIZE; + u32 num_chunks = (u32)((u64)(end - base) / bucket_size); + sp_assert(num_chunks); + + 
span->magic = SP_MEM_HEAP_SPAN_MAGIC; + span->bucket = bucket; + span->in_use = 0; + span->heap = heap; + + void* head = SP_NULLPTR; + sp_for(it, num_chunks) { + u8* chunk = base + ((num_chunks - 1 - it) * bucket_size); + *(void**)chunk = head; + head = chunk; + } + span->free_head = head; + + sp_mem_heap_list_push(&heap->buckets[bucket].partial, span); + return span; +} + +void sp_mem_heap_span_release(sp_mem_heap_t* heap, sp_mem_heap_span_t* span) { + sp_mem_heap_list_unlink(&heap->buckets[span->bucket].partial, span); + span->magic = 0; + sp_mem_heap_list_push(&heap->recycled, span); +} + +void* sp_mem_heap_alloc(sp_mem_heap_t* heap, u64 size) { + if (!heap) return SP_NULLPTR; + + u32 bucket = sp_mem_heap_bucket_of(size); + if (bucket < SP_MEM_HEAP_NUM_BUCKETS) { + sp_mem_heap_span_t* span = heap->buckets[bucket].partial; + if (!span) span = sp_mem_heap_span_new(heap, bucket); + if (!span) return SP_NULLPTR; + + void* chunk = span->free_head; + span->free_head = *(void**)chunk; + span->in_use++; + if (!span->free_head) { + sp_mem_heap_list_unlink(&heap->buckets[bucket].partial, span); + sp_mem_heap_list_push(&heap->buckets[bucket].full, span); + } + + u64 bucket_size = sp_mem_heap_bucket_size(bucket); + heap->bytes_used += bucket_size; + sp_mem_zero(chunk, bucket_size); + return chunk; + } + + u64 capacity = sp_align_offset(size + sizeof(sp_mem_heap_large_t), SP_MEM_HEAP_SPAN_SIZE); + if (capacity <= size) return SP_NULLPTR; + sp_mem_heap_large_t* large = (sp_mem_heap_large_t*)sp_sys_alloc(capacity); + if (!large) return SP_NULLPTR; + large->magic = SP_MEM_HEAP_LARGE_MAGIC; + large->size = size; + large->capacity = capacity; + large->heap = heap; + sp_mem_heap_list_push(&heap->larges, large); + sp_mem_heap_track_reserve(heap, capacity); + heap->bytes_used += size; + return large + 1; +} + +void sp_mem_heap_free(sp_mem_heap_t* heap, void* ptr) { + if (!heap || !ptr) return; + + sp_mem_heap_span_t* span = sp_mem_heap_find_span(heap, ptr); + if (span) { + 
sp_assert(span->in_use); + bool was_full = !span->free_head; + *(void**)ptr = span->free_head; + span->free_head = ptr; + span->in_use--; + heap->bytes_used -= sp_mem_heap_bucket_size(span->bucket); + + if (was_full) { + sp_mem_heap_list_unlink(&heap->buckets[span->bucket].full, span); + sp_mem_heap_list_push(&heap->buckets[span->bucket].partial, span); + } + if (!span->in_use) { + sp_mem_heap_span_release(heap, span); + } + return; + } + + sp_mem_heap_large_t* large = ((sp_mem_heap_large_t*)ptr) - 1; + sp_assert(large->magic == SP_MEM_HEAP_LARGE_MAGIC); + sp_assert(large->heap == heap); + sp_mem_heap_list_unlink(&heap->larges, large); + heap->bytes_used -= large->size; + heap->bytes_reserved -= large->capacity; + sp_sys_free(large, large->capacity); +} + +void* sp_mem_heap_realloc(sp_mem_heap_t* heap, void* ptr, u64 size) { + if (!heap) return SP_NULLPTR; + if (!ptr) return sp_mem_heap_alloc(heap, size); + if (!size) { + sp_mem_heap_free(heap, ptr); + return SP_NULLPTR; + } + + u64 old_size = 0; + sp_mem_heap_span_t* span = sp_mem_heap_find_span(heap, ptr); + if (span) { + u64 bucket_size = sp_mem_heap_bucket_size(span->bucket); + if (sp_mem_heap_bucket_of(size) == span->bucket) { + sp_mem_zero((u8*)ptr + size, bucket_size - size); + return ptr; + } + old_size = bucket_size; + } + else { + sp_mem_heap_large_t* large = ((sp_mem_heap_large_t*)ptr) - 1; + sp_assert(large->magic == SP_MEM_HEAP_LARGE_MAGIC); + sp_assert(large->heap == heap); + if (size > SP_MEM_HEAP_MAX_SMALL && size <= large->capacity - sizeof(sp_mem_heap_large_t)) { + if (size < large->size) sp_mem_zero((u8*)ptr + size, large->size - size); + heap->bytes_used -= large->size; + heap->bytes_used += size; + large->size = size; + return ptr; + } + old_size = large->size; + } + + void* fresh = sp_mem_heap_alloc(heap, size); + if (!fresh) return SP_NULLPTR; + sp_mem_copy(fresh, ptr, sp_min(old_size, size)); + sp_mem_heap_free(heap, ptr); + return fresh; +} + +void* sp_mem_heap_on_alloc(void* user_data, 
sp_mem_alloc_mode_t mode, u64 size, void* ptr) { + sp_mem_heap_t* heap = (sp_mem_heap_t*)user_data; + switch (mode) { + case SP_ALLOCATOR_MODE_ALLOC: return sp_mem_heap_alloc(heap, size); + case SP_ALLOCATOR_MODE_RESIZE: return sp_mem_heap_realloc(heap, ptr, size); + case SP_ALLOCATOR_MODE_FREE: sp_mem_heap_free(heap, ptr); return SP_NULLPTR; + } + return SP_NULLPTR; +} + +sp_mem_t sp_mem_heap_as_allocator(sp_mem_heap_t* heap) { + return (sp_mem_t) { + .on_alloc = sp_mem_heap_on_alloc, + .user_data = heap + }; +} + void* sp_alloc(sp_mem_t allocator, u64 size) { return sp_mem_allocator_alloc(allocator, size); } diff --git a/sp/sp_prompt.h b/sp/sp_prompt.h index 2df9f83..95c911a 100644 --- a/sp/sp_prompt.h +++ b/sp/sp_prompt.h @@ -1014,7 +1014,7 @@ void sp_prompt_ctx_init(sp_prompt_ctx_t* ctx, sp_mem_t mem, u32 cols, u32 rows) sp_da_init(ctx->mem, ctx->frames); sp_mutex_init(&ctx->channel.lock, SP_MUTEX_PLAIN); - ctx->channel.arena = sp_mem_arena_new_ex(mem, SP_MEM_ARENA_BLOCK_SIZE, SP_MEM_ARENA_MODE_DEFAULT, SP_MEM_ALIGNMENT); + ctx->channel.arena = sp_mem_arena_new_ex(mem, 4096, SP_MEM_ARENA_MODE_DEFAULT, SP_MEM_ALIGNMENT); // Write buffering is really important, because our rendering algorithm is extremely // naive. 
It's not much more than this: diff --git a/test/bench/glob.c b/test/bench/glob.c index 8f237bb..e9811f5 100644 --- a/test/bench/glob.c +++ b/test/bench/glob.c @@ -1,144 +1,128 @@ -#define SP_APP -#include "sp.h" +#include "ubench.h" #include "sp/sp_glob.h" -#define BENCH_ITERATIONS 1000000 +#define GLOB_BENCH_MAX_PATTERNS 16 typedef struct { - const c8* pattern; - const c8* path; -} bench_case_t; + bool match; +} glob_bench_expect_t; typedef struct { - sp_str_t name; - f64 ns_per_op; -} bench_result_t; - -// From original rust globset benchmarks -static bench_case_t bench_cases[] = { - {.pattern = "*.txt", .path = "some/a/bigger/path/to/the/crazy/needle.txt"}, - {.pattern = "some/**/needle.txt", .path = "some/needle.txt"}, - {.pattern = "some/**/needle.txt", .path = "some/a/bigger/path/to/the/crazy/needle.txt"}, -}; - -static const c8* case_names[] = { - "ext", "short", "long", -}; - -// From original rust globset benchmarks -static const c8* many_short_patterns[] = { - ".*.swp", - "tags", - "target", - "*.lock", - "tmp", - "*.csv", - "*.fst", - "*-got", - "*.csv.idx", - "words", - "98m*", - "dict", - "test", - "months", -}; - -static const c8* many_short_path = "98m-blah.csv.idx"; - -static f64 run_glob_bench(sp_glob_t* g, sp_str_t path) { - for (u32 i = 0; i < 1000; i++) { - sp_glob_match(g, path); - } - - volatile bool result; - sp_tm_point_t start = sp_tm_now_point(); - for (u32 i = 0; i < BENCH_ITERATIONS; i++) { - result = sp_glob_match(g, path); + const c8* patterns[GLOB_BENCH_MAX_PATTERNS]; + const c8* path; + glob_bench_expect_t expect; +} glob_bench_t; + +static void run_glob_bench(ubench_run_state_t* ubench_run_state, glob_bench_t bench) { + sp_mem_arena_marker_t scratch = sp_mem_begin_scratch(); + sp_glob_t* glob = sp_glob_new(scratch.mem, bench.patterns[0]); + sp_str_t path = sp_str_view(bench.path); + + SP_ASSERT(glob != SP_NULLPTR); + SP_ASSERT(sp_glob_match(glob, path) == bench.expect.match); + + UBENCH_DO_BENCHMARK() { + UBENCH_LOOP { + bool 
matched = sp_glob_match(glob, path); + UBENCH_DO_NOT_OPTIMIZE(matched); + } } - sp_tm_point_t end = sp_tm_now_point(); - (void)result; - return (f64)sp_tm_point_diff(end, start) / (f64)BENCH_ITERATIONS; + sp_mem_end_scratch(scratch); } -static f64 run_glob_set_bench(sp_glob_set_t* set, sp_str_t path) { - for (u32 i = 0; i < 1000; i++) { - sp_glob_set_match(set, path); +static void run_glob_set_bench(ubench_run_state_t* ubench_run_state, glob_bench_t bench) { + sp_mem_arena_marker_t scratch = sp_mem_begin_scratch(); + sp_glob_set_t* set = sp_glob_set_new(scratch.mem); + sp_carr_for(bench.patterns, it) { + if (!bench.patterns[it]) break; + sp_glob_set_add(set, bench.patterns[it]); } + sp_glob_set_build(set); + sp_str_t path = sp_str_view(bench.path); + + SP_ASSERT(sp_glob_set_match(set, path) == bench.expect.match); - volatile bool result; - sp_tm_point_t start = sp_tm_now_point(); - for (u32 i = 0; i < BENCH_ITERATIONS; i++) { - result = sp_glob_set_match(set, path); + UBENCH_DO_BENCHMARK() { + UBENCH_LOOP { + bool matched = sp_glob_set_match(set, path); + UBENCH_DO_NOT_OPTIMIZE(matched); + } } - sp_tm_point_t end = sp_tm_now_point(); - (void)result; - return (f64)sp_tm_point_diff(end, start) / (f64)BENCH_ITERATIONS; + sp_mem_end_scratch(scratch); } -int main(int argc, char** argv) { - (void)argc; - (void)argv; - - sp_mem_arena_t* arena = sp_mem_arena_new_ex(sp_mem_os_new(), 4 * 1024 * 1024, SP_MEM_ARENA_MODE_DEFAULT, SP_MEM_ALIGNMENT); - sp_mem_t allocator = sp_mem_arena_as_allocator(arena); - (void)allocator; - - u32 num_cases = sizeof(bench_cases) / sizeof(bench_cases[0]); - sp_da(bench_result_t) results = sp_da_new(allocator, bench_result_t); - - // Pre-compile all globs - sp_da(sp_glob_t*) globs = sp_da_new(allocator, sp_glob_t*); - sp_da(sp_glob_set_t*) globsets = sp_da_new(allocator, sp_glob_set_t*); - sp_carr_for(bench_cases, i) { - sp_glob_t* g = sp_glob_new(allocator, bench_cases[i].pattern); - SP_ASSERT(g != SP_NULLPTR); - sp_da_push(globs, g); - - 
sp_glob_set_t* set = sp_glob_set_new(allocator); - sp_glob_set_add(set, bench_cases[i].pattern); - sp_glob_set_build(set); - sp_da_push(globsets, set); - } +UBENCH_EX(glob, ext) { + run_glob_bench(ubench_run_state, (glob_bench_t) { + .patterns = { "*.txt" }, + .path = "some/a/bigger/path/to/the/crazy/needle.txt", + .expect = { .match = true }, + }); +} - // Pre-compile many_short globset - sp_glob_set_t* many_short_set = sp_glob_set_new(allocator); - sp_carr_for(many_short_patterns, i) { - sp_glob_set_add(many_short_set, many_short_patterns[i]); - } - sp_glob_set_build(many_short_set); - - // Single glob benchmarks - for (u32 i = 0; i < num_cases; i++) { - sp_str_t path = sp_str_view(bench_cases[i].path); - SP_ASSERT(sp_glob_match(globs[i], path)); - f64 ns = run_glob_bench(globs[i], path); - sp_str_t name = sp_fmt(sp_mem_get_scratch(), "{}_glob", sp_fmt_cstr(case_names[i])).value; - sp_da_push(results, ((bench_result_t){.name = name, .ns_per_op = ns})); - } +UBENCH_EX(glob, short) { + run_glob_bench(ubench_run_state, (glob_bench_t) { + .patterns = { "some/**/needle.txt" }, + .path = "some/needle.txt", + .expect = { .match = true }, + }); +} - // GlobSet single pattern benchmarks - for (u32 i = 0; i < num_cases; i++) { - sp_str_t path = sp_str_view(bench_cases[i].path); - SP_ASSERT(sp_glob_set_match(globsets[i], path)); - f64 ns = run_glob_set_bench(globsets[i], path); - sp_str_t name = sp_fmt(sp_mem_get_scratch(), "{}_globset", sp_fmt_cstr(case_names[i])).value; - sp_da_push(results, ((bench_result_t){.name = name, .ns_per_op = ns})); - } +UBENCH_EX(glob, long) { + run_glob_bench(ubench_run_state, (glob_bench_t) { + .patterns = { "some/**/needle.txt" }, + .path = "some/a/bigger/path/to/the/crazy/needle.txt", + .expect = { .match = true }, + }); +} - // many_short benchmark (14 patterns, 2 matches expected) - { - sp_str_t path = sp_str_view(many_short_path); - f64 ns = run_glob_set_bench(many_short_set, path); - sp_da_push(results, ((bench_result_t){.name = 
sp_str_lit("many_short_globset"), .ns_per_op = ns})); - } +UBENCH_EX(globset, ext) { + run_glob_set_bench(ubench_run_state, (glob_bench_t) { + .patterns = { "*.txt" }, + .path = "some/a/bigger/path/to/the/crazy/needle.txt", + .expect = { .match = true }, + }); +} - // Print space-separated pairs - sp_da_for(results, i) { - sp_log("{} {}", sp_fmt_str(results[i].name), sp_fmt_float(results[i].ns_per_op)); - } +UBENCH_EX(globset, short) { + run_glob_set_bench(ubench_run_state, (glob_bench_t) { + .patterns = { "some/**/needle.txt" }, + .path = "some/needle.txt", + .expect = { .match = true }, + }); +} + +UBENCH_EX(globset, long) { + run_glob_set_bench(ubench_run_state, (glob_bench_t) { + .patterns = { "some/**/needle.txt" }, + .path = "some/a/bigger/path/to/the/crazy/needle.txt", + .expect = { .match = true }, + }); +} - return 0; +UBENCH_EX(globset, many_short) { + run_glob_set_bench(ubench_run_state, (glob_bench_t) { + .patterns = { + ".*.swp", + "tags", + "target", + "*.lock", + "tmp", + "*.csv", + "*.fst", + "*-got", + "*.csv.idx", + "words", + "98m*", + "dict", + "test", + "months", + }, + .path = "98m-blah.csv.idx", + .expect = { .match = true }, + }); } + +UBENCH_MAIN() diff --git a/test/bench/heap.c b/test/bench/heap.c new file mode 100644 index 0000000..7acf82f --- /dev/null +++ b/test/bench/heap.c @@ -0,0 +1,557 @@ +#include "sp.h" + +#define SP_TABLE_IMPLEMENTATION +#include "table.h" + +#if defined(__GLIBC__) + #include <malloc.h> +#endif + +#define BENCH_MAX_SLOTS 262144 +#define BENCH_MAX_BACKENDS 3 +#define BENCH_MAX_PHASES 3 + +typedef enum { + DIST_FIXED, + DIST_UNIFORM, + DIST_LOG, +} bench_dist_t; + +typedef enum { + WORK_CHURN, + WORK_RAMP_LIFO, + WORK_RAMP_FIFO, + WORK_PIN, + WORK_REALLOC, +} bench_work_t; + +typedef struct { + const c8* name; + bench_work_t kind; + bench_dist_t dist; + u64 lo; + u64 hi; + u32 slots; + u32 ops; + u32 survive_pct; +} bench_workload_t; + +typedef struct { + const c8* name; + void* (*create)(); + void (*destroy)(void* ctx); +
sp_mem_t (*as_mem)(void* ctx); + bool (*sample)(void* ctx, u64* used, u64* reserved); +} bench_backend_t; + +typedef struct { + void* ptrs [BENCH_MAX_SLOTS]; + u64 sizes [BENCH_MAX_SLOTS]; + u64 live_req; + u64 rng; + sp_mem_t mem; +} bench_state_t; + +static bench_state_t state = sp_zero; + +static u64 bench_rng_next() { + u64 x = state.rng; + x ^= x >> 12; + x ^= x << 25; + x ^= x >> 27; + state.rng = x; + return x * 0x2545F4914F6CDD1DULL; +} + +static u64 bench_log2(u64 v) { + u64 r = 0; + while (v >>= 1) r++; + return r; +} + +static u64 bench_dist_size(const bench_workload_t* w) { + switch (w->dist) { + case DIST_FIXED: return w->lo; + case DIST_UNIFORM: return w->lo + (bench_rng_next() % (w->hi - w->lo + 1)); + case DIST_LOG: { + u64 lo_exp = bench_log2(w->lo); + u64 hi_exp = bench_log2(w->hi); + u64 e = lo_exp + (bench_rng_next() % (hi_exp - lo_exp + 1)); + u64 size = ((u64)1 << e) + (bench_rng_next() % ((u64)1 << e)); + return sp_min(sp_max(size, w->lo), w->hi); + } + } + return w->lo; +} + +static void bench_touch(void* ptr, u64 size) { + u8* bytes = (u8*)ptr; + u64 head = size & ~(u64)7; + for (u64 i = 0; i < head; i += 8) { + *(u64*)(bytes + i) = 0x5050505050505050ULL; + } + for (u64 i = head; i < size; i++) { + bytes[i] = 0x50; + } +} + +static void bench_alloc_slot(const bench_workload_t* w, u32 slot) { + u64 size = bench_dist_size(w); + state.ptrs[slot] = sp_alloc(state.mem, size); + SP_ASSERT(state.ptrs[slot]); + bench_touch(state.ptrs[slot], size); + state.sizes[slot] = size; + state.live_req += size; +} + +static void bench_free_slot(u32 slot) { + sp_free(state.mem, state.ptrs[slot]); + state.live_req -= state.sizes[slot]; + state.ptrs[slot] = SP_NULLPTR; + state.sizes[slot] = 0; +} + +static void bench_realloc_slot(u32 slot, u64 size) { + state.ptrs[slot] = sp_realloc(state.mem, state.ptrs[slot], size); + SP_ASSERT(state.ptrs[slot]); + bench_touch(state.ptrs[slot], size); + state.live_req -= state.sizes[slot]; + state.live_req += size; + 
state.sizes[slot] = size; +} + +static void* bench_heap_create() { + return sp_mem_heap_new(); +} + +static void bench_heap_destroy(void* ctx) { + sp_mem_heap_destroy((sp_mem_heap_t*)ctx); +} + +static sp_mem_t bench_heap_as_mem(void* ctx) { + return sp_mem_heap_as_allocator((sp_mem_heap_t*)ctx); +} + +static bool bench_heap_sample(void* ctx, u64* used, u64* reserved) { + sp_mem_heap_t* heap = (sp_mem_heap_t*)ctx; + *used = heap->bytes_used; + *reserved = heap->bytes_reserved; + return true; +} + +#if !defined(SP_FREESTANDING) +static void* bench_malloc_on_alloc(void* user_data, sp_mem_alloc_mode_t mode, u64 size, void* ptr) { + sp_unused(user_data); + switch (mode) { + case SP_ALLOCATOR_MODE_ALLOC: return malloc(size); + case SP_ALLOCATOR_MODE_RESIZE: return realloc(ptr, size); + case SP_ALLOCATOR_MODE_FREE: free(ptr); return SP_NULLPTR; + } + return SP_NULLPTR; +} + +static void* bench_malloc_create() { + return SP_NULLPTR; +} + +static void bench_malloc_destroy(void* ctx) { + sp_unused(ctx); +} + +static sp_mem_t bench_malloc_as_mem(void* ctx) { + sp_unused(ctx); + return (sp_mem_t) { + .on_alloc = bench_malloc_on_alloc + }; +} + +static bool bench_malloc_sample(void* ctx, u64* used, u64* reserved) { + sp_unused(ctx); + #if defined(__GLIBC__) + struct mallinfo2 info = mallinfo2(); + *used = info.uordblks; + *reserved = info.arena + info.hblkhd; + return true; + #else + *used = 0; + *reserved = 0; + return false; + #endif +} +#endif + +typedef struct { + u64 used; + u64 reserved; + u64 peak_reserved; +} bench_os_counters_t; + +static bench_os_counters_t bench_os_counters = sp_zero; + +static u64 bench_os_reservation(u64 size) { + return sp_align_offset(size + sizeof(sp_mem_os_header_t), 4096); +} + +static void* bench_os_on_alloc(void* user_data, sp_mem_alloc_mode_t mode, u64 size, void* ptr) { + bench_os_counters_t* counters = (bench_os_counters_t*)user_data; + switch (mode) { + case SP_ALLOCATOR_MODE_ALLOC: { + void* p = sp_mem_os_alloc(size); + if (p) { + 
counters->used += size; + counters->reserved += bench_os_reservation(size); + counters->peak_reserved = sp_max(counters->peak_reserved, counters->reserved); + } + return p; + } + case SP_ALLOCATOR_MODE_RESIZE: { + if (!ptr) return bench_os_on_alloc(user_data, SP_ALLOCATOR_MODE_ALLOC, size, SP_NULLPTR); + u64 old = sp_mem_os_get_header(ptr)->size; + void* p = sp_mem_os_realloc(ptr, size); + if (p) { + u64 now = sp_mem_os_get_header(p)->size; + counters->used -= old; + counters->used += now; + counters->reserved -= bench_os_reservation(old); + counters->reserved += bench_os_reservation(now); + counters->peak_reserved = sp_max(counters->peak_reserved, counters->reserved); + } + return p; + } + case SP_ALLOCATOR_MODE_FREE: { + if (ptr) { + u64 old = sp_mem_os_get_header(ptr)->size; + counters->used -= old; + counters->reserved -= bench_os_reservation(old); + sp_mem_os_free(ptr); + } + return SP_NULLPTR; + } + } + return SP_NULLPTR; +} + +static void* bench_os_create() { + bench_os_counters = sp_zero_s(bench_os_counters_t); + return &bench_os_counters; +} + +static void bench_os_destroy(void* ctx) { + sp_unused(ctx); +} + +static sp_mem_t bench_os_as_mem(void* ctx) { + return (sp_mem_t) { + .on_alloc = bench_os_on_alloc, + .user_data = ctx + }; +} + +static bool bench_os_sample(void* ctx, u64* used, u64* reserved) { + bench_os_counters_t* counters = (bench_os_counters_t*)ctx; + *used = counters->used; + *reserved = counters->reserved; + return true; +} + +static const bench_backend_t backends [] = { + { "sp_heap", bench_heap_create, bench_heap_destroy, bench_heap_as_mem, bench_heap_sample }, + #if !defined(SP_FREESTANDING) + { "malloc", bench_malloc_create, bench_malloc_destroy, bench_malloc_as_mem, bench_malloc_sample }, + #endif + // { "sp_os", bench_os_create, bench_os_destroy, bench_os_as_mem, bench_os_sample }, +}; + +static const bench_workload_t workloads [] = { + { .name = "fixed_16", .kind = WORK_CHURN, .dist = DIST_FIXED, .lo = 16, .slots = 65536, .ops = 
400000 }, + { .name = "fixed_64", .kind = WORK_CHURN, .dist = DIST_FIXED, .lo = 64, .slots = 65536, .ops = 400000 }, + { .name = "fixed_512", .kind = WORK_CHURN, .dist = DIST_FIXED, .lo = 512, .slots = 16384, .ops = 100000 }, + { .name = "uniform_small", .kind = WORK_CHURN, .dist = DIST_UNIFORM, .lo = 1, .hi = 1024, .slots = 32768, .ops = 200000 }, + { .name = "log_mixed", .kind = WORK_CHURN, .dist = DIST_LOG, .lo = 16, .hi = 16384, .slots = 8192, .ops = 100000 }, + { .name = "large", .kind = WORK_CHURN, .dist = DIST_UNIFORM, .lo = 4096, .hi = 65536, .slots = 1024, .ops = 20000 }, + { .name = "ramp_lifo", .kind = WORK_RAMP_LIFO, .dist = DIST_UNIFORM, .lo = 16, .hi = 512, .slots = 200000 }, + { .name = "ramp_fifo", .kind = WORK_RAMP_FIFO, .dist = DIST_UNIFORM, .lo = 16, .hi = 512, .slots = 200000 }, + { .name = "pin_5pct", .kind = WORK_PIN, .dist = DIST_UNIFORM, .lo = 16, .hi = 256, .slots = 200000, .survive_pct = 5 }, + { .name = "realloc_grow", .kind = WORK_REALLOC, .dist = DIST_FIXED, .lo = 16, .hi = 65536, .slots = 2048, .ops = 100000 }, +}; + +typedef struct { + const c8* phase; + u64 ns_per_op; + void* ctx; + const bench_backend_t* backend; +} bench_report_t; + +typedef struct { + const c8* phase; + const c8* backend; + u64 ns_per_op; + u64 req; + u64 used; + u64 reserved; + u64 util; + bool exact; +} bench_result_t; + +typedef struct { + bench_result_t rows [BENCH_MAX_BACKENDS][BENCH_MAX_PHASES]; + u32 num_phases [BENCH_MAX_BACKENDS]; + u32 num_backends; +} bench_results_t; + +static bench_results_t results = sp_zero; + +static void bench_report(bench_report_t r) { + u64 used = 0; + u64 reserved = 0; + bool exact = r.backend->sample(r.ctx, &used, &reserved); + + SP_ASSERT(results.num_backends < BENCH_MAX_BACKENDS); + SP_ASSERT(results.num_phases[results.num_backends] < BENCH_MAX_PHASES); + bench_result_t* result = &results.rows[results.num_backends][results.num_phases[results.num_backends]++]; + *result = (bench_result_t) { + .phase = r.phase, + .backend = 
r.backend->name, + .ns_per_op = r.ns_per_op, + .req = state.live_req, + .used = used, + .reserved = reserved, + .util = reserved ? (state.live_req * 100) / reserved : 0, + .exact = exact, + }; +} + +static void bench_write_ratio(sp_table_writer_t* table, u64 value, u64 best, bool valid) { + if (!valid) { + sp_table_write_cstr(table, "?"); + return; + } + if (value == best) sp_table_color(table, SP_ANSI_FG_GREEN); + if (!best) { + if (value) sp_table_write_cstr(table, "-"); + else sp_table_write_f64(table, 1.0); + return; + } + sp_table_write_f64(table, (f64)value / (f64)best); +} + +static void bench_render_results() { + if (!results.num_backends) return; + + sp_mem_arena_marker_t scratch = sp_mem_begin_scratch(); + sp_table_writer_t table = sp_zero; + sp_table_init(&table, scratch.mem); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("phase") }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("backend") }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("ns/op"), .align = SP_FMT_ALIGN_RIGHT }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("req"), .fmt = "{.bytes}", .align = SP_FMT_ALIGN_RIGHT }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("used"), .fmt = "{.bytes}", .align = SP_FMT_ALIGN_RIGHT }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("rsvd"), .fmt = "{.bytes}", .align = SP_FMT_ALIGN_RIGHT }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("util"), .fmt = "{}%", .align = SP_FMT_ALIGN_RIGHT }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("ns/best"), .fmt = "{:.2}x", .align = SP_FMT_ALIGN_RIGHT }); + sp_table_add_col(&table, (sp_table_col_t) { .header = sp_str_lit("rsvd/best"), .fmt = "{:.2}x", .align = SP_FMT_ALIGN_RIGHT }); + + u32 num_phases = results.num_phases[0]; + sp_for(phase, num_phases) { + u64 best_ns = 0; + u64 best_rsvd = 0; + bool have_rsvd = false; + sp_for(b, 
results.num_backends) { + bench_result_t* result = &results.rows[b][phase]; + if (!b || result->ns_per_op < best_ns) best_ns = result->ns_per_op; + if (result->exact && (!have_rsvd || result->reserved < best_rsvd)) { + best_rsvd = result->reserved; + have_rsvd = true; + } + } + + sp_for(b, results.num_backends) { + SP_ASSERT(results.num_phases[b] == num_phases); + bench_result_t* result = &results.rows[b][phase]; + sp_table_begin(&table); + sp_table_write_cstr(&table, result->phase); + sp_table_write_cstr(&table, result->backend); + sp_table_write_u64(&table, result->ns_per_op); + sp_table_write_u64(&table, result->req); + if (result->exact) { + sp_table_write_u64(&table, result->used); + sp_table_write_u64(&table, result->reserved); + sp_table_write_u64(&table, result->util); + } + else { + sp_table_write_cstr(&table, "?"); + sp_table_write_cstr(&table, "?"); + sp_table_write_cstr(&table, "?"); + } + bench_write_ratio(&table, result->ns_per_op, best_ns, true); + bench_write_ratio(&table, result->reserved, best_rsvd, result->exact && have_rsvd); + } + } + sp_table_log(&table); + sp_mem_end_scratch(scratch); +} + +static void bench_run(const bench_workload_t* w, const bench_backend_t* backend) { + void* ctx = backend->create(); + state.mem = backend->as_mem(ctx); + state.live_req = 0; + state.rng = 0x5EED5EED5EED5EEDULL; + sp_mem_zero(state.ptrs, w->slots * sizeof(void*)); + sp_mem_zero(state.sizes, w->slots * sizeof(u64)); + + + sp_tm_timer_t timer = sp_tm_start_timer(); + sp_for(it, w->slots) { + bench_alloc_slot(w, it); + } + bench_report((bench_report_t) { + .phase = "fill", + .ns_per_op = sp_tm_read_timer(&timer) / w->slots, + .ctx = ctx, + .backend = backend, + }); + + switch (w->kind) { + case WORK_CHURN: { + sp_tm_reset_timer(&timer); + sp_for(it, w->ops) { + u32 slot = (u32)(bench_rng_next() % w->slots); + if (state.ptrs[slot]) bench_free_slot(slot); + else bench_alloc_slot(w, slot); + } + bench_report((bench_report_t) { + .phase = "churn", + .ns_per_op = 
sp_tm_read_timer(&timer) / w->ops, + .ctx = ctx, + .backend = backend, + }); + break; + } + case WORK_RAMP_LIFO: { + sp_tm_reset_timer(&timer); + for (u32 it = w->slots; it > 0; it--) { + bench_free_slot(it - 1); + } + bench_report((bench_report_t) { + .phase = "free", + .ns_per_op = sp_tm_read_timer(&timer) / w->slots, + .ctx = ctx, + .backend = backend, + }); + break; + } + case WORK_RAMP_FIFO: { + sp_tm_reset_timer(&timer); + sp_for(it, w->slots) { + bench_free_slot(it); + } + bench_report((bench_report_t) { + .phase = "free", + .ns_per_op = sp_tm_read_timer(&timer) / w->slots, + .ctx = ctx, + .backend = backend, + }); + break; + } + case WORK_PIN: { + sp_tm_reset_timer(&timer); + u32 freed = 0; + sp_for(it, w->slots) { + if (bench_rng_next() % 100 >= w->survive_pct) { + bench_free_slot(it); + freed++; + } + } + bench_report((bench_report_t) { + .phase = "pinned", + .ns_per_op = sp_tm_read_timer(&timer) / sp_max(freed, 1), + .ctx = ctx, + .backend = backend, + }); + break; + } + case WORK_REALLOC: { + sp_tm_reset_timer(&timer); + sp_for(it, w->ops) { + u32 slot = (u32)(bench_rng_next() % w->slots); + if (!state.ptrs[slot]) { + bench_alloc_slot(w, slot); + } + else if (state.sizes[slot] * 2 > w->hi) { + bench_free_slot(slot); + } + else { + bench_realloc_slot(slot, state.sizes[slot] * 2); + } + } + bench_report((bench_report_t) { + .phase = "grow", + .ns_per_op = sp_tm_read_timer(&timer) / w->ops, + .ctx = ctx, + .backend = backend, + }); + break; + } + } + + sp_tm_reset_timer(&timer); + u32 drained = 0; + sp_for(it, w->slots) { + if (state.ptrs[it]) { + bench_free_slot(it); + drained++; + } + } + if (drained) { + bench_report((bench_report_t) { + .phase = "drained", + .ns_per_op = sp_tm_read_timer(&timer) / drained, + .ctx = ctx, + .backend = backend, + }); + } + + backend->destroy(ctx); + results.num_backends++; +} + +s32 main(s32 argc, const c8** argv) { + sp_str_t workload_filter = argc > 1 ? 
sp_str_view(argv[1]) : sp_str_lit(""); + sp_str_t backend_filter = argc > 2 ? sp_str_view(argv[2]) : sp_str_lit(""); + + sp_mem_zero(state.ptrs, sizeof(state.ptrs)); + sp_mem_zero(state.sizes, sizeof(state.sizes)); + + sp_carr_for(workloads, w) { + const bench_workload_t* workload = &workloads[w]; + if (!sp_str_empty(workload_filter)) { + if (!sp_str_equal_cstr(workload_filter, workload->name)) { + continue; + } + } + + sp_log("> {.yellow}", sp_fmt_cstr(workload->name)); + sp_log( + "min={.cyan} max={.cyan} slots={.cyan} ops={.cyan}", + sp_fmt_uint(workload->lo), + sp_fmt_uint(sp_max(workload->lo, workload->hi)), + sp_fmt_uint(workload->slots), + sp_fmt_uint(workload->ops) + ); + + results = sp_zero_s(bench_results_t); + sp_carr_for(backends, b) { + const bench_backend_t* backend = &backends[b]; + if (!sp_str_empty(backend_filter)) { + if (!sp_str_equal(sp_str_view(backend->name), backend_filter)) { + continue; + } + } + bench_run(workload, backend); + } + bench_render_results(); + sp_log(""); + } + + return 0; +} diff --git a/test/bench/ubench.h b/test/bench/ubench.h new file mode 100644 index 0000000..956411b --- /dev/null +++ b/test/bench/ubench.h @@ -0,0 +1,1876 @@ + +#ifndef SP_BENCH_H +#define SP_BENCH_H + +#if defined(UBENCH_ENABLE_SQLITE) && defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#ifndef SP_PRIVATE_HEADER +#define SP_PRIVATE_HEADER +#endif + +#include "sp.h" +SP_BEGIN_EXTERN_C() + +typedef u64 ubench_size_t; + +//////////// +// MACROS // +//////////// +#if defined(SP_CPP) + #define UBENCH_C_FUNC extern "C" +#else + #define UBENCH_C_FUNC +#endif + +#if defined(SP_MSVC) + #define UBENCH_UNUSED +#else + #define UBENCH_UNUSED SP_ATTRIBUTE(unused) +#endif + +#if defined(SP_CPP) + #define UBENCH_EXTERN extern "C" +#else + #define UBENCH_EXTERN extern +#endif + +#if defined(SP_MSVC) + #define UBENCH_DO_NOT_OPTIMIZE(x) \ + do { \ + _ReadWriteBarrier(); \ + ubench_do_nothing((void *)&(x)); \ + } while (0) + #define 
UBENCH_CLOBBER_MEMORY() _ReadWriteBarrier() + +#else + #define UBENCH_CLOBBER_MEMORY() __asm__ volatile("" : : : "memory") + + #if defined(SP_CLANG) + #define UBENCH_DO_NOT_OPTIMIZE(x) \ + __asm__ volatile("" : "+r,m"(x) : : "memory") + #else + #define UBENCH_DO_NOT_OPTIMIZE(x) \ + __asm__ volatile("" : "+m,r"(x) : : "memory") + #endif +#endif + + +typedef struct ubench_run_state_s { + s64 *ns; + s64 *pause_ns; + s64 size; + s64 sample; + s64 paused_ns; + s64 pause_start; + s64 bytes_processed; + s64 items_processed; + /* Auto-tuned per-sample batch size: each clock-bracketed sample executes + `batch` body invocations when the body uses UBENCH_LOOP. The runner + amortizes clock-call overhead so micro-bodies (sub-µs) become measurable. + batch_consumed is set non-zero by UBENCH_LOOP to signal that the body + opted into batching, so the runner knows whether to tune. */ + s64 batch; + s64 batch_consumed; +} ubench_run_state_t; + +struct ubench_benchmark_state_s; + +typedef void (*ubench_body_t)(void *fixture, struct ubench_run_state_s *ubs); +typedef void (*ubench_setup_t)(void *fixture); +typedef void (*ubench_teardown_t)(void *fixture); + +typedef struct ubench_fixture_ops_s { + ubench_setup_t setup; + ubench_teardown_t teardown; + size_t size; +} ubench_fixture_ops_t; + +typedef void (*ubench_dispatch_t)(struct ubench_benchmark_state_s *b, + struct ubench_run_state_s *ubs); + +typedef struct ubench_benchmark_state_s { + sp_str_t name; + ubench_body_t body; + const struct ubench_fixture_ops_s *ops; + ubench_dispatch_t dispatch; +} ubench_benchmark_state_t; + +typedef struct ubench_state_s { + ubench_benchmark_state_t* benchmarks; // @spader make this sp_da, get rid of len + ubench_size_t benchmarks_length; + f64 confidence; + sp_mem_t mem; +} ubench_state_t; + +typedef struct unbench_benchmark_config_s { + const c8* name; + ubench_body_t body; + const ubench_fixture_ops_t* ops; + ubench_dispatch_t dispatch; +} ubench_benchmark_config_t; + +SP_API struct 
ubench_state_s ubench_state; +SP_API void ubench_run_lifecycle(ubench_benchmark_state_t* b, ubench_run_state_t* run); +SP_API void ubench_invoke(ubench_benchmark_state_t* b, ubench_run_state_t* run); +SP_API s32 ubench_do_benchmark(ubench_run_state_t* const run); +SP_API void ubench_register_benchmark(sp_str_t name, ubench_body_t body, const ubench_fixture_ops_t* ops, ubench_dispatch_t dispatch); +SP_API void ubench_register_benchmark_s(ubench_benchmark_config_t config); +SP_API SP_INLINE void ubench_pause(ubench_run_state_t* const run); +SP_API SP_INLINE void ubench_resume(ubench_run_state_t* const run); +SP_API SP_INLINE void ubench_do_nothing(void* ptr); +SP_API sp_str_t sp_cpu_get_model_a(sp_mem_t mem); +SP_API u32 sp_cpu_get_thread_count(void); + +#define UBENCH_STATE() \ + struct ubench_state_s ubench_state = { \ + .benchmarks = SP_NULLPTR, \ + .benchmarks_length = 0, \ + .confidence = 2.5, \ + .mem = {sp_mem_os_on_alloc, SP_NULLPTR}} + +#define UBENCH_MAIN() \ + UBENCH_STATE(); \ + s32 main(s32 argc, const c8* const argv[]) { \ + return ubench_main(argc, argv); \ + } + + +typedef struct sqlite3 sqlite3; +typedef struct sqlite3_stmt sqlite3_stmt; +typedef long long int sqlite3_int64; +typedef void (*sqlite3_destructor_type)(void*); + +#define SQLITE_OK 0 +#define SQLITE_ROW 100 +#define SQLITE_DONE 101 +#define SQLITE_STATIC ((sqlite3_destructor_type)0) +#define SQLITE_TRANSIENT ((sqlite3_destructor_type)-1) + +int sqlite3_open(const char *filename, sqlite3 **ppDb); +int sqlite3_close(sqlite3*); +int sqlite3_exec(sqlite3*, const char *sql, int (*callback)(void*,int,char**,char**), void*, char **errmsg); +const char *sqlite3_errmsg(sqlite3*); +int sqlite3_prepare_v2(sqlite3 *db, const char *zSql, int nByte, sqlite3_stmt **ppStmt, const char **pzTail); +int sqlite3_step(sqlite3_stmt*); +int sqlite3_reset(sqlite3_stmt *pStmt); +int sqlite3_finalize(sqlite3_stmt *pStmt); +sqlite3_int64 sqlite3_last_insert_rowid(sqlite3*); +int sqlite3_bind_null(sqlite3_stmt*, 
int); +int sqlite3_bind_int(sqlite3_stmt*, int, int); +int sqlite3_bind_int64(sqlite3_stmt*, int, sqlite3_int64); +int sqlite3_bind_double(sqlite3_stmt*, int, double); +int sqlite3_bind_text(sqlite3_stmt*, int, const char*, int, void(*)(void*)); +sqlite3_int64 sqlite3_column_int64(sqlite3_stmt*, int iCol); + + +SP_IMP SP_INLINE s32 ubench_should_filter(const c8 *filter, const c8 *benchmark); +SP_IMP SP_INLINE s32 ubench_int64_cmp(const void* a, const void* b); +SP_IMP SP_INLINE f32 sp_sys_sqrtf(f32 x); +SP_IMP void ubench_fmt_tty_green(sp_io_writer_t *io, sp_fmt_arg_t *arg, sp_fmt_arg_t *params); +SP_IMP void ubench_fmt_tty_red(sp_io_writer_t *io, sp_fmt_arg_t *arg, sp_fmt_arg_t *params); +SP_IMP void ubench_fmt_tty_reset(sp_io_writer_t *io, sp_fmt_arg_t *arg, sp_fmt_arg_t *params); + +#define BENCH_STORE_SCHEMA \ + "CREATE TABLE IF NOT EXISTS machines (" \ + " id INTEGER PRIMARY KEY," \ + " fingerprint TEXT NOT NULL UNIQUE," \ + " hostname TEXT," \ + " os_name TEXT, os_version TEXT, arch TEXT," \ + " cpu_model TEXT, cpu_cores INTEGER, cpu_threads INTEGER," \ + " memory_bytes INTEGER);" \ + "CREATE TABLE IF NOT EXISTS runs (" \ + " id INTEGER PRIMARY KEY," \ + " machine_id INTEGER NOT NULL REFERENCES machines(id)," \ + " started_at TEXT NOT NULL," \ + " finished_at TEXT," \ + " executable_path TEXT," \ + " executable_size_bytes INTEGER," \ + " executable_mtime TEXT," \ + " confidence_threshold REAL," \ + " filter TEXT," \ + " has_perf_counters INTEGER," \ + " label TEXT," \ + " framework TEXT," \ + " metadata TEXT);" \ + "CREATE TABLE IF NOT EXISTS benchmarks (" \ + " id INTEGER PRIMARY KEY," \ + " name TEXT NOT NULL UNIQUE);" \ + "CREATE TABLE IF NOT EXISTS results (" \ + " id INTEGER PRIMARY KEY," \ + " run_id INTEGER NOT NULL REFERENCES runs(id)," \ + " benchmark_id INTEGER NOT NULL REFERENCES benchmarks(id)," \ + " iterations INTEGER NOT NULL," \ + " mean_ns REAL NOT NULL," \ + " median_ns REAL NOT NULL," \ + " min_ns REAL NOT NULL," \ + " max_ns REAL NOT 
NULL," \ + " stddev_ns REAL," \ + " stddev_pct REAL," \ + " ci_low_ns REAL," \ + " ci_high_ns REAL," \ + " ci_level_pct REAL," \ + " confidence_pct REAL," \ + " bytes_processed INTEGER," \ + " items_processed INTEGER," \ + " cycles_per_iter INTEGER," \ + " instructions_per_iter INTEGER," \ + " UNIQUE(run_id, benchmark_id));" \ + "CREATE INDEX IF NOT EXISTS idx_results_bench_run" \ + " ON results(benchmark_id, run_id);" + +#define BENCH_UNSET_I64 ((s64)-1) +#define BENCH_UNSET_F64 (-1.0) + +typedef struct bench_store bench_store; + +typedef struct { + c8 hostname[256]; + c8 os_name[64]; + c8 os_version[128]; + c8 arch[64]; + c8 cpu_model[256]; + s32 cpu_cores; + s32 cpu_threads; + s64 memory_bytes; +} bench_machine_info; + +typedef struct { + const c8 *executable_path; + s64 executable_size_bytes; + const c8 *executable_mtime; /* ISO 8601 UTC. */ + const c8 *filter; + const c8 *label; + const c8 *framework; + const c8 *metadata_json; + f64 confidence_threshold; /* < 0 => NULL. */ + s32 has_perf_counters; /* < 0 => NULL. 
*/ +} bench_run_info; + +typedef struct { + s64 iterations; + f64 mean_ns; + f64 median_ns; + f64 min_ns; + f64 max_ns; + f64 stddev_ns; + f64 stddev_pct; + f64 ci_low_ns; + f64 ci_high_ns; + f64 ci_level_pct; + f64 confidence_pct; + s64 bytes_processed; + s64 items_processed; + s64 cycles_per_iter; + s64 instructions_per_iter; +} bench_result; + +SP_API bench_store* bench_store_open(const c8 *path); +SP_API void bench_store_close(bench_store* s); +SP_API s32 bench_collect_machine_info(bench_machine_info* out); +SP_API s64 bench_store_begin_run(bench_store* s, const bench_machine_info* mi, const bench_run_info* ri); +SP_API s32 bench_store_record(bench_store* s, s64 run_id, const c8 *bench_name, const bench_result* r); +SP_API s32 bench_store_end_run(bench_store* s, s64 run_id); +SP_API s64 bench_simple_begin_run( + bench_store* s, + const c8* framework, const c8* label, + const c8* executable_path, s64 executable_size_bytes, const c8* executable_mtime, + f64 confidence_threshold, + s32 has_perf_counters +); +SP_API s32 bench_simple_record( + bench_store* s, s64 run_id, + const c8* name, + s64 iterations, + f64 mean_ns, f64 median_ns, + f64 min_ns, f64 max_ns, + f64 stddev_ns, f64 stddev_pct, + f64 ci_low_ns, f64 ci_high_ns, f64 ci_level_pct, + f64 confidence_pct, + s64 bytes_processed, s64 items_processed, + s64 cycles_per_iter, s64 instructions_per_iter +); + +SP_IMP s32 bench__get_or_insert_machine(sqlite3* db, const bench_machine_info* m, sqlite3_int64* out_id); +SP_IMP void bench__read_first_field(const c8* path, const c8* prefix, c8* dst, u32 dst_size); +SP_IMP void bench__make_fingerprint(const bench_machine_info* m, c8* dst, u32 dst_size); +SP_IMP s32 bench__get_or_insert_benchmark(sqlite3* db, const c8* name, sqlite3_int64* out_id); +SP_IMP void bench__bind_text_or_null(sqlite3_stmt *stmt, int idx, const c8 *s); +SP_IMP void bench__bind_i64_or_null(sqlite3_stmt *stmt, int idx, s64 v); +SP_IMP void bench__bind_f64_or_null(sqlite3_stmt *stmt, int idx, f64 
v); + +/////////////////////// +// UBENCH_DO_NOTHING // +/////////////////////// +#if defined(SP_MSVC) + UBENCH_C_FUNC void _ReadWriteBarrier(void); + + void ubench_do_nothing(void *ptr) { + (void)ptr; + _ReadWriteBarrier(); + } +#elif defined(SP_CLANG) + void ubench_do_nothing(void *ptr) { + _Pragma("clang diagnostic push") + _Pragma("clang diagnostic ignored \"-Wlanguage-extension-token\""); + __asm__ volatile("" : : "r"(ptr), "m"(ptr) : "memory"); + _Pragma("clang diagnostic pop"); + } +#else + void ubench_do_nothing(void *ptr) { + __asm__ volatile("" : : "r"(ptr), "m"(ptr) : "memory"); + } +#endif + + +//////////////////////// +// UBENCH_INITIALIZER // +//////////////////////// +#if defined(SP_CPP) + #if defined(SP_CLANG) + #define UBENCH_INITIALIZER_BEGIN_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wglobal-constructors\"") + + #define UBENCH_INITIALIZER_END_DISABLE_WARNINGS _Pragma("clang diagnostic pop") + #else + #define UBENCH_INITIALIZER_BEGIN_DISABLE_WARNINGS + #define UBENCH_INITIALIZER_END_DISABLE_WARNINGS + #endif + + #define UBENCH_INITIALIZER(f) \ + struct f##_cpp_struct { \ + f##_cpp_struct(); \ + }; \ + UBENCH_INITIALIZER_BEGIN_DISABLE_WARNINGS static f##_cpp_struct \ + f##_cpp_global UBENCH_INITIALIZER_END_DISABLE_WARNINGS; \ + f##_cpp_struct::f##_cpp_struct() + +#elif defined(SP_MSVC) + #define UBENCH_SYMBOL_PREFIX + + #if defined(SP_CLANG) + #define UBENCH_INITIALIZER_BEGIN_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wmissing-variable-declarations\"") + + #define UBENCH_INITIALIZER_END_DISABLE_WARNINGS _Pragma("clang diagnostic pop") + #else + #define UBENCH_INITIALIZER_BEGIN_DISABLE_WARNINGS + #define UBENCH_INITIALIZER_END_DISABLE_WARNINGS + #endif + + #pragma section(".CRT$XCU", read) + #define UBENCH_INITIALIZER(f) \ + static void __cdecl f(void); \ + UBENCH_INITIALIZER_BEGIN_DISABLE_WARNINGS __pragma( \ + comment(linker, "/include:" 
UBENCH_SYMBOL_PREFIX #f "_")) UBENCH_C_FUNC \ + __declspec(allocate(".CRT$XCU")) void(__cdecl * f##_)(void) = f; \ + UBENCH_INITIALIZER_END_DISABLE_WARNINGS static void __cdecl f(void) +#else + #define UBENCH_INITIALIZER(f) \ + static void f(void) SP_ATTRIBUTE(constructor); \ + static void f(void) +#endif + +////////////////////////////// +// UBENCH_SURPRESS_WARNINGS // +////////////////////////////// +#if defined(SP_CLANG) +#if __has_warning("-Wunsafe-buffer-usage") +#define UBENCH_SURPRESS_WARNINGS_BEGIN \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wunsafe-buffer-usage\"") +#define UBENCH_SURPRESS_WARNINGS_END _Pragma("clang diagnostic pop") +#else +#define UBENCH_SURPRESS_WARNINGS_BEGIN +#define UBENCH_SURPRESS_WARNINGS_END +#endif +#elif defined(SP_GNUC) && __GNUC__ >= 8 && defined(SP_CPP) +#define UBENCH_SURPRESS_WARNINGS_BEGIN \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wclass-memaccess\"") +#define UBENCH_SURPRESS_WARNINGS_END _Pragma("GCC diagnostic pop") +#else +#define UBENCH_SURPRESS_WARNINGS_BEGIN +#define UBENCH_SURPRESS_WARNINGS_END +#endif + +#define UBENCH_DO_BENCHMARK() \ + while (ubench_do_benchmark(ubench_run_state) > 0) + +#define UBENCH_DO_NOTHING(x) \ + ubench_do_nothing(x) + +#define UBENCH_EX(SET, NAME) \ + UBENCH_SURPRESS_WARNINGS_BEGIN \ + static void ubench_##SET##_##NAME(void *, \ + struct ubench_run_state_s *); \ + UBENCH_INITIALIZER(ubench_register_##SET##_##NAME) { \ + ubench_register_benchmark( \ + sp_str_lit(#SET "." #NAME), \ + &ubench_##SET##_##NAME, \ + SP_NULLPTR, SP_NULLPTR \ + ); \ + } \ + UBENCH_SURPRESS_WARNINGS_END \ + void ubench_##SET##_##NAME(void *ubench_fixture_unused UBENCH_UNUSED, \ + struct ubench_run_state_s *ubench_run_state) + +/* The user body receives `ubench_run_state` as a parameter so that + UBENCH_LOOP, UBENCH_PAUSE, UBENCH_RESUME, UBENCH_SET_BYTES_PROCESSED, etc. + can resolve the symbol from inside a UBENCH(...) body. 
The parameter has + a fixed name and is unused by callers that don't need it, so this is a + silent extension to the macro contract. */ +#define UBENCH(SET, NAME) \ + static void ubench_run_##SET##_##NAME(struct ubench_run_state_s *); \ + UBENCH_EX(SET, NAME) { \ + UBENCH_DO_BENCHMARK() { ubench_run_##SET##_##NAME(ubench_run_state); } \ + } \ + void ubench_run_##SET##_##NAME( \ + struct ubench_run_state_s *ubench_run_state UBENCH_UNUSED) + +#define UBENCH_F_SETUP(FIXTURE) \ + static void ubench_f_setup_impl_##FIXTURE(struct FIXTURE *ubench_fixture); \ + static void ubench_f_setup_##FIXTURE(void *ubench_fixture_void) { \ + ubench_f_setup_impl_##FIXTURE((struct FIXTURE *)ubench_fixture_void); \ + } \ + static void ubench_f_setup_impl_##FIXTURE(struct FIXTURE *ubench_fixture) + +#define UBENCH_F_TEARDOWN(FIXTURE) \ + static void ubench_f_teardown_impl_##FIXTURE(struct FIXTURE *ubench_fixture);\ + static void ubench_f_teardown_##FIXTURE(void *ubench_fixture_void) { \ + ubench_f_teardown_impl_##FIXTURE((struct FIXTURE *)ubench_fixture_void); \ + } \ + static void ubench_f_teardown_impl_##FIXTURE(struct FIXTURE *ubench_fixture) + +#define UBENCH_EX_F(FIXTURE, NAME) \ + UBENCH_SURPRESS_WARNINGS_BEGIN \ + static void ubench_f_setup_##FIXTURE(void *); \ + static void ubench_f_teardown_##FIXTURE(void *); \ + static void ubench_run_ex_##FIXTURE##_##NAME(struct FIXTURE *, \ + struct ubench_run_state_s *); \ + static void ubench_f_##FIXTURE##_##NAME( \ + void *ubench_fixture_void, \ + struct ubench_run_state_s *ubench_run_state) { \ + ubench_run_ex_##FIXTURE##_##NAME((struct FIXTURE *)ubench_fixture_void, \ + ubench_run_state); \ + } \ + UBENCH_INITIALIZER(ubench_register_##FIXTURE##_##NAME) { \ + static const struct ubench_fixture_ops_s ubench_ops_##FIXTURE##_##NAME = { \ + .setup = &ubench_f_setup_##FIXTURE, \ + .teardown = &ubench_f_teardown_##FIXTURE, \ + .size = sizeof(struct FIXTURE)}; \ + ubench_register_benchmark(sp_str_lit(#FIXTURE "." 
#NAME), \ + &ubench_f_##FIXTURE##_##NAME, \ + &ubench_ops_##FIXTURE##_##NAME, \ + SP_NULLPTR); \ + } \ + UBENCH_SURPRESS_WARNINGS_END \ + void ubench_run_ex_##FIXTURE##_##NAME( \ + struct FIXTURE *ubench_fixture, \ + struct ubench_run_state_s *ubench_run_state) + +#define UBENCH_F(FIXTURE, NAME) \ + static void ubench_run_##FIXTURE##_##NAME(struct FIXTURE *, \ + struct ubench_run_state_s *); \ + UBENCH_EX_F(FIXTURE, NAME) { \ + UBENCH_DO_BENCHMARK() { \ + ubench_run_##FIXTURE##_##NAME(ubench_fixture, ubench_run_state); \ + } \ + } \ + void ubench_run_##FIXTURE##_##NAME( \ + struct FIXTURE *ubench_fixture, \ + struct ubench_run_state_s *ubench_run_state UBENCH_UNUSED) + +// Prevent 64-bit integer overflow when computing a timestamp by using a trick +// from Sokol: +// https://github.com/floooh/sokol/blob/189843bf4f86969ca4cc4b6d94e793a37c5128a7/sokol_time.h#L204 +SP_IMP SP_INLINE s64 ubench_mul_div(const s64 value, const s64 numer, const s64 denom) { + const s64 q = value / denom; + const s64 r = value % denom; + return q * numer + r * numer / denom; +} + +static SP_INLINE s64 ubench_ns(void) { +#if defined(SP_WIN32) + /* QPC frequency is constant for the lifetime of the process; query once. */ + static s64 qpc_freq = 0; + LARGE_INTEGER counter; + if (qpc_freq == 0) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + qpc_freq = f.QuadPart; + } + QueryPerformanceCounter(&counter); + return ubench_mul_div(counter.QuadPart, 1000000000, qpc_freq); +#elif defined(SP_LINUX) + /* Use a monotonic clock so NTP slew/step cannot corrupt deltas. Prefer + CLOCK_MONOTONIC_RAW where available (Linux >= 2.6.28) since it is also + immune to adjtimex frequency steering. 
*/ + struct timespec ts; +#if defined(CLOCK_MONOTONIC_RAW) + const clockid_t cid = CLOCK_MONOTONIC_RAW; +#else + const clockid_t cid = CLOCK_MONOTONIC; +#endif + clock_gettime(cid, &ts); + return sp_cast(s64, ts.tv_sec) * 1000 * 1000 * 1000 + + ts.tv_nsec; +#elif defined(SP_MACOS) + return sp_cast(s64, clock_gettime_nsec_np(CLOCK_UPTIME_RAW)); +#else +#error Unsupported platform! +#endif +} + +void ubench_run_lifecycle(ubench_benchmark_state_t* b, ubench_run_state_t* run) { + if (b->ops != SP_NULLPTR) { + sp_mem_arena_marker_t scratch = sp_mem_begin_scratch_for(ubench_state.mem); + void *fixture = sp_alloc(scratch.mem, b->ops->size); + b->ops->setup(fixture); + b->body(fixture, run); + b->ops->teardown(fixture); + sp_mem_end_scratch(scratch); + } else { + b->body(SP_NULLPTR, run); + } +} + +void ubench_invoke(ubench_benchmark_state_t* b, ubench_run_state_t* run) { + if (b->dispatch != SP_NULLPTR) { + b->dispatch(b, run); + } else { + ubench_run_lifecycle(b, run); + } +} + +// @spader C string in the public API +void ubench_register_benchmark(sp_str_t name, ubench_body_t body, const ubench_fixture_ops_t* ops, ubench_dispatch_t dispatch) { + const ubench_size_t i = ubench_state.benchmarks_length++; + ubench_state.benchmarks = sp_ptr_cast( + ubench_benchmark_state_t*, + sp_realloc(ubench_state.mem, ubench_state.benchmarks, sizeof(ubench_benchmark_state_t) * ubench_state.benchmarks_length) + ); + + ubench_state.benchmarks[i].name = name; + ubench_state.benchmarks[i].body = body; + ubench_state.benchmarks[i].ops = ops; + ubench_state.benchmarks[i].dispatch = dispatch; +} + +void ubench_register_benchmark_s(ubench_benchmark_config_t config) { + ubench_register_benchmark(sp_cstr_as_str(config.name), config.body, config.ops, config.dispatch); +} + + +s32 ubench_do_benchmark(ubench_run_state_t* const run) { + const s64 curr_sample = run->sample++; + if (curr_sample > 0) { + run->pause_ns[curr_sample - 1] = run->paused_ns; + } + run->paused_ns = 0; + run->ns[curr_sample] = 
ubench_ns(); + return curr_sample < run->size ? 1 : 0; +} + +void ubench_pause(ubench_run_state_t* const run) { + run->pause_start = ubench_ns(); +} + +void ubench_resume(ubench_run_state_t* const run) { + run->paused_ns += ubench_ns() - run->pause_start; +} + +#define UBENCH_PAUSE() \ + ubench_pause(ubench_run_state) + +#define UBENCH_RESUME() \ + ubench_resume(ubench_run_state) + +#define UBENCH_SET_BYTES_PROCESSED(N) \ + (ubench_run_state->bytes_processed = (s64)(N)) + +#define UBENCH_SET_ITEMS_PROCESSED(N) \ + (ubench_run_state->items_processed = (s64)(N)) + +#define UBENCH_LOOP \ + for ( \ + s64 ubench_loop_i_ = ((ubench_run_state->batch_consumed = 1), \ + ubench_run_state->batch); \ + ubench_loop_i_ > 0; ubench_loop_i_-- \ + ) + +s32 ubench_should_filter(const c8 *filter, const c8 *benchmark) { + if (filter) { + const c8 *filter_cur = filter; + const c8 *benchmark_cur = benchmark; + const c8 *filter_wildcard = SP_NULLPTR; + + while (('\0' != *filter_cur) && ('\0' != *benchmark_cur)) { + if ('*' == *filter_cur) { + /* store the position of the wildcard */ + filter_wildcard = filter_cur; + + /* skip the wildcard character */ + filter_cur++; + + while (('\0' != *filter_cur) && ('\0' != *benchmark_cur)) { + if ('*' == *filter_cur) { + /* + we found another wildcard (filter is something like *foo*) so we + exit the current loop, and return to the parent loop to handle + the wildcard case + */ + break; + } else if (*filter_cur != *benchmark_cur) { + /* otherwise our filter didn't match, so reset it */ + filter_cur = filter_wildcard; + } + + /* move benchmark along */ + benchmark_cur++; + + /* move filter along */ + filter_cur++; + } + + if (('\0' == *filter_cur) && ('\0' == *benchmark_cur)) { + return 0; + } + + /* if the benchmarks have been exhausted, we don't have a match! 
*/ + if ('\0' == *benchmark_cur) { + return 1; + } + } else { + if (*benchmark_cur != *filter_cur) { + /* benchmark doesn't match filter */ + return 1; + } else { + /* move our filter and benchmark forward */ + benchmark_cur++; + filter_cur++; + } + } + } + + if (('\0' != *filter_cur) || + (('\0' != *benchmark_cur) && + ((filter == filter_cur) || ('*' != filter_cur[-1])))) { + /* we have a mismatch! */ + return 1; + } + } + + return 0; +} + +s32 ubench_int64_cmp(const void *a, const void *b) { + const s64 aa = *sp_ptr_cast(const s64 *, a); + const s64 bb = *sp_ptr_cast(const s64 *, b); + return aa < bb ? -1 : (aa > bb ? 1 : 0); +} + +f32 sp_sys_sqrtf(f32 x) { + if (x < 0) return 0; + if (x == 0) return 0; + f32 guess = x / 2.0f; + for (s32 i = 0; i < 10; i++) { + guess = (guess + x / guess) / 2.0f; + } + return guess; +} + +void ubench_fmt_tty_green(sp_io_writer_t *io, sp_fmt_arg_t *arg, sp_fmt_arg_t *params) { + sp_unused(arg); sp_unused(params); + if (sp_os_is_tty(sp_sys_stdout)) { + sp_io_write_cstr(io, SP_ANSI_FG_GREEN, SP_NULLPTR); + } +} + +void ubench_fmt_tty_red(sp_io_writer_t *io, sp_fmt_arg_t *arg, sp_fmt_arg_t *params) { + sp_unused(arg); sp_unused(params); + if (sp_os_is_tty(sp_sys_stdout)) { + sp_io_write_cstr(io, SP_ANSI_FG_RED, SP_NULLPTR); + } +} + +void ubench_fmt_tty_reset(sp_io_writer_t *io, sp_fmt_arg_t *arg, sp_fmt_arg_t *params) { + sp_unused(arg); sp_unused(params); + if (sp_os_is_tty(sp_sys_stdout)) { + sp_io_write_cstr(io, SP_ANSI_RESET, SP_NULLPTR); + } +} + +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) +#include + +struct ubench_perf_s { + s32 group_fd; + s32 instr_fd; + /* Per-pair overhead introduced by the ioctl(RESET)+ioctl(ENABLE) ... + ioctl(DISABLE)+read() sequence itself, measured at startup with an empty + body. Subtracted from every measurement so reported counts approximate + just the user code. 
*/ + u64 overhead_cycles; + u64 overhead_instructions; +}; + +static s32 ubench_perf_open_event(s32 leader, u32 config) { + struct perf_event_attr pea; + sp_mem_zero(&pea, sizeof(pea)); + pea.type = PERF_TYPE_HARDWARE; + pea.size = sizeof(pea); + pea.config = config; + pea.disabled = (leader == -1) ? 1 : 0; + pea.exclude_kernel = 1; + pea.exclude_hv = 1; + pea.read_format = PERF_FORMAT_GROUP; + return sp_cast(s32, + sp_syscall(SP_SYSCALL_NUM_PERF_EVENT_OPEN, &pea, 0, -1, leader, 0)); +} + +static void ubench_perf_start(struct ubench_perf_s *p) { + if (p->group_fd < 0) { + return; + } + sp_syscall(SP_SYSCALL_NUM_IOCTL, p->group_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); + sp_syscall(SP_SYSCALL_NUM_IOCTL, p->group_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); +} + +static void ubench_perf_stop_raw(struct ubench_perf_s *p, + u64 *cycles, + u64 *instructions) { + struct { + u64 nr; + u64 values[2]; + } buf; + *cycles = 0; + *instructions = 0; + if (p->group_fd < 0) { + return; + } + sp_syscall(SP_SYSCALL_NUM_IOCTL, p->group_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); + if (sp_syscall(SP_SYSCALL_NUM_READ, p->group_fd, &buf, sizeof(buf)) == + sp_cast(s64, sizeof(buf)) && + buf.nr == 2) { + *cycles = buf.values[0]; + *instructions = buf.values[1]; + } +} + +static void ubench_perf_stop(struct ubench_perf_s *p, u64 *cycles, + u64 *instructions) { + ubench_perf_stop_raw(p, cycles, instructions); + /* Subtract per-pair overhead measured at init. Saturate to zero rather than + wrap around if a single ultra-cheap measurement happens to undercount. */ + *cycles = (*cycles > p->overhead_cycles) ? *cycles - p->overhead_cycles : 0; + *instructions = (*instructions > p->overhead_instructions) + ? 
*instructions - p->overhead_instructions + : 0; +} + +static void ubench_perf_init(struct ubench_perf_s *p) { + p->instr_fd = -1; + p->overhead_cycles = 0; + p->overhead_instructions = 0; + p->group_fd = ubench_perf_open_event(-1, PERF_COUNT_HW_CPU_CYCLES); + if (p->group_fd < 0) { + return; + } + p->instr_fd = + ubench_perf_open_event(p->group_fd, PERF_COUNT_HW_INSTRUCTIONS); + if (p->instr_fd < 0) { + sp_syscall(SP_SYSCALL_NUM_CLOSE, p->group_fd); + p->group_fd = -1; + return; + } + + /* Calibrate per-pair start/stop overhead: take the minimum of N empty + start/stop pairs, mirroring nanobench's mCalibratedOverhead. */ + { + u64 best_cycles = (u64)-1; + u64 best_instructions = (u64)-1; + s32 trial; + for (trial = 0; trial < 32; trial++) { + u64 c = 0, i = 0; + ubench_perf_start(p); + ubench_perf_stop_raw(p, &c, &i); + if (c < best_cycles) { + best_cycles = c; + } + if (i < best_instructions) { + best_instructions = i; + } + } + if (best_cycles == (u64)-1) { + best_cycles = 0; + } + if (best_instructions == (u64)-1) { + best_instructions = 0; + } + p->overhead_cycles = best_cycles; + p->overhead_instructions = best_instructions; + } +} + +static void ubench_perf_close(struct ubench_perf_s *p) { + if (p->group_fd >= 0) { + sp_syscall(SP_SYSCALL_NUM_CLOSE, p->instr_fd); + sp_syscall(SP_SYSCALL_NUM_CLOSE, p->group_fd); + p->group_fd = -1; + p->instr_fd = -1; + } +} +#endif + +SP_END_EXTERN_C() + +#endif // SP_BENCH_H + + + +#if defined SP_IMPLEMENTATION && !defined(SP_BENCH_IMPLEMENTATION) + #define SP_BENCH_IMPLEMENTATION +#endif + +#if defined(SP_BENCH_IMPLEMENTATION) + +#include +#include + +#if defined(SP_WIN32) +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#include +#include +#else +#include +#include +#endif + +#if defined(__APPLE__) +#include +#include +#endif + +SP_BEGIN_EXTERN_C() + +#if defined(UBENCH_ENABLE_SQLITE) +struct bench_store { + sqlite3 * db; + sqlite3_stmt* result_stmt; + sp_mem_arena_t* arena; + sp_mem_t mem; +}; + 
+/* Find the first line of the text file `path` that begins with `prefix` and
+   copy its value into `dst` as a NUL-terminated string.
+
+   The value is whatever follows the prefix after skipping spaces, tabs and
+   ':' characters, up to the newline or until `dst` is full (it is truncated
+   to fit). `dst` is set to "" when the file cannot be opened or no line
+   matches. Nothing is written when `dst` is NULL or `dst_size` is 0. */
+void bench__read_first_field(const c8* path, const c8* prefix, c8* dst, u32 dst_size) {
+  FILE *f;
+  c8 line[1024];
+  u32 plen;
+
+  /* Without room for even the terminator there is nothing safe to write. */
+  if (!dst || dst_size == 0) return;
+  dst[0] = '\0';
+  if (!path || !prefix) return;
+
+  f = fopen(path, "r");
+  if (!f) return;
+  plen = (u32)strlen(prefix);
+  while (fgets(line, sizeof(line), f)) {
+    if (strncmp(line, prefix, plen) == 0) {
+      const c8 *p = line + plen;
+      u32 i = 0;
+      /* Skip the separator between key and value, e.g. "Key:   value". */
+      while (*p == ' ' || *p == '\t' || *p == ':') p++;
+      /* i + 1 < dst_size keeps one byte free for the terminator. */
+      while (*p && *p != '\n' && i + 1 < dst_size) dst[i++] = *p++;
+      dst[i] = '\0';
+      break;
+    }
+  }
+  fclose(f);
+}
+
+/* Build the machine fingerprint "os|arch|cpu_model|threads|mem_gb" into `dst`
+   (at most `dst_size` bytes, via snprintf). Empty os/arch/cpu strings are
+   replaced by "Unknown"/"unknown"; memory is rounded up to whole GiB. */
+void bench__make_fingerprint(const bench_machine_info* m, c8* dst, u32 dst_size) {
+  s64 mem_gb = (m->memory_bytes + (1LL << 30) - 1) / (1LL << 30);
+  snprintf(dst, dst_size, "%s|%s|%s|%d|%lld",
+    m->os_name[0] ? m->os_name : "Unknown",
+    m->arch[0] ? m->arch : "unknown",
+    m->cpu_model[0] ? m->cpu_model : "unknown",
+    (int)m->cpu_threads,
+    (long long)mem_gb);
+}
+
+
+#if defined(SP_LINUX)
+
+/* Read the whole file at `path` with raw openat/read/close syscalls and
+   return its bytes as a string allocated from `mem`. Returns a zeroed string
+   when the file cannot be opened. A read error ends the loop and returns
+   whatever was read so far. */
+static sp_str_t ubench_cpu_read_file_a(sp_mem_t mem, const c8 *path) {
+  sp_sys_fd_t fd = (sp_sys_fd_t)sp_syscall(
+    SP_SYSCALL_NUM_OPENAT, SP_AT_FDCWD, path, SP_O_RDONLY, 0);
+  if (fd < 0) return sp_zero_s(sp_str_t);
+
+  u64 cap = 4096;
+  c8 *buf = sp_alloc_n(mem, c8, cap);
+  u64 len = 0;
+  for (;;) {
+    /* Buffer full: double it. The old block is not freed here; the only
+       caller in view passes a scratch allocator. */
+    if (len == cap) {
+      u64 new_cap = cap * 2;
+      c8 *grown = sp_alloc_n(mem, c8, new_cap);
+      sp_mem_copy(grown, buf, len);
+      buf = grown;
+      cap = new_cap;
+    }
+    s64 n = sp_syscall(SP_SYSCALL_NUM_READ, fd, buf + len, cap - len);
+    if (n <= 0) break;
+    len += (u64)n;
+  }
+  sp_syscall(SP_SYSCALL_NUM_CLOSE, fd);
+  return (sp_str_t) { .data = buf, .len = (u32)len };
+}
+
+/* Linux: return the CPU model string taken from the first "model name" or
+   "Hardware" line of /proc/cpuinfo, copied into `mem`. The result is a zeroed
+   string when no such line is found. */
+sp_str_t sp_cpu_get_model_a(sp_mem_t mem) {
+  sp_mem_arena_marker_t s = sp_mem_begin_scratch_for(mem);
+  sp_str_t cpuinfo = ubench_cpu_read_file_a(s.mem, "/proc/cpuinfo");
+  sp_str_t result = sp_zero_s(sp_str_t);
+
+  sp_da(sp_str_t) lines = sp_str_split_c8(s.mem, cpuinfo, '\n');
+  sp_da_for(lines, i) {
+    sp_str_t line = lines[i];
+    if (!sp_str_starts_with(line, sp_str_lit("model name")) &&
+        !sp_str_starts_with(line,
sp_str_lit("Hardware"))) {
+      continue;
+    }
+    s32 colon = sp_str_find_c8(line, ':');
+    if (colon < 0) continue;
+    sp_str_t value = sp_str_sub(line, colon + 1, line.len - colon - 1); /* text after the ':' */
+    result = sp_str_copy(mem, sp_str_trim(value)); /* copy into the caller's allocator, not scratch */
+    break;
+  }
+
+  sp_mem_end_scratch(s);
+  return result;
+}
+/* Linux: number of CPUs in the calling thread's affinity mask, obtained with
+ * the raw sched_getaffinity syscall (pid 0). Returns 1 when the syscall fails
+ * or reports no set bits, so the result is never 0.
+ */
+u32 sp_cpu_get_thread_count(void) {
+  /* glibc cpu_set_t is 1024 bits; the kernel pads to 8-byte multiples. */
+  u8 mask[128] = sp_zero;
+  s64 rc = sp_syscall(SP_SYSCALL_NUM_SCHED_GETAFFINITY, 0, sizeof(mask), mask);
+  if (rc <= 0) return 1;
+  u32 count = 0;
+  for (u64 i = 0; i < (u64)rc; i++) { /* rc is used as the number of mask bytes filled in */
+    u8 b = mask[i];
+    while (b) { count += b & 1; b >>= 1; } /* count the set bits of this byte */
+  }
+  return count ? count : 1;
+}
+
+#elif defined(SP_MACOS)
+/* macOS: CPU model from the "machdep.cpu.brand_string" sysctl, copied into
+ * `mem`. Returns a zeroed string when the sysctl fails.
+ */
+sp_str_t sp_cpu_get_model_a(sp_mem_t mem) {
+  c8 buf[256] = sp_zero;
+  size_t len = sizeof(buf);
+  if (sysctlbyname("machdep.cpu.brand_string", buf, &len, NULL, 0) != 0) {
+    return sp_zero_s(sp_str_t);
+  }
+  /* sysctlbyname returns len including the trailing NUL on success. */
+  if (len > 0 && buf[len - 1] == '\0') len--;
+  return sp_str_copy(mem, (sp_str_t){ .data = buf, .len = (u32)len });
+}
+/* macOS: logical CPU count from the "hw.logicalcpu" sysctl. Returns 1 when
+ * the sysctl fails or reports a non-positive value.
+ */
+u32 sp_cpu_get_thread_count(void) {
+  int v = 0;
+  size_t len = sizeof(v);
+  if (sysctlbyname("hw.logicalcpu", &v, &len, NULL, 0) == 0 && v > 0) {
+    return (u32)v;
+  }
+  return 1;
+}
+
+#elif defined(SP_WIN32)
+/* Windows: CPU model from the registry value "ProcessorNameString" of the
+ * first processor key, copied into `mem`. Returns a zeroed string when the
+ * key cannot be opened or the value cannot be read.
+ */
+sp_str_t sp_cpu_get_model_a(sp_mem_t mem) {
+  HKEY key;
+  c8 buf[256] = sp_zero;
+  DWORD len = (DWORD)sizeof(buf);
+  LONG rc;
+  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
+                    "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
+                    0, KEY_READ, &key) != ERROR_SUCCESS) {
+    return sp_zero_s(sp_str_t);
+  }
+  rc = RegQueryValueExA(key, "ProcessorNameString", NULL, NULL,
+                        (LPBYTE)buf, &len);
+  RegCloseKey(key); /* closed before rc is checked, so both outcomes release the key */
+  if (rc != ERROR_SUCCESS) return sp_zero_s(sp_str_t);
+  /* RegQueryValueExA returns len including the trailing NUL for REG_SZ.
*/
+  if (len > 0 && buf[len - 1] == '\0') len--;
+  return sp_str_copy(mem, (sp_str_t){ .data = buf, .len = (u32)len });
+}
+/* Windows: processor count reported by GetNativeSystemInfo. Returns 1 when
+ * the reported count is 0.
+ */
+u32 sp_cpu_get_thread_count(void) {
+  SYSTEM_INFO si;
+  GetNativeSystemInfo(&si);
+  return si.dwNumberOfProcessors > 0 ? (u32)si.dwNumberOfProcessors : 1;
+}
+
+#else
+  #error "ubench: sp_cpu_* impl missing for this platform"
+#endif
+/* Fill `*m` with a description of the current machine.
+ *
+ * The struct is zeroed first, so any field a platform branch does not set
+ * stays 0 / "". Sets hostname, os_name, os_version (not on Windows), arch,
+ * memory_bytes, cpu_model, cpu_threads and cpu_cores. Always returns 0;
+ * individual lookups that fail just leave their field empty.
+ */
+SP_API s32 bench_collect_machine_info(bench_machine_info *m) {
+  memset(m, 0, sizeof(*m));
+#if defined(SP_WIN32)
+  {
+    DWORD n = (DWORD)sizeof(m->hostname);
+    SYSTEM_INFO si;
+    MEMORYSTATUSEX mem;
+    if (!GetComputerNameA(m->hostname, &n)) m->hostname[0] = '\0';
+    snprintf(m->os_name, sizeof(m->os_name), "%s", "Windows");
+    /* os_version is intentionally left blank: GetVersionExA is deprecated and
+       lies about the running OS, and RtlGetVersion needs a runtime ntdll
+       resolve. The fingerprint is fine without it. */
+    GetNativeSystemInfo(&si);
+    switch (si.wProcessorArchitecture) {
+      case PROCESSOR_ARCHITECTURE_AMD64: snprintf(m->arch, sizeof(m->arch), "x86_64"); break;
+      case PROCESSOR_ARCHITECTURE_ARM64: snprintf(m->arch, sizeof(m->arch), "aarch64"); break;
+      case PROCESSOR_ARCHITECTURE_INTEL: snprintf(m->arch, sizeof(m->arch), "x86"); break;
+      default: snprintf(m->arch, sizeof(m->arch), "unknown");
+    }
+    mem.dwLength = sizeof(mem); /* must be set before calling GlobalMemoryStatusEx */
+    if (GlobalMemoryStatusEx(&mem)) m->memory_bytes = (s64)mem.ullTotalPhys;
+  }
+#else
+  {
+    struct utsname u;
+    if (uname(&u) == 0) {
+      snprintf(m->os_name, sizeof(m->os_name), "%s", u.sysname);
+      snprintf(m->os_version, sizeof(m->os_version), "%s", u.release);
+      snprintf(m->arch, sizeof(m->arch), "%s", u.machine);
+    }
+    if (gethostname(m->hostname, sizeof(m->hostname)) != 0) m->hostname[0] = '\0';
+    m->hostname[sizeof(m->hostname) - 1] = '\0'; /* force termination in case the name was truncated */
+  }
+#endif
+#if defined(__linux__)
+  {
+    c8 buf[64];
+    bench__read_first_field("/proc/meminfo", "MemTotal", buf, sizeof(buf));
+    m->memory_bytes = (s64)atoll(buf) * 1024; /* the value is scaled by 1024, i.e. read as KiB */
+  }
+#elif defined(__APPLE__)
+  {
+    s64 bytes = 0;
+    size_t len =
sizeof(bytes);
+    if (sysctlbyname("hw.memsize", &bytes, &len, NULL, 0) == 0) m->memory_bytes = bytes;
+  }
+#endif
+
+  {
+    sp_mem_arena_marker_t s = sp_mem_begin_scratch(); /* the model string only lives until it is copied into m */
+    sp_str_t model = sp_cpu_get_model_a(s.mem);
+    sp_cstr_copy_to_n(model.data, model.len,
+                      m->cpu_model, sizeof(m->cpu_model));
+    sp_mem_end_scratch(s);
+  }
+  m->cpu_threads = (s32)sp_cpu_get_thread_count();
+  m->cpu_cores = m->cpu_threads; /* no separate core count is queried; cores mirrors threads */
+  return 0;
+}
+/* Look up the `machines` row whose fingerprint matches `m`, inserting a new
+ * row when none exists, and store its id in `*out_id`.
+ *
+ * Returns 0 on success. On a prepare or insert failure it prints the SQLite
+ * error to stderr and returns -1, leaving `*out_id` untouched.
+ */
+SP_IMP s32 bench__get_or_insert_machine(sqlite3 *db, const bench_machine_info *m, sqlite3_int64 *out_id) {
+  c8 fingerprint[512];
+  sqlite3_stmt *stmt = NULL;
+  s32 rc;
+
+  bench__make_fingerprint(m, fingerprint, sizeof(fingerprint));
+
+  rc = sqlite3_prepare_v2(db, "SELECT id FROM machines WHERE fingerprint=?",
+                          -1, &stmt, NULL);
+  if (rc != SQLITE_OK) goto fail;
+  sqlite3_bind_text(stmt, 1, fingerprint, -1, SQLITE_TRANSIENT);
+  if (sqlite3_step(stmt) == SQLITE_ROW) { /* already known: reuse the existing id */
+    *out_id = sqlite3_column_int64(stmt, 0);
+    sqlite3_finalize(stmt);
+    return 0;
+  }
+  sqlite3_finalize(stmt); /* any other step result is treated as "not found" */
+
+  rc = sqlite3_prepare_v2(
+      db,
+      "INSERT INTO machines (fingerprint, hostname, os_name, os_version, "
+      "arch, cpu_model, cpu_cores, cpu_threads, memory_bytes) "
+      "VALUES (?,?,?,?,?,?,?,?,?)",
+      -1, &stmt, NULL);
+  if (rc != SQLITE_OK) goto fail;
+  sqlite3_bind_text (stmt, 1, fingerprint, -1, SQLITE_TRANSIENT);
+  sqlite3_bind_text (stmt, 2, m->hostname, -1, SQLITE_TRANSIENT);
+  sqlite3_bind_text (stmt, 3, m->os_name, -1, SQLITE_TRANSIENT);
+  sqlite3_bind_text (stmt, 4, m->os_version,-1, SQLITE_TRANSIENT);
+  sqlite3_bind_text (stmt, 5, m->arch, -1, SQLITE_TRANSIENT);
+  sqlite3_bind_text (stmt, 6, m->cpu_model, -1, SQLITE_TRANSIENT);
+  sqlite3_bind_int (stmt, 7, m->cpu_cores);
+  sqlite3_bind_int (stmt, 8, m->cpu_threads);
+  sqlite3_bind_int64(stmt, 9, m->memory_bytes);
+  if (sqlite3_step(stmt) != SQLITE_DONE) goto fail;
+  *out_id = sqlite3_last_insert_rowid(db);
+  sqlite3_finalize(stmt);
+  return 0;
+fail:
+  fprintf(stderr, "bench machine: %s\n",
sqlite3_errmsg(db));
+  if (stmt) sqlite3_finalize(stmt);
+  return -1;
+}
+/* Look up the `benchmarks` row named `name`, inserting it when missing, and
+ * store its id in `*out_id`.
+ *
+ * Returns 0 on success. On a prepare or insert failure it prints the SQLite
+ * error to stderr and returns -1, leaving `*out_id` untouched.
+ */
+s32 bench__get_or_insert_benchmark(sqlite3* db, const c8* name, sqlite3_int64* out_id) {
+  sqlite3_stmt *stmt = NULL;
+  s32 rc = sqlite3_prepare_v2(db, "SELECT id FROM benchmarks WHERE name=?",
+                              -1, &stmt, NULL);
+  if (rc != SQLITE_OK) goto fail;
+  sqlite3_bind_text(stmt, 1, name, -1, SQLITE_TRANSIENT);
+  if (sqlite3_step(stmt) == SQLITE_ROW) { /* already known: reuse the existing id */
+    *out_id = sqlite3_column_int64(stmt, 0);
+    sqlite3_finalize(stmt);
+    return 0;
+  }
+  sqlite3_finalize(stmt);
+
+  rc = sqlite3_prepare_v2(db, "INSERT INTO benchmarks (name) VALUES (?)",
+                          -1, &stmt, NULL);
+  if (rc != SQLITE_OK) goto fail;
+  sqlite3_bind_text(stmt, 1, name, -1, SQLITE_TRANSIENT);
+  if (sqlite3_step(stmt) != SQLITE_DONE) goto fail;
+  *out_id = sqlite3_last_insert_rowid(db);
+  sqlite3_finalize(stmt);
+  return 0;
+fail:
+  fprintf(stderr, "bench benchmark: %s\n", sqlite3_errmsg(db));
+  if (stmt) sqlite3_finalize(stmt);
+  return -1;
+}
+/* Bind `s` as text to parameter `idx`, or SQL NULL when `s` is a null pointer.
+ */
+void bench__bind_text_or_null(sqlite3_stmt *stmt, int idx, const c8 *s) {
+  if (s) {
+    sqlite3_bind_text(stmt, idx, s, -1, SQLITE_TRANSIENT);
+  } else {
+    sqlite3_bind_null(stmt, idx);
+  }
+}
+/* Bind `v` as a 64-bit integer to parameter `idx`, or SQL NULL when `v` equals
+ * the BENCH_UNSET_I64 sentinel.
+ */
+void bench__bind_i64_or_null(sqlite3_stmt *stmt, int idx, s64 v) {
+  if (v == BENCH_UNSET_I64) {
+    sqlite3_bind_null(stmt, idx);
+  } else {
+    sqlite3_bind_int64(stmt, idx, v);
+  }
+}
+/* Bind `v` as a double to parameter `idx`, or SQL NULL when `v` is at or below
+ * the BENCH_UNSET_F64 sentinel (note: <=, not ==).
+ */
+void bench__bind_f64_or_null(sqlite3_stmt *stmt, int idx, f64 v) {
+  if (v <= BENCH_UNSET_F64) {
+    sqlite3_bind_null(stmt, idx);
+  } else {
+    sqlite3_bind_double(stmt, idx, v);
+  }
+}
+/* Open (or create) the SQLite results database at `path` and apply the
+ * schema. Returns a store handle, or SP_NULLPTR after printing the SQLite
+ * error to stderr when the open or the schema step fails.
+ */
+bench_store* bench_store_open(const c8 *path) {
+  sp_mem_t mem = sp_mem_os_new();
+
+  bench_store* s = sp_alloc_type(mem, bench_store); /* NOTE(review): used unchecked; result_stmt is never set here, presumably sp_alloc_type zeroes - confirm */
+  s->arena = sp_mem_arena_new(mem); /* NOTE(review): no release of this arena is visible on the error path - confirm */
+  s->mem = sp_mem_arena_as_allocator(s->arena);
+
+  if (sqlite3_open(path, &s->db) != SQLITE_OK) {
+    fprintf(stderr, "bench open: %s\n", sqlite3_errmsg(s->db));
+    sqlite3_close(s->db);
+    goto error;
+  }
+  sqlite3_exec(s->db, "PRAGMA journal_mode=WAL", 0, 0, 0);
+  
sqlite3_exec(s->db, "PRAGMA foreign_keys=ON", 0, 0, 0);
+  if (sqlite3_exec(s->db, BENCH_STORE_SCHEMA, 0, 0, 0) != SQLITE_OK) {
+    fprintf(stderr, "bench schema: %s\n", sqlite3_errmsg(s->db));
+    sqlite3_close(s->db);
+    goto error;
+  }
+
+  return s;
+
+error:
+  if (s) sp_mem_allocator_free(mem, s);
+  return SP_NULLPTR;
+}
+
+/* Release a store returned by bench_store_open: finalize the cached result
+   statement, close the database and free the handle. NULL is accepted and
+   ignored.
+   NOTE(review): s->arena is not released here - confirm whether it needs an
+   explicit destroy. */
+void bench_store_close(bench_store* s) {
+  if (!s) return;
+  if (s->result_stmt) sqlite3_finalize(s->result_stmt);
+  if (s->db) sqlite3_close(s->db);
+
+  sp_mem_t mem = sp_mem_os_new();
+  sp_mem_allocator_free(mem, s);
+}
+
+/* Start a benchmark run: make sure the machine row exists, insert a `runs`
+   row described by `ri`, then open a transaction and prepare the cached
+   INSERT that bench_store_record reuses for every result.
+
+   Returns the new run id, or -1 on invalid arguments or any SQLite failure
+   (the error is printed to stderr). On failure no prepared statement and no
+   transaction opened here is left behind. Note that the `runs` row is
+   inserted before the transaction begins, so it is not undone by the
+   rollback. */
+s64 bench_store_begin_run(bench_store* s, const bench_machine_info* mi, const bench_run_info* ri) {
+  sqlite3_int64 machine_id = 0, run_id = 0;
+  sqlite3_stmt *stmt = NULL;
+  s32 began_txn = 0;
+  /* Zeroed so the text bound below is NUL-terminated whatever the formatter
+     writes. */
+  c8 started_at[32] = sp_zero;
+
+  if (!s || !s->db || !mi || !ri) return -1;
+  if (bench__get_or_insert_machine(s->db, mi, &machine_id) != 0) return -1;
+
+  /* Format "now" into the stack buffer through a fixed allocator over it.
+     NOTE(review): relies on the ISO-8601 text landing at the start of
+     started_at - confirm against sp_mem_fixed. */
+  sp_mem_fixed_t mem = sp_mem_fixed(started_at, sizeof(started_at));
+  sp_tm_epoch_to_iso8601(sp_mem_fixed_as_allocator(&mem), sp_tm_now_epoch());
+
+  if (sqlite3_prepare_v2(
+          s->db,
+          "INSERT INTO runs (machine_id, started_at, executable_path, "
+          "executable_size_bytes, executable_mtime, confidence_threshold, "
+          "filter, has_perf_counters, label, framework, metadata) "
+          "VALUES (?,?,?,?,?,?,?,?,?,?,?)",
+          -1, &stmt, NULL) != SQLITE_OK) goto fail;
+
+  sqlite3_bind_int64(stmt, 1, machine_id);
+  sqlite3_bind_text (stmt, 2, started_at, -1, SQLITE_TRANSIENT);
+  bench__bind_text_or_null(stmt, 3, ri->executable_path);
+  if (ri->executable_size_bytes > 0) {
+    sqlite3_bind_int64(stmt, 4, ri->executable_size_bytes);
+  } else {
+    sqlite3_bind_null(stmt, 4);
+  }
+  bench__bind_text_or_null(stmt, 5, ri->executable_mtime);
+  if (ri->confidence_threshold >= 0) {
+    sqlite3_bind_double(stmt, 6, ri->confidence_threshold);
+  } else {
+    sqlite3_bind_null(stmt, 6);
+  }
+  bench__bind_text_or_null(stmt, 7, ri->filter);
+  if (ri->has_perf_counters >= 0) {
+    sqlite3_bind_int (stmt, 8, ri->has_perf_counters);
+  } else {
+    sqlite3_bind_null(stmt, 8);
+  }
+  bench__bind_text_or_null(stmt, 9, ri->label);
+  bench__bind_text_or_null(stmt, 10, ri->framework);
+  bench__bind_text_or_null(stmt, 11, ri->metadata_json);
+
+  if (sqlite3_step(stmt) != SQLITE_DONE) goto fail;
+  run_id = sqlite3_last_insert_rowid(s->db);
+  sqlite3_finalize(stmt);
+  /* Clear the handle. The prepare below writes into s->result_stmt, not
+     stmt, so without this a failure there would finalize stmt a second
+     time. */
+  stmt = NULL;
+
+  /* Batch all subsequent INSERTs into one transaction; bench_store_end_run
+     commits. The prepared INSERT is reused for every record. BEGIN stays
+     best-effort, but we remember whether it succeeded so the failure path
+     only rolls back a transaction this call opened. */
+  began_txn = (sqlite3_exec(s->db, "BEGIN TRANSACTION", 0, 0, 0) == SQLITE_OK);
+  if (sqlite3_prepare_v2(
+          s->db,
+          "INSERT INTO results (run_id, benchmark_id, iterations, mean_ns, "
+          "median_ns, min_ns, max_ns, stddev_ns, stddev_pct, ci_low_ns, "
+          "ci_high_ns, ci_level_pct, confidence_pct, bytes_processed, "
+          "items_processed, cycles_per_iter, instructions_per_iter) "
+          "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
+          -1, &s->result_stmt, NULL) != SQLITE_OK) goto fail;
+
+  return (s64)run_id;
+fail:
+  /* Report first: the rollback below would overwrite the error message. */
+  fprintf(stderr, "bench begin_run: %s\n", sqlite3_errmsg(s->db));
+  if (stmt) sqlite3_finalize(stmt);
+  /* bench_store_end_run only commits when result_stmt is set, so a
+     transaction left open here would never be closed. */
+  if (began_txn) sqlite3_exec(s->db, "ROLLBACK", 0, 0, 0);
+  return -1;
+}
+
+/* Insert one result row for `bench_name` under `run_id`, using the INSERT
+   prepared by bench_store_begin_run. Returns 0 on success, or -1 on invalid
+   arguments, a missing prepared statement, or any SQLite failure. */
+s32 bench_store_record(bench_store* s, s64 run_id, const c8* bench_name, const bench_result* r) {
+  sqlite3_int64 bench_id = 0;
+  if (!s || !s->result_stmt || !bench_name || !r) return -1;
+  if (bench__get_or_insert_benchmark(s->db, bench_name, &bench_id) != 0) return -1;
+
+  sqlite3_reset(s->result_stmt);
+  sqlite3_bind_int64 (s->result_stmt, 1, run_id);
+  sqlite3_bind_int64 (s->result_stmt, 2, bench_id);
+  sqlite3_bind_int64 (s->result_stmt, 3, r->iterations);
+  sqlite3_bind_double(s->result_stmt, 4, r->mean_ns);
+  sqlite3_bind_double(s->result_stmt, 5, r->median_ns);
+  sqlite3_bind_double(s->result_stmt, 6, r->min_ns);
+  sqlite3_bind_double(s->result_stmt, 7, r->max_ns);
+  bench__bind_f64_or_null(s->result_stmt, 8, r->stddev_ns);
+  bench__bind_f64_or_null(s->result_stmt, 9, r->stddev_pct);
+  bench__bind_f64_or_null(s->result_stmt, 10, r->ci_low_ns);
+  bench__bind_f64_or_null(s->result_stmt, 11,
r->ci_high_ns); + bench__bind_f64_or_null(s->result_stmt, 12, r->ci_level_pct); + bench__bind_f64_or_null(s->result_stmt, 13, r->confidence_pct); + bench__bind_i64_or_null(s->result_stmt, 14, r->bytes_processed); + bench__bind_i64_or_null(s->result_stmt, 15, r->items_processed); + bench__bind_i64_or_null(s->result_stmt, 16, r->cycles_per_iter); + bench__bind_i64_or_null(s->result_stmt, 17, r->instructions_per_iter); + + if (sqlite3_step(s->result_stmt) != SQLITE_DONE) { + fprintf(stderr, "bench record: %s\n", sqlite3_errmsg(s->db)); + return -1; + } + return 0; +} + +s32 bench_store_end_run(bench_store* s, s64 run_id) { + c8 finished_at[32]; + sqlite3_stmt *stmt = NULL; + if (!s || !s->db) return -1; + + if (s->result_stmt) { + sqlite3_finalize(s->result_stmt); + s->result_stmt = NULL; + sqlite3_exec(s->db, "COMMIT TRANSACTION", 0, 0, 0); + } + + sp_mem_fixed_t mem = sp_mem_fixed(finished_at, sizeof(finished_at)); + sp_tm_epoch_to_iso8601(sp_mem_fixed_as_allocator(&mem), sp_tm_now_epoch()); + sqlite3_prepare_v2(s->db, "UPDATE runs SET finished_at=? 
WHERE id=?", + -1, &stmt, NULL); + sqlite3_bind_text (stmt, 1, finished_at, -1, SQLITE_TRANSIENT); + sqlite3_bind_int64(stmt, 2, run_id); + sqlite3_step (stmt); + sqlite3_finalize(stmt); + return 0; +} + +s64 bench_simple_begin_run( + bench_store* s, + const c8* framework, const c8* label, + const c8* executable_path, s64 executable_size_bytes, const c8* executable_mtime, + f64 confidence_threshold, + s32 has_perf_counters +) { + bench_machine_info mi; + bench_run_info ri; + bench_collect_machine_info(&mi); + memset(&ri, 0, sizeof(ri)); + ri.framework = framework; + ri.label = label; + ri.executable_path = executable_path; + ri.executable_size_bytes = executable_size_bytes; + ri.executable_mtime = executable_mtime; + ri.confidence_threshold = confidence_threshold; + ri.has_perf_counters = has_perf_counters; + return bench_store_begin_run(s, &mi, &ri); +} + +s32 bench_simple_record( + bench_store* s, s64 run_id, + const c8* name, + s64 iterations, + f64 mean_ns, f64 median_ns, + f64 min_ns, f64 max_ns, + f64 stddev_ns, f64 stddev_pct, + f64 ci_low_ns, f64 ci_high_ns, f64 ci_level_pct, + f64 confidence_pct, + s64 bytes_processed, s64 items_processed, + s64 cycles_per_iter, s64 instructions_per_iter +) { + bench_result r; + r.iterations = iterations; + r.mean_ns = mean_ns; + r.median_ns = median_ns; + r.min_ns = min_ns; + r.max_ns = max_ns; + r.stddev_ns = stddev_ns; + r.stddev_pct = stddev_pct; + r.ci_low_ns = ci_low_ns; + r.ci_high_ns = ci_high_ns; + r.ci_level_pct = ci_level_pct; + r.confidence_pct = confidence_pct; + r.bytes_processed = bytes_processed; + r.items_processed = items_processed; + r.cycles_per_iter = cycles_per_iter; + r.instructions_per_iter = instructions_per_iter; + return bench_store_record(s, run_id, name, &r); +} +#endif + +SP_END_EXTERN_C() + +static SP_INLINE s32 ubench_main(s32 argc, const c8 *const argv[]); +s32 ubench_main(s32 argc, const c8 *const argv[]) { + u64 failed = 0; + ubench_size_t index = 0; + ubench_size_t *failed_benchmarks = 
SP_NULLPTR; + ubench_size_t failed_benchmarks_length = 0; + const c8 *filter = SP_NULLPTR; + u64 ran_benchmarks = 0; + + sp_fmt_register_decorator("green", ubench_fmt_tty_green, ubench_fmt_tty_reset); + sp_fmt_register_decorator("red", ubench_fmt_tty_red, ubench_fmt_tty_reset); + +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + struct ubench_perf_s perf; + ubench_perf_init(&perf); +#endif +#if defined(UBENCH_ENABLE_SQLITE) + const c8 *db_path = "./ubench.db"; + bench_store *store = SP_NULLPTR; + s64 run_id = 0; +#endif + + /* loop through all arguments looking for our options */ + for (index = 1; index < sp_cast(ubench_size_t, argc); index++) { + /* Informational switches */ + const sp_str_t help_str = sp_str_lit("--help"); + const sp_str_t list_str = sp_str_lit("--list-benchmarks"); + /* Benchmark config switches */ + const sp_str_t filter_str = sp_str_lit("--filter="); +#if defined(UBENCH_ENABLE_SQLITE) + const sp_str_t output_str = sp_str_lit("--output="); +#endif + const sp_str_t confidence_str = sp_str_lit("--confidence="); + const sp_str_t arg = sp_cstr_as_str(argv[index]); + + if (sp_str_starts_with(arg, help_str)) { + sp_log("ubench.h - the single file benchmarking solution for C/C++!"); + sp_log("Command line Options:"); + sp_log(" --help Show this message and exit."); + sp_log(" --filter= Filter the benchmarks to run (EG. " + "MyBench*.a would run MyBenchmark.a but not MyBenchmark.b)."); + sp_log(" --list-benchmarks List benchmarks, one per line. " + "Output names can be passed to --filter."); +#if defined(UBENCH_ENABLE_SQLITE) + sp_log(" --output= SQLite database to write results to " + "(default ./ubench.db, 'none' disables)."); +#endif + sp_log(" --confidence= MdAPE (median absolute percent " + "error) cut-off above which a benchmark is reported as failed. " + "Defaults to 2.5%"); + goto cleanup; + } else if (sp_str_starts_with(arg, filter_str)) { + /* user wants to filter what benchmarks run! 
*/ + filter = argv[index] + filter_str.len; +#if defined(UBENCH_ENABLE_SQLITE) + } else if (sp_str_starts_with(arg, output_str)) { + const c8 *value = argv[index] + output_str.len; + if (sp_cstr_equal(value, "none")) { + db_path = SP_NULLPTR; + } else { + db_path = value; + } +#endif + } else if (sp_str_starts_with(arg, list_str)) { + for (index = 0; index < ubench_state.benchmarks_length; index++) { + sp_log("{}", sp_fmt_str(ubench_state.benchmarks[index].name)); + } + + /* when printing the benchmark list, don't actually run the benchmarks */ + goto cleanup; + } else if (sp_str_starts_with(arg, confidence_str)) { + /* user wants to specify a different confidence */ + ubench_state.confidence = + sp_parse_f64(sp_cstr_as_str(argv[index] + confidence_str.len)); + + /* must be between 0 and 100 */ + if ((ubench_state.confidence < 0) || (ubench_state.confidence > 100)) { + sp_print_err( + "Confidence must be in the range [0..100] (you specified {})\n", + sp_fmt_float(ubench_state.confidence)); + goto cleanup; + } + } + } + + for (index = 0; index < ubench_state.benchmarks_length; index++) { + if (ubench_should_filter(filter, ubench_state.benchmarks[index].name.data)) { + continue; + } + + ran_benchmarks++; + } + + sp_log("{.green} Running {} benchmarks.", sp_fmt_cstr("[==========]"), sp_fmt_uint(ran_benchmarks)); + +#if defined(UBENCH_ENABLE_SQLITE) + if (db_path) { + store = bench_store_open(db_path); + if (store) { + bench_machine_info mi; + bench_run_info ri; + c8 exe_path_buf[4096]; + c8 exe_mtime_buf[32]; + s64 exe_size = 0; + s32 has_perf = 0; +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + has_perf = perf.group_fd >= 0 ? 1 : 0; +#endif + exe_path_buf[0] = '\0'; + exe_mtime_buf[0] = '\0'; + + bench_collect_machine_info(&mi); + + /* Collect executable info via sp_fs_*; bench_run_info just holds + pointers into these stack buffers (live until bench_store_begin_run + returns, which is what binds the strings into SQLite). 
*/ + { + sp_mem_arena_marker_t s = sp_mem_begin_scratch(); + sp_str_t exe = sp_fs_get_exe_path(s.mem); + sp_cstr_copy_to_n(exe.data, exe.len, + exe_path_buf, sizeof(exe_path_buf)); + if (exe_path_buf[0]) { + struct stat st; + if (stat(exe_path_buf, &st) == 0) { + exe_size = (s64)st.st_size; + sp_str_t mtime = sp_tm_epoch_to_iso8601( + s.mem, sp_fs_get_mod_time(exe)); + sp_cstr_copy_to_n(mtime.data, mtime.len, + exe_mtime_buf, sizeof(exe_mtime_buf)); + } + } + sp_mem_end_scratch(s); + } + + sp_mem_zero(&ri, sizeof(ri)); + ri.executable_path = exe_path_buf[0] ? exe_path_buf : SP_NULLPTR; + ri.executable_size_bytes = exe_size > 0 ? exe_size : BENCH_UNSET_I64; + ri.executable_mtime = exe_mtime_buf[0] ? exe_mtime_buf : SP_NULLPTR; + ri.filter = filter; + ri.framework = "ubench"; + ri.confidence_threshold = ubench_state.confidence; + ri.has_perf_counters = has_perf; + + run_id = bench_store_begin_run(store, &mi, &ri); + if (run_id < 0) { + bench_store_close(store); + store = SP_NULLPTR; + run_id = 0; + } + } + } +#endif + + for (index = 0; index < ubench_state.benchmarks_length; index++) { + s32 result = 0; + s64 kndex = 0; + s64 cal_ns = 0; + s64 epochs = 0; + /* Per-body times stored as doubles end-to-end so sub-nanosecond bodies + (UBENCH_LOOP-batched microbenchmarks) survive both the display path + and the SQL bind. The unit is nanoseconds. 
*/ + f64 best_avg_ns = 0; + f64 best_min_ns = 0; + f64 best_median_ns = 0; + f64 best_max_ns = 0; + f64 best_deviation = 0; + f64 best_confidence = 0; +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + u64 best_cycles = 0; + u64 best_instructions = 0; + u64 pass_cycles = 0; + u64 pass_instructions = 0; +#endif + struct ubench_run_state_s ubs; + +#define UBENCH_MIN_EPOCHS 16 +#define UBENCH_MAX_EPOCHS 500 + const s64 max_epochs = UBENCH_MAX_EPOCHS; + const s64 min_epochs = UBENCH_MIN_EPOCHS; + /* Add one extra timestamp slot: each sample stores the timestamp at its + start, plus one final timestamp after the last sample exits. */ + s64 ns[UBENCH_MAX_EPOCHS + 1]; + s64 pause_ns[UBENCH_MAX_EPOCHS + 1]; + /* Scratch for MdAPE computation (relative deviations in ppm). */ + s64 mdape_scratch[UBENCH_MAX_EPOCHS]; +#undef UBENCH_MAX_EPOCHS +#undef UBENCH_MIN_EPOCHS + + if (ubench_should_filter(filter, ubench_state.benchmarks[index].name.data)) { + continue; + } + + sp_str_t name = ubench_state.benchmarks[index].name; + sp_log("[ {:<9 .green}] {}", sp_fmt_cstr("RUN"), sp_fmt_str(name)); + + ubs.ns = ns; + ubs.pause_ns = pause_ns; + ubs.size = 1; + ubs.sample = 0; + ubs.paused_ns = 0; + ubs.pause_start = 0; + ubs.bytes_processed = 0; + ubs.items_processed = 0; + ubs.batch = 1; + ubs.batch_consumed = 0; + + /* CALIBRATION: one body invocation, batch=1, to estimate single-body + cost. The body announces UBENCH_LOOP usage by setting batch_consumed. */ + ubench_invoke(&ubench_state.benchmarks[index], &ubs); + cal_ns = ns[1] - ns[0] - pause_ns[0]; + if (cal_ns <= 0) { + cal_ns = 1; + } + + /* Auto-tune batch only if the body opted in via UBENCH_LOOP. The clock- + bracketed window is sized to ~1 ms regardless of body cost, giving + (1 ms / clock_overhead) ~= 10^4..10^6 amortization for sub-µs bodies. 
*/ + ubs.batch = 1; + if (ubs.batch_consumed) { + const s64 target_batch_ns = 1 * 1000 * 1000; + s64 b = target_batch_ns / cal_ns; + if (b < 1) { + b = 1; + } + ubs.batch = b; + } + + /* Choose epoch count: target ~100 ms total wall-time across all samples, + but never fewer than min_epochs (so the median has a meaningful base). */ + { + const s64 target_total_ns = 100 * 1000 * 1000; + const s64 per_sample_ns = ubs.batch * cal_ns; + epochs = target_total_ns / (per_sample_ns > 0 ? per_sample_ns : 1); + if (epochs < min_epochs) { + epochs = min_epochs; + } + if (epochs > max_epochs) { + epochs = max_epochs; + } + } + + /* WARMUP: one throwaway sample at the chosen batch, to prime caches, + branch predictor, TLB, and demand-page any lazy allocations. */ + ubs.size = 1; + ubs.sample = 0; + ubs.paused_ns = 0; + ubs.pause_start = 0; + ubench_invoke(&ubench_state.benchmarks[index], &ubs); + + /* MEASUREMENT */ + ubs.size = epochs; + ubs.sample = 0; + ubs.paused_ns = 0; + ubs.pause_start = 0; +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + ubench_perf_start(&perf); +#endif + ubench_invoke(&ubench_state.benchmarks[index], &ubs); +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + ubench_perf_stop(&perf, &pass_cycles, &pass_instructions); +#endif + + /* Convert raw timestamps to per-batch deltas (in ns) in place. The /batch + split is deferred to f64-precision arithmetic below so that sub-ns + per-body times don't get truncated to zero. */ + for (kndex = 0; kndex < epochs; kndex++) { + s64 d = ns[kndex + 1] - ns[kndex] - pause_ns[kndex]; + if (d < 0) { + d = 0; + } + ns[kndex] = d; + } + + /* Mean per body in f64. */ + { + const f64 batch_d = sp_cast(f64, ubs.batch); + f64 sum = 0; + for (kndex = 0; kndex < epochs; kndex++) { + sum += sp_cast(f64, ns[kndex]); + } + best_avg_ns = sum / (sp_cast(f64, epochs) * batch_d); + + /* Sample stddev (kept for legacy reporting). 
*/ + { + f64 var = 0; + for (kndex = 0; kndex < epochs; kndex++) { + const f64 v = + sp_cast(f64, ns[kndex]) / batch_d - best_avg_ns; + var += v * v; + } + var /= sp_cast(f64, epochs); + best_deviation = + (best_avg_ns > 0) + ? ((f64)sp_sys_sqrtf((f32)var) / best_avg_ns) * 100.0 + : 0.0; + } + } + + /* Sort raw per-batch samples to derive median, min, max. MdAPE is + scale-invariant ((x-med)/med), so it works on raw samples without /batch + — the batch factor cancels in the ratio. */ + sp_os_qsort(ns, sp_cast(ubench_size_t, epochs), sizeof(*ns), ubench_int64_cmp); + { + const s64 raw_median = ns[epochs / 2]; + const f64 batch_d = sp_cast(f64, ubs.batch); + best_min_ns = sp_cast(f64, ns[0]) / batch_d; + best_median_ns = sp_cast(f64, raw_median) / batch_d; + best_max_ns = sp_cast(f64, ns[epochs - 1]) / batch_d; + + /* MdAPE = median of |x - median| / median, in percent. Robust against + the heavy-tailed one-sided noise typical of microbenchmark + distributions (preemption, page faults, IRQs, frequency steps). + Replaces the prior Gaussian CI, which was structurally wrong here. */ + best_confidence = 0.0; + if (raw_median > 0) { + for (kndex = 0; kndex < epochs; kndex++) { + s64 v = ns[kndex] - raw_median; + if (v < 0) { + v = -v; + } + mdape_scratch[kndex] = (v * 1000000) / raw_median; + } + sp_os_qsort(mdape_scratch, sp_cast(ubench_size_t, epochs), sizeof(*mdape_scratch), ubench_int64_cmp); + { + const s64 mid = epochs / 2; + best_confidence = + sp_cast(f64, mdape_scratch[mid]) / 10000.0; + } + } + } +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + { + const u64 total_bodies = + sp_cast(u64, epochs) * + sp_cast(u64, ubs.batch); + if (total_bodies > 0) { + best_cycles = pass_cycles / total_bodies; + best_instructions = pass_instructions / total_bodies; + } + } +#endif + + /* Flag the benchmark as failed if MdAPE exceeds the user threshold. 
*/ + result = best_confidence > ubench_state.confidence; + + if (result) { + sp_log("MdAPE {}% exceeds maximum permitted {}%", + sp_fmt_float(best_confidence), + sp_fmt_float(ubench_state.confidence)); + } + + { + const f64 bps = (ubs.bytes_processed > 0 && best_avg_ns > 0) + ? sp_cast(f64, ubs.bytes_processed) * 1e9 / + sp_cast(f64, best_avg_ns) + : 0.0; + const f64 ips = (ubs.items_processed > 0 && best_avg_ns > 0) + ? sp_cast(f64, ubs.items_processed) * 1e9 / + sp_cast(f64, best_avg_ns) + : 0.0; + +#if defined(UBENCH_ENABLE_SQLITE) + if (store) { + bench_result br; + sp_mem_zero(&br, sizeof(br)); + br.iterations = epochs; + br.mean_ns = best_avg_ns; + br.median_ns = best_median_ns; + br.min_ns = best_min_ns; + br.max_ns = best_max_ns; + br.stddev_ns = best_deviation * best_avg_ns / 100.0; + br.stddev_pct = best_deviation; + br.ci_low_ns = BENCH_UNSET_F64; + br.ci_high_ns = BENCH_UNSET_F64; + br.ci_level_pct = BENCH_UNSET_F64; + br.confidence_pct = best_confidence; + br.bytes_processed = ubs.bytes_processed > 0 ? ubs.bytes_processed + : BENCH_UNSET_I64; + br.items_processed = ubs.items_processed > 0 ? ubs.items_processed + : BENCH_UNSET_I64; +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + br.cycles_per_iter = perf.group_fd >= 0 ? (s64)best_cycles + : BENCH_UNSET_I64; + br.instructions_per_iter = perf.group_fd >= 0 ? 
(s64)best_instructions + : BENCH_UNSET_I64; +#else + br.cycles_per_iter = BENCH_UNSET_I64; + br.instructions_per_iter = BENCH_UNSET_I64; +#endif + bench_store_record(store, run_id, + ubench_state.benchmarks[index].name.data, &br); + } +#endif + + { + const c8 *unit = "us"; + f64 scale_div = 1.0; + + if (0 != result) { + const ubench_size_t failed_benchmark_index = failed_benchmarks_length++; + failed_benchmarks = sp_ptr_cast( + ubench_size_t *, + sp_realloc(ubench_state.mem, + sp_ptr_cast(void *, failed_benchmarks), + sizeof(ubench_size_t) * failed_benchmarks_length)); + failed_benchmarks[failed_benchmark_index] = index; + failed++; + } + + if (0 != result) { + sp_print("[{:^10 .red}] ", sp_fmt_cstr("FAILED")); + } else { + sp_print("[{:>9 .green} ] ", sp_fmt_cstr("OK")); + } + sp_print("{} (mean ", sp_fmt_str(ubench_state.benchmarks[index].name)); + + /* Auto-scale display: pick a unit so the mean prints in [1, 1000). */ + if (best_avg_ns >= 1e9) { + unit = "s"; + scale_div = 1e9; + } else if (best_avg_ns >= 1e6) { + unit = "ms"; + scale_div = 1e6; + } else if (best_avg_ns >= 1e3) { + unit = "us"; + scale_div = 1e3; + } else if (best_avg_ns >= 1.0) { + unit = "ns"; + scale_div = 1.0; + } else { + unit = "ps"; + scale_div = 1e-3; + } + sp_print("{:.3}{}, median {:.3}{}, min {:.3}{}, MdAPE {}%", + sp_fmt_float(best_avg_ns / scale_div), sp_fmt_cstr(unit), + sp_fmt_float(best_median_ns / scale_div), sp_fmt_cstr(unit), + sp_fmt_float(best_min_ns / scale_div), sp_fmt_cstr(unit), + sp_fmt_float(best_confidence)); + + if (bps > 0.0) { + const c8 *bps_unit; + f64 bps_scaled; + if (bps >= 1e9) { bps_unit = "GB/s"; bps_scaled = bps / 1e9; } + else if (bps >= 1e6) { bps_unit = "MB/s"; bps_scaled = bps / 1e6; } + else if (bps >= 1e3) { bps_unit = "KB/s"; bps_scaled = bps / 1e3; } + else { bps_unit = "B/s"; bps_scaled = bps; } + sp_print(", {:.3} {}", sp_fmt_float(bps_scaled), sp_fmt_cstr(bps_unit)); + } + if (ips > 0.0) { + const c8 *ips_unit; + f64 ips_scaled; + if (ips >= 
1e9) { ips_unit = "G items/s"; ips_scaled = ips / 1e9; } + else if (ips >= 1e6) { ips_unit = "M items/s"; ips_scaled = ips / 1e6; } + else if (ips >= 1e3) { ips_unit = "K items/s"; ips_scaled = ips / 1e3; } + else { ips_unit = "items/s"; ips_scaled = ips; } + sp_print(", {:.3} {}", sp_fmt_float(ips_scaled), sp_fmt_cstr(ips_unit)); + } +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + if (perf.group_fd >= 0) { + sp_print(", {} cycles, {} instructions", + sp_fmt_uint(best_cycles), sp_fmt_uint(best_instructions)); + } +#endif + sp_log(")"); + } + } + } + + sp_log("{.green} {} benchmarks ran.", + sp_fmt_cstr("[==========]"), sp_fmt_uint(ran_benchmarks)); + sp_log("[{:^10 .green}] {} benchmarks.", + sp_fmt_cstr("PASSED"), sp_fmt_uint(ran_benchmarks - failed)); + + if (0 != failed) { + sp_log("[{:^10 .red}] {} benchmarks, listed below:", + sp_fmt_cstr("FAILED"), sp_fmt_uint(failed)); + for (index = 0; index < failed_benchmarks_length; index++) { + sp_log("[{:^10 .red}] {}", + sp_fmt_cstr("FAILED"), + sp_fmt_str(ubench_state.benchmarks[failed_benchmarks[index]].name)); + } + } + +cleanup: + sp_free(ubench_state.mem, sp_ptr_cast(void *, failed_benchmarks)); + sp_free(ubench_state.mem, sp_ptr_cast(void *, ubench_state.benchmarks)); + +#if defined(UBENCH_ENABLE_PERF_COUNTERS) && defined(SP_LINUX) + ubench_perf_close(&perf); +#endif +#if defined(UBENCH_ENABLE_SQLITE) + if (store) { + if (run_id > 0) bench_store_end_run(store, run_id); + bench_store_close(store); + } +#endif + + return sp_cast(s32, failed); +} + +#endif // SP_BENCH_C diff --git a/test/mem.c b/test/mem.c index 2880bff..7d0af90 100644 --- a/test/mem.c +++ b/test/mem.c @@ -6,6 +6,7 @@ #include "mem/builtin.c" #include "mem/arena.c" #include "mem/fixed.c" +#include "mem/heap.c" #include "mem/slice.c" #ifdef MEM_TEST_OWNS_MAIN diff --git a/test/mem/heap.c b/test/mem/heap.c new file mode 100644 index 0000000..735b05a --- /dev/null +++ b/test/mem/heap.c @@ -0,0 +1,696 @@ +#include "mem.h" + +typedef 
enum { /* kinds of scripted heap operations; HEAP_OP_NONE (0) ends an ops list */
+  HEAP_OP_NONE = 0,
+  HEAP_OP_ALLOC, /* ptrs[ref] = sp_mem_heap_alloc(heap, size) */
+  HEAP_OP_FREE, /* frees ptrs[ref], then nulls the slot */
+  HEAP_OP_REALLOC, /* ptrs[ref] = sp_mem_heap_realloc(heap, ptrs[src], size) */
+  HEAP_OP_WRITE, /* stores (u8)value at ptrs[ref][offset] */
+} heap_op_kind_t;
+
+typedef struct { /* one scripted step; which fields matter depends on kind */
+  heap_op_kind_t kind;
+  u32 ref; /* slot in the harness pointer table; must be < HEAP_MAX_REFS (not checked) */
+  u32 src; /* REALLOC only: slot holding the pointer to resize */
+  u64 size; /* ALLOC/REALLOC: requested size */
+  u64 offset; /* WRITE only: byte offset into the allocation */
+  u64 value; /* WRITE only: value to store, truncated to u8 */
+} heap_op_t;
+
+typedef struct { /* two pointer-table slots to compare */
+  u32 a;
+  u32 b;
+} heap_pair_t;
+
+typedef struct { /* expects the byte at `offset` to equal (u8)value */
+  u64 offset;
+  u64 value;
+} heap_data_t;
+
+typedef struct { /* per-pointer checks; a zero-valued field disables its check */
+  u32 ref; /* pointer-table slot to inspect; the pointer must be non-null */
+  u64 bucket; /* a size: the owning span's bucket must equal sp_mem_heap_bucket_of(bucket) */
+  u64 zeroed; /* number of leading bytes that must read as 0 */
+  bool large; /* true: sp_mem_heap_find_span must return null for the pointer */
+  heap_data_t data [4]; /* byte checks; the first {0, 0} entry ends the list */
+} heap_ref_check_t;
+
+typedef struct { /* post-conditions checked after all ops ran; each list ends at its first all-zero entry */
+  heap_pair_t same [8]; /* pairs that must resolve to the same non-null span */
+  heap_pair_t different [8]; /* pairs whose spans must differ */
+  heap_pair_t same_ptr [8]; /* pairs whose pointers must be equal */
+  heap_pair_t different_ptr [8]; /* pairs whose pointers must differ */
+  heap_ref_check_t refs [8];
+  u64 num_spans; /* spans linked on every bucket's partial and full lists */
+  u64 num_large; /* entries on heap->larges */
+  u64 recycled; /* entries on heap->recycled */
+  struct { u64 used; u64 reserved; } bytes; /* expected heap->bytes_used / heap->bytes_reserved */
+} heap_expect_t;
+
+#define HEAP_MAX_REFS 32 /* size of the pointer table in run_heap_test */
+
+typedef struct { /* a scripted scenario: up to 32 ops plus the expected end state */
+  heap_op_t ops [32];
+  heap_expect_t expect;
+} heap_test_t;
+
+static u64 count_heap_spans(sp_mem_heap_t* heap) { /* counts the spans on the partial and full lists of every bucket */
+  u64 n = 0;
+  sp_for(b, SP_MEM_HEAP_NUM_BUCKETS) {
+    for (sp_mem_heap_span_t* s = heap->buckets[b].partial; s; s = s->next) n++;
+    for (sp_mem_heap_span_t* s = heap->buckets[b].full; s; s = s->next) n++;
+  }
+  return n;
+}
+
+static void run_heap_test(s32* utest_result, heap_test_t t) { /* replays t.ops on a fresh heap, then verifies t.expect and destroys the heap */
+  sp_mem_heap_t* heap = sp_mem_heap_new(); /* NOTE(review): utest_result is not referenced below; presumably the EXPECT_* macros read it - confirm */
+  void* ptrs [HEAP_MAX_REFS] = sp_zero; /* results of the scripted ops, addressed by op->ref / op->src */
+
+  sp_carr_for(t.ops, i) {
+    heap_op_t* op = &t.ops[i];
+    if (op->kind == HEAP_OP_NONE) break; /* first NONE entry ends the script */
+    switch (op->kind) {
+      case HEAP_OP_NONE: break; /* unreachable: filtered out just above */
+      case HEAP_OP_ALLOC:
+        ptrs[op->ref] = sp_mem_heap_alloc(heap, op->size);
+        EXPECT_NE(ptrs[op->ref], SP_NULLPTR);
+        EXPECT_EQ((uintptr_t)ptrs[op->ref] & (SP_MEM_ALIGNMENT - 1), 0u); /* result must be SP_MEM_ALIGNMENT-aligned */
+        break;
+      case HEAP_OP_FREE:
+        sp_mem_heap_free(heap, ptrs[op->ref]);
+        ptrs[op->ref] = SP_NULLPTR;
+        break;
+      case HEAP_OP_REALLOC:
+        ptrs[op->ref] = sp_mem_heap_realloc(heap, ptrs[op->src], op->size); /* ptrs[src] is not cleared here */
+        if (op->size) { /* a non-zero size must yield a non-null, aligned pointer */
+          EXPECT_NE(ptrs[op->ref], SP_NULLPTR);
+          EXPECT_EQ((uintptr_t)ptrs[op->ref] & (SP_MEM_ALIGNMENT - 1), 0u);
+        }
+        else { /* size 0 is expected to return null */
+          EXPECT_EQ(ptrs[op->ref], SP_NULLPTR);
+        }
+        break;
+      case HEAP_OP_WRITE:
+        ((u8*)ptrs[op->ref])[op->offset] = (u8)op->value; /* no null or bounds check: the script must target a live allocation */
+        break;
+    }
+  }
+
+  heap_expect_t* e = &t.expect;
+
+  sp_carr_for(e->same, i) { /* both pointers must resolve to one non-null span */
+    heap_pair_t p = e->same[i];
+    if (!p.a && !p.b) break; /* a {0, 0} pair ends the list */
+    sp_mem_heap_span_t* sa = sp_mem_heap_find_span(heap, ptrs[p.a]);
+    sp_mem_heap_span_t* sb = sp_mem_heap_find_span(heap, ptrs[p.b]);
+    EXPECT_NE(sa, SP_NULLPTR);
+    EXPECT_EQ(sa, sb);
+  }
+
+  sp_carr_for(e->different, i) { /* the two pointers must resolve to different spans */
+    heap_pair_t p = e->different[i];
+    if (!p.a && !p.b) break;
+    sp_mem_heap_span_t* sa = sp_mem_heap_find_span(heap, ptrs[p.a]);
+    sp_mem_heap_span_t* sb = sp_mem_heap_find_span(heap, ptrs[p.b]);
+    EXPECT_NE(sa, sb);
+  }
+
+  sp_carr_for(e->same_ptr, i) { /* raw pointer equality */
+    heap_pair_t p = e->same_ptr[i];
+    if (!p.a && !p.b) break;
+    EXPECT_EQ(ptrs[p.a], ptrs[p.b]);
+  }
+
+  sp_carr_for(e->different_ptr, i) { /* raw pointer inequality */
+    heap_pair_t p = e->different_ptr[i];
+    if (!p.a && !p.b) break;
+    EXPECT_NE(ptrs[p.a], ptrs[p.b]);
+  }
+
+  sp_carr_for(e->refs, i) { /* per-pointer checks */
+    heap_ref_check_t* r = &e->refs[i];
+    if (!r->ref && !r->bucket && !r->zeroed && !r->large && !r->data[0].offset && !r->data[0].value) break; /* an all-zero entry ends the list */
+
+    u8* p = (u8*)ptrs[r->ref];
+    EXPECT_NE(p, SP_NULLPTR);
+    if (!p) continue; /* already reported above; skip the checks that would dereference p */
+
+    if (r->bucket) { /* r->bucket is a size, converted to a bucket index for the comparison */
+      sp_mem_heap_span_t* s = sp_mem_heap_find_span(heap, p);
+      EXPECT_NE(s, SP_NULLPTR);
+      if (s) EXPECT_EQ((u64)s->bucket, (u64)sp_mem_heap_bucket_of(r->bucket));
+    }
+    if (r->zeroed) {
+      sp_for(j, r->zeroed) EXPECT_EQ(p[j], 0u);
+    }
+    if (r->large) { /* a large allocation must not be found in any span */
+      EXPECT_EQ(sp_mem_heap_find_span(heap, p), SP_NULLPTR);
+    }
+    sp_carr_for(r->data, j) {
+      heap_data_t* d = &r->data[j];
+      if (!d->offset && !d->value) break; /* {0, 0} ends the list, so "byte 0 == 0" cannot be expressed here */
+      EXPECT_EQ(p[d->offset], (u8)d->value);
+    }
+  }
+
+  u64 num_large = 0;
+  for (sp_mem_heap_large_t* l = heap->larges; l; l = l->next) num_large++;
+
+  u64 num_recycled = 0;
+  for (sp_mem_heap_span_t* s = heap->recycled; s; s = s->next) num_recycled++;
+
+  EXPECT_EQ(count_heap_spans(heap), e->num_spans); /* unset expectation fields are 0 and are still compared */
+  EXPECT_EQ(num_large, e->num_large);
+  EXPECT_EQ(num_recycled, e->recycled);
+  EXPECT_EQ(heap->bytes_used, e->bytes.used);
+  EXPECT_EQ(heap->bytes_reserved, e->bytes.reserved);
+
+  sp_mem_heap_destroy(heap);
+}
+
+UTEST_F(mem, heap_alloc_returns_aligned_nonnull) { /* 16/64/1024/8192-byte allocs; non-null and alignment are asserted by the harness; used counts 1344 for the 1024 request */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 16 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 64 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 1024 },
+      { .kind = HEAP_OP_ALLOC, .ref = 3, .size = 8192 },
+    },
+    .expect = {
+      .num_spans = 3,
+      .num_large = 1,
+      .recycled = 12,
+      .bytes = { .used = 16 + 64 + 1344 + 8192, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE + 12288 },
+    },
+  });
+}
+
+UTEST_F(mem, heap_alloc_is_zeroed) { /* the first 128 bytes of a fresh 128-byte alloc read as 0 */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 128 },
+    },
+    .expect = {
+      .refs = {
+        { .ref = 0, .zeroed = 128 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 128, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_alloc_distinct_pointers) { /* three 32-byte allocs yield pairwise distinct pointers */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 32 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 32 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 32 },
+    },
+    .expect = {
+      .different_ptr = {
+        { 0, 1 }, { 1, 2 }, { 0, 2 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 3 * 32, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_bucket_of_sizes) { /* pins the size -> bucket index mapping, including both sides of several boundaries */
+  EXPECT_EQ(sp_mem_heap_bucket_of(0), 0u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(1), 0u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(16), 0u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(17), 1u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(32), 1u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(48), 2u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(64), 3u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(96), 4u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(128), 5u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(192), 6u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(256), 7u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(257), 8u);
+
EXPECT_EQ(sp_mem_heap_bucket_of(336), 8u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(448), 9u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(576), 10u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(672), 11u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(800), 12u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(1008), 13u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(1009), 14u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(1344), 14u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(1345), 15u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(2016), 15u);
+  EXPECT_EQ(sp_mem_heap_bucket_of(2017), (u32)SP_MEM_HEAP_NUM_BUCKETS); /* sizes above 2016 map to the bucket count itself, not to a bucket index */
+  EXPECT_EQ(sp_mem_heap_bucket_of(4096), (u32)SP_MEM_HEAP_NUM_BUCKETS);
+}
+
+UTEST_F(mem, heap_small_allocs_share_a_span) { /* three 64-byte allocs land in one span, in the bucket of size 64 */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 64 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 64 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 64 },
+    },
+    .expect = {
+      .same = {
+        { 0, 1 }, { 1, 2 },
+      },
+      .refs = {
+        { .ref = 0, .bucket = 64 },
+        { .ref = 1, .bucket = 64 },
+        { .ref = 2, .bucket = 64 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 3 * 64, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_different_buckets_use_different_spans) { /* a 32-byte and a 128-byte alloc are served from different spans */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 32 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 128 },
+    },
+    .expect = {
+      .different = {
+        { 0, 1 },
+      },
+      .refs = {
+        { .ref = 0, .bucket = 32 },
+        { .ref = 1, .bucket = 128 },
+      },
+      .num_spans = 2,
+      .recycled = 13,
+      .bytes = { .used = 32 + 128, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_large_alloc_bypasses_spans) { /* an 8192-byte alloc is counted on the large list and resolves to no span */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 8192 },
+    },
+    .expect = {
+      .refs = {
+        { .ref = 0, .large = true },
+      },
+      .num_spans = 0,
+      .num_large = 1,
+      .bytes = { .used = 8192, .reserved = 4096 + 12288 },
+    },
+  });
+}
+
+UTEST_F(mem, heap_packed_bucket_fits_four_1008_chunks) { /* four 1008-byte allocs share a span; the fifth lands in another */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 3, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 4, .size = 1008 },
+    },
+    .expect = {
+      .same = {
+        { 0, 1 }, { 1, 2 }, { 2, 3 },
+      },
+      .different = {
+        { 3, 4 },
+      },
+      .num_spans = 2,
+      .recycled = 13,
+      .bytes = { .used = 5 * 1008, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_full_span_overflows_to_new_span) { /* five 800-byte allocs share a span; the sixth lands in another */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 800 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 800 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 800 },
+      { .kind = HEAP_OP_ALLOC, .ref = 3, .size = 800 },
+      { .kind = HEAP_OP_ALLOC, .ref = 4, .size = 800 },
+      { .kind = HEAP_OP_ALLOC, .ref = 5, .size = 800 },
+    },
+    .expect = {
+      .same = {
+        { 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+      },
+      .different = {
+        { 4, 5 },
+      },
+      .num_spans = 2,
+      .recycled = 13,
+      .bytes = { .used = 6 * 800, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_free_from_full_span_reuses_slot) { /* after freeing one of four 1008-byte chunks, the next alloc lands in the same span */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 1008 },
+      { .kind = HEAP_OP_ALLOC, .ref = 3, .size = 1008 },
+      { .kind = HEAP_OP_FREE, .ref = 1 },
+      { .kind = HEAP_OP_ALLOC, .ref = 4, .size = 1008 },
+    },
+    .expect = {
+      .same = {
+        { 0, 4 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 4 * 1008, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_empty_span_is_recycled) { /* freeing the only chunk leaves no bucket spans and 15 recycled ones */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 64 },
+      { .kind = HEAP_OP_FREE, .ref = 0 },
+    },
+    .expect = {
+      .num_spans = 0,
+      .recycled = 15,
+      .bytes = { .used = 0, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_recycled_span_is_reused) { /* after alloc+free of 64 bytes, a 256-byte alloc leaves one bucket span and 14 recycled */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 64 },
+      { .kind = HEAP_OP_FREE, .ref = 0 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 256 },
+    },
+    .expect = {
+      .refs = {
+        { .ref = 1, .bucket = 256 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 256, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_chunk_reuse_is_zeroed) { /* a 32-byte alloc made after a dirtied 32-byte chunk was freed reads as all 0 */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 32 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 5, .value = 0xAB },
+      { .kind = HEAP_OP_FREE, .ref = 0 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 32 },
+    },
+    .expect = {
+      .refs = {
+        { .ref = 1, .zeroed = 32 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 32, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_alloc_zero_returns_smallest_chunk) { /* a 0-byte request is served from the same bucket as a 16-byte one and counts 16 bytes used */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 0 },
+    },
+    .expect = {
+      .refs = {
+        { .ref = 0, .bucket = 16 },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 16, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_within_bucket_keeps_pointer) { /* 20 -> 25 bytes: same pointer, byte 10 preserved, 32 bytes counted as used */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 20 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 10, .value = 7 },
+      { .kind = HEAP_OP_REALLOC, .ref = 1, .src = 0, .size = 25 },
+    },
+    .expect = {
+      .same_ptr = {
+        { 0, 1 },
+      },
+      .refs = {
+        { .ref = 1, .data = { { .offset = 10, .value = 7 } } },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 32, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_shrink_then_grow_reveals_zeroes) /* 32 -> 20 -> 31 bytes: same pointer, byte 10 kept, byte 30 reads 0 afterwards */
{
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 32 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 10, .value = 7 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 30, .value = 9 },
+      { .kind = HEAP_OP_REALLOC, .ref = 1, .src = 0, .size = 20 },
+      { .kind = HEAP_OP_REALLOC, .ref = 2, .src = 1, .size = 31 },
+    },
+    .expect = {
+      .same_ptr = {
+        { 0, 2 },
+      },
+      .refs = {
+        { .ref = 2, .data = { { .offset = 10, .value = 7 }, { .offset = 30, .value = 0 } } },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 32, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_grows_and_preserves_bytes) { /* 16 -> 64 bytes: the pointer changes and byte 3 is carried over */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 16 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 3, .value = 7 },
+      { .kind = HEAP_OP_REALLOC, .ref = 1, .src = 0, .size = 64 },
+    },
+    .expect = {
+      .different_ptr = {
+        { 0, 1 },
+      },
+      .refs = {
+        { .ref = 1, .data = { { .offset = 3, .value = 7 } } },
+      },
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 64, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_large_in_place) { /* 5000 -> 6000 bytes: same pointer, byte 4999 kept, byte 5999 reads 0 */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 5000 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 4999, .value = 9 },
+      { .kind = HEAP_OP_REALLOC, .ref = 1, .src = 0, .size = 6000 },
+    },
+    .expect = {
+      .same_ptr = {
+        { 0, 1 },
+      },
+      .refs = {
+        { .ref = 1, .large = true, .data = { { .offset = 4999, .value = 9 }, { .offset = 5999, .value = 0 } } },
+      },
+      .num_spans = 0,
+      .num_large = 1,
+      .bytes = { .used = 6000, .reserved = 4096 + 8192 },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_large_to_small_moves) { /* 5000 -> 64 bytes: the pointer changes, byte 10 is kept, and no large entry remains */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 5000 },
+      { .kind = HEAP_OP_WRITE, .ref = 0, .offset = 10, .value = 3 },
+      { .kind = HEAP_OP_REALLOC, .ref = 1, .src = 0, .size = 64 },
+    },
+    .expect = {
+      .different_ptr = {
+        { 0, 1 },
+      },
+      .refs = {
+        { .ref = 1, .bucket = 64, .data = { { .offset = 10, .value = 3 } } },
+      },
+      .num_spans = 1,
+      .num_large = 0,
+      .recycled = 14,
+      .bytes = { .used = 64, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_null_acts_as_alloc) { /* slot 31 is never written by the script, so it supplies the source pointer untouched */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_REALLOC, .ref = 0, .src = 31, .size = 64 },
+    },
+    .expect = {
+      .num_spans = 1,
+      .recycled = 14,
+      .bytes = { .used = 64, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_zero_frees) { /* realloc to size 0: the harness asserts a null result; no span or used bytes remain */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 64 },
+      { .kind = HEAP_OP_REALLOC, .ref = 1, .src = 0, .size = 0 },
+    },
+    .expect = {
+      .num_spans = 0,
+      .num_large = 0,
+      .recycled = 15,
+      .bytes = { .used = 0, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_bytes_accounting) { /* one 64-byte chunk plus one 8192-byte large alloc: checks the used/reserved totals */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 64 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 8192 },
+    },
+    .expect = {
+      .num_spans = 1,
+      .num_large = 1,
+      .recycled = 14,
+      .bytes = { .used = 64 + 8192, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE + 12288 },
+    },
+  });
+}
+
+UTEST_F(mem, heap_drained_heap_retains_segment) { /* after freeing everything, used is 0 but the segment is still counted as reserved */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 64 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 8192 },
+      { .kind = HEAP_OP_FREE, .ref = 0 },
+      { .kind = HEAP_OP_FREE, .ref = 1 },
+    },
+    .expect = {
+      .num_spans = 0,
+      .recycled = 15,
+      .bytes = { .used = 0, .reserved = 4096 + SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_exhausted_segment_grows_new_segment) { /* 31 allocs of 2016 bytes occupy 16 spans; reserved grows to two segments */
+  run_heap_test(&ur, (heap_test_t){
+    .ops = {
+      { .kind = HEAP_OP_ALLOC, .ref = 0, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 1, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 2, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 3, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 4, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 5, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 6, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 7, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 8, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 9, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 10, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 11, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 12, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 13, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 14, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 15, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 16, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 17, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 18, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 19, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 20, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 21, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 22, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 23, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 24, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 25, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 26, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 27, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 28, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 29, .size = 2016 },
+      { .kind = HEAP_OP_ALLOC, .ref = 30, .size = 2016 },
+    },
+    .expect = {
+      .different_ptr = {
+        { 0, 30 },
+      },
+      .num_spans = 16,
+      .recycled = 14,
+      .bytes = { .used = 31 * 2016, .reserved = 4096 + 2 * SP_MEM_HEAP_SEGMENT_SIZE },
+    },
+  });
+}
+
+UTEST_F(mem, heap_realloc_huge_size_fails_cleanly) { /* a near-U64_MAX realloc/alloc returns null and leaves the original block and bytes_used intact */
+  sp_mem_heap_t* heap = sp_mem_heap_new();
+  u8* p = sp_ptr_cast(u8*, sp_mem_heap_alloc(heap, 5000));
+  EXPECT_NE(p, SP_NULLPTR);
+  if (p) {
+    p[100] = 42;
+    EXPECT_EQ(sp_mem_heap_realloc(heap, p, SP_LIMIT_U64_MAX - 16),
SP_NULLPTR); + EXPECT_EQ(p[100], 42u); + EXPECT_EQ(heap->bytes_used, (u64)5000); + sp_mem_heap_free(heap, p); + } + EXPECT_EQ(sp_mem_heap_alloc(heap, SP_LIMIT_U64_MAX - 16), SP_NULLPTR); + sp_mem_heap_destroy(heap); +} + +UTEST_F(mem, heap_null_heap_ops_are_noops) { + u8 byte = 7; + EXPECT_EQ(sp_mem_heap_alloc(SP_NULLPTR, 64), SP_NULLPTR); + EXPECT_EQ(sp_mem_heap_realloc(SP_NULLPTR, &byte, 64), SP_NULLPTR); + EXPECT_EQ(sp_mem_heap_find_span(SP_NULLPTR, &byte), SP_NULLPTR); + sp_mem_heap_free(SP_NULLPTR, &byte); + EXPECT_EQ(byte, 7u); + sp_mem_heap_destroy(SP_NULLPTR); +} + +UTEST_F(mem, heap_as_allocator_routes_through_sp_alloc) { + sp_mem_heap_t* heap = sp_mem_heap_new(); + sp_mem_t mem = sp_mem_heap_as_allocator(heap); + + u8* p = sp_ptr_cast(u8*, sp_alloc(mem, 64)); + EXPECT_NE(p, SP_NULLPTR); + EXPECT_EQ((uintptr_t)p & (SP_MEM_ALIGNMENT - 1), 0u); + + u8* q = sp_ptr_cast(u8*, sp_realloc(mem, p, 128)); + EXPECT_NE(q, SP_NULLPTR); + + sp_free(mem, q); + sp_mem_heap_destroy(heap); +} diff --git a/test/mem/mem.h b/test/mem/mem.h index 2f73dee..8710174 100644 --- a/test/mem/mem.h +++ b/test/mem/mem.h @@ -9,7 +9,7 @@ #define SP_MEM_ALIGNMENT 16 #endif -#define EXPECT_ALIGNED(ptr) EXPECT_EQ(sp_align_up(ptr, SP_MEM_ALIGNMENT), ptr) +#define EXPECT_ALIGNED(ptr) EXPECT_EQ(sp_align_up(ptr, SP_MEM_ALIGNMENT), sp_uptr(ptr)) struct mem { u8 placeholder; diff --git a/test/tools/table.h b/test/tools/table.h new file mode 100644 index 0000000..77214ac --- /dev/null +++ b/test/tools/table.h @@ -0,0 +1,182 @@ +#ifndef SP_TABLE_H +#define SP_TABLE_H + +#include "sp.h" + +typedef struct { + sp_str_t header; + const c8* fmt; + sp_fmt_align_t align; +} sp_table_col_t; + +typedef struct { + sp_str_t value; + const c8* color; +} sp_table_cell_t; + +typedef struct { + sp_mem_t mem; + sp_da(sp_table_col_t) cols; + sp_da(sp_table_cell_t) cells; + const c8* color; +} sp_table_writer_t; + +SP_API void sp_table_init(sp_table_writer_t* table, sp_mem_t mem); +SP_API void 
sp_table_add_col(sp_table_writer_t* table, sp_table_col_t col); +SP_API void sp_table_begin(sp_table_writer_t* table); +SP_API void sp_table_color(sp_table_writer_t* table, const c8* ansi); +SP_API void sp_table_write_str(sp_table_writer_t* table, sp_str_t value); +SP_API void sp_table_write_cstr(sp_table_writer_t* table, const c8* value); +SP_API void sp_table_write_u64(sp_table_writer_t* table, u64 value); +SP_API void sp_table_write_s64(sp_table_writer_t* table, s64 value); +SP_API void sp_table_write_u32(sp_table_writer_t* table, u32 value); +SP_API void sp_table_write_s32(sp_table_writer_t* table, s32 value); +SP_API void sp_table_write_f64(sp_table_writer_t* table, f64 value); +SP_API void sp_table_write_f32(sp_table_writer_t* table, f32 value); +SP_API sp_str_t sp_table_render(sp_table_writer_t* table, sp_mem_t mem); +SP_API void sp_table_log(sp_table_writer_t* table); + +#endif + +#if defined(SP_TABLE_IMPLEMENTATION) && !defined(SP_TABLE_C) +#define SP_TABLE_C + +void sp_table_init(sp_table_writer_t* table, sp_mem_t mem) { + sp_mem_zero(table, sizeof(*table)); + table->mem = mem; + sp_da_init(mem, table->cols); + sp_da_init(mem, table->cells); +} + +void sp_table_add_col(sp_table_writer_t* table, sp_table_col_t col) { + SP_ASSERT(sp_da_empty(table->cells)); + if (!col.fmt) col.fmt = "{}"; + sp_da_push(table->cols, col); +} + +void sp_table_begin(sp_table_writer_t* table) { + SP_ASSERT(!sp_da_empty(table->cols)); + SP_ASSERT(sp_da_size(table->cells) % sp_da_size(table->cols) == 0); +} + +static const sp_table_col_t* sp_table_current_col(sp_table_writer_t* table) { + return &table->cols[sp_da_size(table->cells) % sp_da_size(table->cols)]; +} + +void sp_table_color(sp_table_writer_t* table, const c8* ansi) { + table->color = ansi; +} + +static void sp_table_push_cell(sp_table_writer_t* table, sp_str_t value) { + sp_table_cell_t cell = { + .value = value, + .color = table->color, + }; + sp_da_push(table->cells, cell); + table->color = SP_NULLPTR; +} + +void 
sp_table_write_str(sp_table_writer_t* table, sp_str_t value) { /* Pushes a cell whose text is sp_str_copy(table->mem, value) rather than value itself. */ + sp_table_push_cell(table, sp_str_copy(table->mem, value)); +} + +void sp_table_write_cstr(sp_table_writer_t* table, const c8* value) { /* Wraps the C string with sp_str_view and forwards to sp_table_write_str. */ + sp_table_write_str(table, sp_str_view(value)); +} + +void sp_table_write_u64(sp_table_writer_t* table, u64 value) { /* Formats value with the fmt of the current column, passing table->mem to sp_fmt, and pushes the resulting string as a cell. */ + sp_table_push_cell(table, sp_fmt(table->mem, sp_table_current_col(table)->fmt, sp_fmt_uint(value)).value); +} + +void sp_table_write_s64(sp_table_writer_t* table, s64 value) { /* Signed counterpart of sp_table_write_u64; the argument is wrapped with sp_fmt_int. */ + sp_table_push_cell(table, sp_fmt(table->mem, sp_table_current_col(table)->fmt, sp_fmt_int(value)).value); +} + +void sp_table_write_u32(sp_table_writer_t* table, u32 value) { /* Widens to u64 and forwards. */ + sp_table_write_u64(table, (u64)value); +} + +void sp_table_write_s32(sp_table_writer_t* table, s32 value) { /* Widens to s64 and forwards. */ + sp_table_write_s64(table, (s64)value); +} + +void sp_table_write_f64(sp_table_writer_t* table, f64 value) { /* Floating-point counterpart of sp_table_write_u64; the argument is wrapped with sp_fmt_float. */ + sp_table_push_cell(table, sp_fmt(table->mem, sp_table_current_col(table)->fmt, sp_fmt_float(value)).value); +} + +void sp_table_write_f32(sp_table_writer_t* table, f32 value) { /* Widens to f64 and forwards. */ + sp_table_write_f64(table, (f64)value); +} + +static const c8* sp_table_pad_fmt(sp_fmt_align_t align) { /* Maps an alignment to the padding format string handed to sp_fmt_io. NONE, and any value not listed in the switch, falls back to the left-aligned form. */ + switch (align) { + case SP_FMT_ALIGN_NONE: return "{:<$}"; + case SP_FMT_ALIGN_LEFT: return "{:<$}"; + case SP_FMT_ALIGN_CENTER: return "{:^$}"; + case SP_FMT_ALIGN_RIGHT: return "{:>$}"; + } + return "{:<$}"; +} + +static void sp_table_render_cell(sp_io_writer_t* io, const sp_table_col_t* col, sp_table_cell_t cell, u32 width, bool last) { /* Writes one cell: the color string if set, the text, then SP_ANSI_RESET if a color was written. When last is true and the column is NONE or LEFT aligned the text is written unpadded; every other cell goes through the pad format with width passed ahead of the text. NOTE(review): presumably the dollar sign in the pad format consumes the width argument; confirm against sp_fmt. */ + if (cell.color) sp_io_write_cstr(io, cell.color, SP_NULLPTR); + if (last && (col->align == SP_FMT_ALIGN_NONE || col->align == SP_FMT_ALIGN_LEFT)) { + sp_io_write_str(io, cell.value, SP_NULLPTR); + } + else { + sp_fmt_io(io, sp_table_pad_fmt(col->align), sp_fmt_uint(width), sp_fmt_str(cell.value)); + } + if (cell.color) sp_io_write_cstr(io, SP_ANSI_RESET, SP_NULLPTR); +} + +sp_str_t sp_table_render(sp_table_writer_t* table, sp_mem_t mem) { /* Renders the header row followed by every data row into a string built with mem. Asserts that there is at least one column and that the cells form whole rows. */ + u32 num_cols = 
(u32)sp_da_size(table->cols); + SP_ASSERT(num_cols); + SP_ASSERT(sp_da_size(table->cells) % num_cols == 0); + u32 num_rows = (u32)(sp_da_size(table->cells) / num_cols); + + u32* widths = sp_alloc_n(mem, u32, num_cols); /* per-column width: the larger of header.len and the longest cell value.len. NOTE(review): allocated from mem and never released in this function. */ + sp_for(col, num_cols) { + widths[col] = table->cols[col].header.len; + } + sp_da_for(table->cells, it) { + u32 col = (u32)(it % num_cols); + widths[col] = sp_max(widths[col], table->cells[it].value.len); + } + + sp_io_dyn_mem_writer_t builder = sp_zero; + sp_io_dyn_mem_writer_init(mem, &builder); + + sp_io_write_cstr(&builder.base, SP_ANSI_FG_BRIGHT_BLACK, SP_NULLPTR); /* the header row is written between SP_ANSI_FG_BRIGHT_BLACK and SP_ANSI_RESET; header cells carry no color of their own */ + sp_for(col, num_cols) { + if (col) sp_io_write_cstr(&builder.base, " ", SP_NULLPTR); + sp_table_cell_t header = { .value = table->cols[col].header }; + sp_table_render_cell(&builder.base, &table->cols[col], header, widths[col], col == num_cols - 1); + } + sp_io_write_cstr(&builder.base, SP_ANSI_RESET, SP_NULLPTR); + // sp_io_write_c8(&builder.base, '\n'); + // sp_for(col, num_cols) { + // if (col) sp_io_write_cstr(&builder.base, " ", SP_NULLPTR); + // sp_for(it, widths[col]) { + // sp_io_write_c8(&builder.base, '-'); + // } + // } + + sp_for(row, num_rows) { /* each data row starts with a newline, so the rendered string has no trailing newline */ + sp_io_write_c8(&builder.base, '\n'); + sp_for(col, num_cols) { + if (col) sp_io_write_cstr(&builder.base, " ", SP_NULLPTR); + sp_table_render_cell(&builder.base, &table->cols[col], table->cells[row * num_cols + col], widths[col], col == num_cols - 1); + } + } + + return sp_io_dyn_mem_writer_as_str(&builder); +} + +void sp_table_log(sp_table_writer_t* table) { /* Renders into the memory of a scratch marker obtained from sp_mem_begin_scratch, logs the result with sp_log, then ends the scratch scope. */ + sp_mem_arena_marker_t scratch = sp_mem_begin_scratch(); + sp_log("{}", sp_fmt_str(sp_table_render(table, scratch.mem))); + sp_mem_end_scratch(scratch); +} + +#endif diff --git a/test/bench/ht.c b/tools/wip/bench/ht.c similarity index 100% rename from test/bench/ht.c rename to tools/wip/bench/ht.c diff --git a/test/bench/stb_ds.h b/tools/wip/bench/stb_ds.h similarity index 100% rename from test/bench/stb_ds.h rename to tools/wip/bench/stb_ds.h