Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion deps/toptalk/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ TEST_WINDOW = test-tcp-window
TEST_VIDEO = test-video-detect
TEST_RTSP = test-rtsp-tap

# Benchmark targets
BENCH_DECODE = bench-decode
BENCH_MALLOC = bench-malloc
BENCH_ROTATION = bench-rotation
BENCH_SORT = bench-sort

SRC = \
decode.c \
intervals.c \
Expand All @@ -25,7 +31,8 @@ HEADERS = \
tcp_rtt.h \
tcp_window.h \
video_detect.h \
video_metrics.h
video_metrics.h \
bench_common.h

ifndef INTERVAL_COUNT
INTERVAL_COUNT = 8
Expand Down Expand Up @@ -109,6 +116,58 @@ $(TEST_RTSP): $(LIB) test_rtsp_tap.c
@echo Building $(TEST_RTSP)
$(CC) -o $(TEST_RTSP) test_rtsp_tap.c $(LIB) $(LDLIBS) $(LDFLAGS) $(CFLAGS)

# Benchmark targets - built without sanitizers for accurate timing
# These need a clean library build without ASAN
BENCH_CFLAGS := -g -O2 -Wall -pedantic -std=c11 $(DEFINES) -fPIC -fno-omit-frame-pointer
BENCH_LDFLAGS := -lrt -lpthread $(PKGCONFIG_PCAP) $(PKGCONFIG_CURSES)

# Library built without sanitizers for benchmarks
BENCH_LIB = toptalk-bench.a

# Compiles every file in $(SRC) with $(BENCH_CFLAGS) and archives the result.
# NOTE(review): `$(CC) -c $(SRC)` writes default-named .o files into the
# current directory, and `*.o` archives every object found there, not only
# the ones just compiled. Objects left behind by another build in this
# directory would therefore be overwritten and/or swept into the archive.
# Presumably the sanitizer-enabled library shares these object names - confirm.
$(BENCH_LIB): $(SRC) $(HEADERS) Makefile
@echo Building $(BENCH_LIB) without sanitizers
$(CC) -c $(SRC) $(BENCH_CFLAGS)
gcc-ar cr $(BENCH_LIB) *.o
@echo -e "$(BENCH_LIB) OK\n"

$(BENCH_DECODE): $(BENCH_LIB) bench_decode.c bench_common.c bench_common.h
@echo Building $(BENCH_DECODE)
$(CC) -o $(BENCH_DECODE) bench_decode.c bench_common.c $(BENCH_LIB) $(LDLIBS) $(BENCH_LDFLAGS) $(BENCH_CFLAGS)

$(BENCH_MALLOC): $(BENCH_LIB) bench_malloc.c bench_common.c bench_common.h
@echo Building $(BENCH_MALLOC)
$(CC) -o $(BENCH_MALLOC) bench_malloc.c bench_common.c $(BENCH_LIB) $(LDLIBS) $(BENCH_LDFLAGS) $(BENCH_CFLAGS)

$(BENCH_ROTATION): $(BENCH_LIB) bench_rotation.c bench_common.c bench_common.h
@echo Building $(BENCH_ROTATION)
$(CC) -o $(BENCH_ROTATION) bench_rotation.c bench_common.c $(BENCH_LIB) $(LDLIBS) $(BENCH_LDFLAGS) $(BENCH_CFLAGS)

$(BENCH_SORT): $(BENCH_LIB) bench_sort.c bench_common.c bench_common.h
@echo Building $(BENCH_SORT)
$(CC) -o $(BENCH_SORT) bench_sort.c bench_common.c $(BENCH_LIB) $(LDLIBS) $(BENCH_LDFLAGS) $(BENCH_CFLAGS)

BENCH_REGRESSION = bench-regression

$(BENCH_REGRESSION): $(BENCH_LIB) bench_regression.c bench_common.c bench_common.h
@echo Building $(BENCH_REGRESSION)
$(CC) -o $(BENCH_REGRESSION) bench_regression.c bench_common.c timeywimey.c $(BENCH_LIB) $(LDLIBS) $(BENCH_LDFLAGS) $(BENCH_CFLAGS)

.PHONY: bench
bench: $(BENCH_DECODE) $(BENCH_MALLOC) $(BENCH_ROTATION) $(BENCH_SORT)
@echo "Running decode benchmark..."
@./$(BENCH_DECODE)
@echo "Running malloc benchmark..."
@./$(BENCH_MALLOC)
@echo "Running rotation benchmark..."
@./$(BENCH_ROTATION)
@echo "Running sort benchmark..."
@./$(BENCH_SORT)

.PHONY: bench-test
bench-test: $(BENCH_REGRESSION)
@echo "Running performance regression tests..."
@./$(BENCH_REGRESSION)

.PHONY: test
test: $(TEST) $(TEST_RTT) $(TEST_WINDOW) $(TEST_VIDEO) $(TEST_RTSP)
@echo "Running RTT unit tests (no root required)..."
Expand All @@ -133,4 +192,5 @@ clang-analyze: clean
.PHONY: clean
clean:
rm $(LIB) $(PROG) $(TEST) $(TEST_RTT) $(TEST_WINDOW) $(TEST_VIDEO) $(TEST_RTSP) *.o *.a || true
rm $(BENCH_DECODE) $(BENCH_MALLOC) $(BENCH_ROTATION) $(BENCH_SORT) $(BENCH_REGRESSION) || true
rm *.gcno *.gcov *.gcda || true
90 changes: 90 additions & 0 deletions deps/toptalk/bench_common.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* bench_common.c - Benchmark utilities implementation
*/

#define _GNU_SOURCE
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "bench_common.h"

/*
 * Report the CPU clock rate in Hz as listed in /proc/cpuinfo (Linux).
 *
 * The value comes from the first "cpu MHz" line that contains a colon.
 * Returns 3 GHz as a fallback when the file cannot be opened or when the
 * parsed value is below 100 MHz (which includes "no such line found").
 */
uint64_t bench_get_cpu_freq(void)
{
	const uint64_t fallback_hz = 3000000000ULL;
	char buf[256];
	double cpu_mhz = 0.0;
	FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

	if (cpuinfo == NULL) {
		return fallback_hz;
	}

	while (fgets(buf, sizeof(buf), cpuinfo) != NULL) {
		const char *sep;

		if (strncmp(buf, "cpu MHz", 7) != 0) {
			continue;
		}
		sep = strchr(buf, ':');
		if (sep == NULL) {
			continue;
		}
		/* The number follows the colon; atof skips the leading blanks. */
		cpu_mhz = atof(sep + 1);
		break;
	}
	fclose(cpuinfo);

	if (cpu_mhz < 100.0) {
		return fallback_hz;
	}
	return (uint64_t)(cpu_mhz * 1e6);
}

/*
 * Time `iterations` back-to-back calls of fn(arg) and store the outcome in
 * *result.
 *
 * name:       label stored in result->name (the pointer is kept, not copied)
 * fn:         function under test
 * arg:        opaque argument handed to every fn call
 * iterations: number of timed calls; up to 100 additional untimed calls are
 *             made first as a warm-up
 * result:     receives the total cycle count and the per-call averages
 *
 * With iterations == 0 nothing is timed and the per-call averages are
 * reported as 0 rather than the inf/NaN a division by zero would produce.
 */
void bench_run(const char *name,
               void (*fn)(void *arg),
               void *arg,
               uint64_t iterations,
               struct bench_result *result)
{
	uint64_t freq = bench_get_cpu_freq();
	uint64_t start, end, total;

	/* Warm up - run a few iterations to prime caches */
	for (uint64_t i = 0; i < 100 && i < iterations; i++) {
		fn(arg);
	}

	/* Timed run */
	start = bench_start();
	for (uint64_t i = 0; i < iterations; i++) {
		fn(arg);
	}
	end = bench_end();
	total = bench_cycles(start, end);

	/* Fill in results */
	result->name = name;
	result->iterations = iterations;
	result->total_cycles = total;
	if (iterations > 0) {
		result->cycles_per_op = (double)total / (double)iterations;
		result->ns_per_op = bench_cycles_to_ns(total, freq) / (double)iterations;
	} else {
		/* No calls were made, so there is no meaningful per-call figure. */
		result->cycles_per_op = 0.0;
		result->ns_per_op = 0.0;
	}
}

/*
 * Write the heading of the results table to stdout: a blank line, the
 * column titles, and a row of dashes underneath them.
 */
void bench_report_header(void)
{
	static const char name_rule[] = "----------------------------------------";
	static const char num_rule[] = "------------";

	printf("\n%-40s %12s %12s %12s\n",
	       "Benchmark", "Iterations", "Cycles/op", "ns/op");
	printf("%-40s %12s %12s %12s\n",
	       name_rule, num_rule, num_rule, num_rule);
}

/*
 * Write one benchmark result to stdout as a single table row: name,
 * iteration count, cycles per call and nanoseconds per call.
 *
 * The iteration count is a uint64_t, so it is printed with PRIu64; "%lu"
 * only matches on platforms where uint64_t happens to be unsigned long.
 */
void bench_report(const struct bench_result *result)
{
	printf("%-40s %12" PRIu64 " %12.1f %12.1f\n",
	       result->name,
	       result->iterations,
	       result->cycles_per_op,
	       result->ns_per_op);
}
122 changes: 122 additions & 0 deletions deps/toptalk/bench_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* bench_common.h - Benchmark utilities for performance measurement
*
* Provides:
* - Cycle-accurate timing using rdtsc (x86_64)
* - Wall-clock timing utilities
* - Benchmark runner and reporting
*/

#ifndef BENCH_COMMON_H
#define BENCH_COMMON_H

#include <stdint.h>
#include <stdio.h>

/*
 * Benchmark result structure.
 * Filled in by bench_run() and printed by bench_report().
 */
struct bench_result {
/* Label of the benchmark; bench_run() stores the caller's pointer as-is. */
const char *name;
/* Number of timed calls. */
uint64_t iterations;
/* Timestamp-counter delta measured across all timed calls. */
uint64_t total_cycles;
/* total_cycles divided by iterations. */
double cycles_per_op;
/* total_cycles converted to nanoseconds, divided by iterations. */
double ns_per_op;
};

/*
 * Read the CPU timestamp counter at the start of a measurement (x86_64).
 *
 * CPUID (leaf 0) is executed first as a serializing instruction, then
 * RDTSC reads the counter into EDX:EAX.
 *
 * The "memory" clobber makes the statement a compiler-level barrier too:
 * without it the compiler is free to move loads and stores of the code
 * under test across the timestamp read when that code is inlined.
 *
 * Returns the 64-bit counter value. Use bench_cycles() to compute elapsed.
 */
static inline uint64_t bench_start(void)
{
	uint32_t lo, hi;
	/* Serialize to ensure timing is accurate */
	__asm__ volatile (
		"cpuid\n\t"
		"rdtsc\n\t"
		: "=a" (lo), "=d" (hi)
		: "a" (0)
		: "rbx", "rcx", "memory"
	);
	return ((uint64_t)hi << 32) | lo;
}

/*
 * Read the timestamp counter at the end of a measurement (x86_64).
 *
 * RDTSCP reads the counter into EDX:EAX; the two halves are copied out to
 * the output registers before the trailing CPUID overwrites EAX..EDX.
 * All four of those registers are listed as clobbered, so the outputs are
 * never allocated to them.
 *
 * The "memory" clobber makes the statement a compiler-level barrier too,
 * so memory accesses of inlined code under test cannot be moved past the
 * timestamp read.
 *
 * Returns the 64-bit counter value.
 */
static inline uint64_t bench_end(void)
{
	uint32_t lo, hi;
	__asm__ volatile (
		"rdtscp\n\t"
		"mov %%eax, %0\n\t"
		"mov %%edx, %1\n\t"
		"cpuid\n\t"
		: "=r" (lo), "=r" (hi)
		:
		: "rax", "rbx", "rcx", "rdx", "memory"
	);
	return ((uint64_t)hi << 32) | lo;
}

/*
 * Number of cycles that passed between two timestamp readings.
 * The subtraction is unsigned, so it wraps modulo 2^64.
 */
static inline uint64_t bench_cycles(uint64_t start, uint64_t end)
{
	const uint64_t elapsed = end - start;

	return elapsed;
}

/*
 * Get approximate CPU frequency in Hz.
 * Uses the "cpu MHz" entry of /proc/cpuinfo on Linux.
 * Falls back to 3 GHz when the file cannot be opened or the value read
 * is below 100 MHz.
 */
uint64_t bench_get_cpu_freq(void);

/*
 * Translate a cycle count into nanoseconds for a clock running at
 * freq_hz cycles per second.
 */
static inline double bench_cycles_to_ns(uint64_t cycles, uint64_t freq_hz)
{
	const double scaled = (double)cycles * 1e9;
	const double hz = (double)freq_hz;

	return scaled / hz;
}

/*
 * Run a benchmark function multiple times and collect statistics.
 *
 * Before timing starts, fn is called up to 100 times (never more than
 * `iterations`) as an untimed warm-up. The timed loop is bracketed by
 * bench_start() and bench_end().
 *
 * name: Benchmark name for reporting (pointer is stored, not copied)
 * fn: Function to benchmark (called with arg)
 * arg: Argument passed to fn
 * iterations: Number of timed calls to fn
 * result: Output benchmark results
 */
void bench_run(const char *name,
void (*fn)(void *arg),
void *arg,
uint64_t iterations,
struct bench_result *result);

/*
 * Print one benchmark result to stdout as a single formatted table row
 * (name, iterations, cycles per op, ns per op).
 */
void bench_report(const struct bench_result *result);

/*
 * Print header for benchmark report table to stdout: a blank line, the
 * column titles and a separator row of dashes.
 */
void bench_report_header(void);

/*
 * Prevent compiler from optimizing away a value.
 * Use to ensure benchmark results are "used".
 *
 * Expands to an empty asm statement that takes val as an input operand
 * (in a register or in memory) and declares a "memory" clobber, so the
 * compiler must treat val as consumed.
 */
#define BENCH_DONT_OPTIMIZE(val) \
__asm__ volatile ("" : : "r,m" (val) : "memory")

/*
 * Memory barrier to prevent reordering.
 *
 * This is an empty asm statement with a "memory" clobber: it emits no
 * instruction, so it constrains the compiler only.
 */
#define BENCH_BARRIER() \
__asm__ volatile ("" ::: "memory")

#endif /* BENCH_COMMON_H */
Loading
Loading