Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(profiling): start porting memalloc to C++ #12519

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <vector>

#define PY_SSIZE_T_CLEAN
#include <Python.h>
Expand All @@ -10,9 +11,8 @@
#include "_memalloc_reentrant.h"
#include "_memalloc_tb.h"
#include "_pymacro.h"
#include "_utils.h"

typedef struct
struct memalloc_context_t
{
PyMemAllocatorEx pymem_allocator_obj;
/* The domain we are tracking */
Expand All @@ -21,7 +21,7 @@ typedef struct
uint16_t max_events;
/* The maximum number of frames collected in stack traces */
uint16_t max_nframe;
} memalloc_context_t;
};

/* We only support being started once, so we use a global context for the whole
module. If we ever want to be started multiple times, we'd need a more
Expand All @@ -30,13 +30,13 @@ typedef struct
static memalloc_context_t global_memalloc_ctx;

/* Allocation tracker */
/* Holds the tracebacks kept for sampled allocations together with a running
   count of every allocation seen. */
typedef struct
struct alloc_tracker_t
{
/* List of tracebacks */
traceback_array_t allocs;
/* Total number of allocations */
/* List of sampled allocations */
std::vector<traceback_t*> allocs;
/* Total number of allocations (sampled or not) */
uint64_t alloc_count;
} alloc_tracker_t;
};

/* A string containing "object" */
static PyObject* object_string = NULL;
Expand Down Expand Up @@ -118,6 +118,13 @@ memalloc_assert_gil()
}
}

/* Draw a pseudo-random integer from the half-open range [0, max).
 * The value returned by rand() is first scaled into [0, 1) and then
 * stretched over the requested range before being truncated. */
static inline uint64_t
random_range(uint64_t max)
{
    const double unit = (double)rand() / ((double)RAND_MAX + 1);
    return (uint64_t)(unit * max);
}

static void
memalloc_add_event(memalloc_context_t* ctx, void* ptr, size_t size)
{
Expand All @@ -142,26 +149,26 @@ memalloc_add_event(memalloc_context_t* ctx, void* ptr, size_t size)
}

/* Determine if we can capture or if we need to sample */
if (global_alloc_tracker->allocs.count < ctx->max_events) {
if (global_alloc_tracker->allocs.size() < ctx->max_events) {
/* Buffer is not full, fill it */
traceback_t* tb = memalloc_get_traceback(ctx->max_nframe, ptr, size, ctx->domain);
if (tb) {
traceback_array_append(&global_alloc_tracker->allocs, tb);
global_alloc_tracker->allocs.push_back(tb);
}
} else {
/* Sampling mode using a reservoir sampling algorithm: replace a random
* traceback with this one */
uint64_t r = random_range(alloc_count);

// In addition to event size, need to check that the tab is in a good state
if (r < ctx->max_events && global_alloc_tracker->allocs.tab != NULL) {
if (r < ctx->max_events) { // && global_alloc_tracker->allocs.tab != NULL) {
/* Replace a random traceback with this one */
traceback_t* tb = memalloc_get_traceback(ctx->max_nframe, ptr, size, ctx->domain);

// Need to check not only that the tb returned
if (tb) {
traceback_free(global_alloc_tracker->allocs.tab[r]);
global_alloc_tracker->allocs.tab[r] = tb;
traceback_free(global_alloc_tracker->allocs[r]);
global_alloc_tracker->allocs[r] = tb;
}
}
}
Expand Down Expand Up @@ -232,17 +239,17 @@ memalloc_realloc(void* ctx, void* ptr, size_t new_size)
/* Allocate a new, empty allocation tracker.
 *
 * The tracker is value-initialized (`new T()`): because alloc_tracker_t has no
 * user-provided constructor, this zero-initializes `alloc_count` before the
 * vector member is default-constructed. A plain `new alloc_tracker_t` would
 * leave `alloc_count` with an indeterminate value.
 *
 * The returned object is released with alloc_tracker_free().
 *
 * NOTE(review): unlike PyMem_RawMalloc, `new` reports allocation failure by
 * throwing std::bad_alloc instead of returning NULL -- confirm callers are
 * fine with that.
 */
static alloc_tracker_t*
alloc_tracker_new()
{
    return new alloc_tracker_t();
}

/* Release an allocation tracker: free every traceback it still holds, then
   free the tracker object itself. */
static void
alloc_tracker_free(alloc_tracker_t* alloc_tracker)
{
traceback_array_wipe(&alloc_tracker->allocs);
PyMem_RawFree(alloc_tracker);
/* The vector only stores raw pointers, so each traceback is freed explicitly */
for (auto tb : alloc_tracker->allocs) {
traceback_free(tb);
}
delete alloc_tracker;
}

PyDoc_STRVAR(memalloc_start__doc__,
Expand Down Expand Up @@ -369,11 +376,11 @@ memalloc_heap_py(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args))
return memalloc_heap();
}

/* Python object holding the state of an iteration over the tracebacks stored
   in an allocation tracker. */
typedef struct
struct IterEventsState
{
/* Python object header, followed by the tracker whose allocs are iterated */
PyObject_HEAD alloc_tracker_t* alloc_tracker;
/* Index into alloc_tracker->allocs of the next traceback to return */
uint32_t seq_index;
} IterEventsState;
};

PyDoc_STRVAR(iterevents__doc__,
"iter_events()\n"
Expand Down Expand Up @@ -420,7 +427,7 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE

PyObject* iter_and_count = PyTuple_New(3);
PyTuple_SET_ITEM(iter_and_count, 0, (PyObject*)iestate);
PyTuple_SET_ITEM(iter_and_count, 1, PyLong_FromUnsignedLong(iestate->alloc_tracker->allocs.count));
PyTuple_SET_ITEM(iter_and_count, 1, PyLong_FromUnsignedLong(iestate->alloc_tracker->allocs.size()));
PyTuple_SET_ITEM(iter_and_count, 2, PyLong_FromUnsignedLongLong(iestate->alloc_tracker->alloc_count));

return iter_and_count;
Expand All @@ -439,8 +446,8 @@ iterevents_dealloc(IterEventsState* iestate)
static PyObject*
iterevents_next(IterEventsState* iestate)
{
if (iestate->seq_index < iestate->alloc_tracker->allocs.count) {
traceback_t* tb = iestate->alloc_tracker->allocs.tab[iestate->seq_index];
if (iestate->seq_index < iestate->alloc_tracker->allocs.size()) {
traceback_t* tb = iestate->alloc_tracker->allocs[iestate->seq_index];
iestate->seq_index++;

PyObject* tb_size_domain = PyTuple_New(3);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,31 +1,32 @@
#include <math.h>
#include <stdlib.h>
#include <vector>

#define PY_SSIZE_T_CLEAN
#include "_memalloc_debug.h"
#include "_memalloc_heap.h"
#include "_memalloc_reentrant.h"
#include "_memalloc_tb.h"

/* State of the heap profiler: sampling parameters, the tracebacks of the
   currently tracked allocations, and a "freezer" that buffers changes while
   the tracker is frozen. */
typedef struct
struct heap_tracker_t
{
/* Granularity of the heap profiler in bytes */
uint64_t sample_size;
/* Current sample size of the heap profiler in bytes */
uint64_t current_sample_size;
/* Tracked allocations */
traceback_array_t allocs;
std::vector<traceback_t*> allocs;
/* Allocated memory counter in bytes */
uint64_t allocated_memory;
/* True if the heap tracker is frozen */
bool frozen;
/* Contains the ongoing heap allocation/deallocation while frozen */
struct
{
traceback_array_t allocs;
ptr_array_t frees;
/* Tracebacks of allocations tracked while frozen; merged into `allocs` on thaw */
std::vector<traceback_t*> allocs;
/* Pointers whose untracking is deferred until thaw */
std::vector<void*> frees;
} freezer;
} heap_tracker_t;
};

static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMHEAP_CRASH_ON_MUTEX_PASS";
static memlock_t g_memheap_lock;
Expand Down Expand Up @@ -88,9 +89,7 @@ heap_tracker_next_sample_size(uint32_t sample_size)
static void
heap_tracker_init(heap_tracker_t* heap_tracker)
{
traceback_array_init(&heap_tracker->allocs);
traceback_array_init(&heap_tracker->freezer.allocs);
ptr_array_init(&heap_tracker->freezer.frees);
// vectors are already initialized
heap_tracker->allocated_memory = 0;
heap_tracker->frozen = false;
heap_tracker->sample_size = 0;
Expand All @@ -100,9 +99,15 @@ heap_tracker_init(heap_tracker_t* heap_tracker)
/* Free every traceback held by the heap tracker (both the main list and the
   freezer) and empty the freezer's list of pending frees. */
static void
heap_tracker_wipe(heap_tracker_t* heap_tracker)
{
traceback_array_wipe(&heap_tracker->allocs);
traceback_array_wipe(&heap_tracker->freezer.allocs);
ptr_array_wipe(&heap_tracker->freezer.frees);
/* The vectors store raw pointers: free each traceback, then drop the entries */
for (auto tb : heap_tracker->allocs) {
traceback_free(tb);
}
heap_tracker->allocs.clear();
for (auto tb : heap_tracker->freezer.allocs) {
traceback_free(tb);
}
heap_tracker->freezer.allocs.clear();
/* The pending frees are plain pointers; nothing to free, just forget them */
heap_tracker->freezer.frees.clear();
}

static void
Expand All @@ -125,13 +130,13 @@ heap_tracker_untrack_thawed(heap_tracker_t* heap_tracker, void* ptr)
of the time this is where the untracked ptr is (the most recent object
get de-allocated first usually). This might be a good enough
trade-off. */
for (TRACEBACK_ARRAY_COUNT_TYPE i = heap_tracker->allocs.count; i > 0; i--) {
traceback_t** tb = &heap_tracker->allocs.tab[i - 1];

if (ptr == (*tb)->ptr) {
for (auto it = heap_tracker->allocs.rbegin(); it != heap_tracker->allocs.rend(); it++) {
if (ptr == (*it)->ptr) {
/* Free the traceback */
traceback_free(*tb);
traceback_array_remove(&heap_tracker->allocs, tb);
traceback_free(*it);
/* it.base() is the forward iterator one past the element `it` refers to,
so (++it).base() is the forward iterator to this element */
heap_tracker->allocs.erase((++it).base());
break;
}
}
Expand All @@ -140,23 +145,19 @@ heap_tracker_untrack_thawed(heap_tracker_t* heap_tracker, void* ptr)
/* Thaw the heap tracker: move the allocations recorded in the freezer to the
   end of the main list, replay the frees recorded in the freezer, empty the
   freezer and clear the frozen flag. */
static void
heap_tracker_thaw(heap_tracker_t* heap_tracker)
{
/* Add the frozen allocs at the end */
traceback_array_splice(&heap_tracker->allocs,
heap_tracker->allocs.count,
0,
heap_tracker->freezer.allocs.tab,
heap_tracker->freezer.allocs.count);
heap_tracker->allocs.insert(
heap_tracker->allocs.end(), heap_tracker->freezer.allocs.begin(), heap_tracker->freezer.allocs.end());

/* Handle the frees: we need to handle the frees after we merge the allocs
array together to be sure that there's no free in the freezer matching
an alloc that is also in the freezer; heap_tracker_untrack_thawed does
not care about the freezer, by definition. */
for (MEMALLOC_HEAP_PTR_ARRAY_COUNT_TYPE i = 0; i < heap_tracker->freezer.frees.count; i++)
heap_tracker_untrack_thawed(heap_tracker, heap_tracker->freezer.frees.tab[i]);
for (auto ptr : heap_tracker->freezer.frees)
heap_tracker_untrack_thawed(heap_tracker, ptr);

/* Reset the count to zero so we can reuse the array and overwrite previous values */
heap_tracker->freezer.allocs.count = 0;
heap_tracker->freezer.frees.count = 0;
/* The tracebacks now belong to the main list, so the freezer entries are
   dropped without being freed */
heap_tracker->freezer.allocs.clear();
heap_tracker->freezer.frees.clear();

heap_tracker->frozen = false;
}
Expand Down Expand Up @@ -197,8 +198,8 @@ memalloc_heap_untrack(void* ptr)
can do since reporting an error is not an option here. What's gonna
free more than 2^64 pointers anyway?!
*/
if (global_heap_tracker.freezer.frees.count < MEMALLOC_HEAP_PTR_ARRAY_MAX_COUNT)
ptr_array_append(&global_heap_tracker.freezer.frees, ptr);
if (global_heap_tracker.freezer.frees.size() < MEMALLOC_HEAP_PTR_ARRAY_MAX_COUNT)
global_heap_tracker.freezer.frees.push_back(ptr);
} else
heap_tracker_untrack_thawed(&global_heap_tracker, ptr);

Expand Down Expand Up @@ -234,7 +235,7 @@ memalloc_heap_track(uint16_t max_nframe, void* ptr, size_t size, PyMemAllocatorD
/* Check if we can add more samples: the sum of the freezer + alloc tracker
cannot be greater than what the alloc tracker can handle: when the alloc
tracker is thawed, all the allocs in the freezer will be moved there!*/
if (global_heap_tracker.freezer.allocs.count + global_heap_tracker.allocs.count >= TRACEBACK_ARRAY_MAX_COUNT) {
if (global_heap_tracker.freezer.allocs.size() + global_heap_tracker.allocs.size() >= TRACEBACK_ARRAY_MAX_COUNT) {
memlock_unlock(&g_memheap_lock);
return false;
}
Expand All @@ -248,9 +249,9 @@ memalloc_heap_track(uint16_t max_nframe, void* ptr, size_t size, PyMemAllocatorD
traceback_t* tb = memalloc_get_traceback(max_nframe, ptr, global_heap_tracker.allocated_memory, domain);
if (tb) {
if (global_heap_tracker.frozen)
traceback_array_append(&global_heap_tracker.freezer.allocs, tb);
global_heap_tracker.freezer.allocs.push_back(tb);
else
traceback_array_append(&global_heap_tracker.allocs, tb);
global_heap_tracker.allocs.push_back(tb);

/* Reset the counter to 0 */
global_heap_tracker.allocated_memory = 0;
Expand All @@ -277,11 +278,10 @@ memalloc_heap()

heap_tracker_freeze(&global_heap_tracker);

PyObject* heap_list = PyList_New(global_heap_tracker.allocs.count);

for (TRACEBACK_ARRAY_COUNT_TYPE i = 0; i < global_heap_tracker.allocs.count; i++) {
traceback_t* tb = global_heap_tracker.allocs.tab[i];
PyObject* heap_list = PyList_New(global_heap_tracker.allocs.size());

for (size_t i = 0; i < global_heap_tracker.allocs.size(); i++) {
auto tb = global_heap_tracker.allocs[i];
PyObject* tb_and_size = PyTuple_New(2);
PyTuple_SET_ITEM(tb_and_size, 0, traceback_to_tuple(tb));
PyTuple_SET_ITEM(tb_and_size, 1, PyLong_FromSize_t(tb->size));
Expand Down
3 changes: 0 additions & 3 deletions ddtrace/profiling/collector/_memalloc_heap.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

#include <Python.h>

#include "_utils.h"

/* The maximum heap sample size is the maximum value we can store in a heap_tracker_t.allocated_memory */
#define MAX_HEAP_SAMPLE_SIZE UINT32_MAX

Expand All @@ -27,6 +25,5 @@ memalloc_heap_untrack(void* ptr);

#define MEMALLOC_HEAP_PTR_ARRAY_COUNT_TYPE uint64_t
#define MEMALLOC_HEAP_PTR_ARRAY_MAX_COUNT UINT64_MAX
DO_ARRAY(void*, ptr, MEMALLOC_HEAP_PTR_ARRAY_COUNT_TYPE, DO_NOTHING)

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ memalloc_tb_init(uint16_t max_nframe)

/* Allocate a buffer that can handle the largest traceback possible.
This will be used as a temporary buffer when converting stack traces. */
traceback_buffer = PyMem_RawMalloc(TRACEBACK_SIZE(max_nframe));
traceback_buffer = static_cast<traceback_t*>(PyMem_RawMalloc(TRACEBACK_SIZE(max_nframe)));

if (traceback_buffer == NULL)
return -1;
Expand All @@ -81,7 +81,7 @@ memalloc_tb_init(uint16_t max_nframe)
/* Release the temporary traceback buffer allocated by memalloc_tb_init(). */
void
memalloc_tb_deinit(void)
{
PyMem_RawFree(traceback_buffer);
PyMem_RawFree(static_cast<void*>(traceback_buffer));
}

void
Expand All @@ -94,7 +94,7 @@ traceback_free(traceback_t* tb)
Py_DECREF(tb->frames[nframe].filename);
Py_DECREF(tb->frames[nframe].name);
}
PyMem_RawFree(tb);
PyMem_RawFree(static_cast<void*>(tb));
}

/* Convert PyFrameObject to a frame_t that we can store in memory */
Expand Down Expand Up @@ -167,8 +167,7 @@ memalloc_frame_to_traceback(PyFrameObject* pyframe, uint16_t max_nframe)
}

size_t traceback_size = TRACEBACK_SIZE(traceback_buffer->nframe);
traceback_t* traceback = PyMem_RawMalloc(traceback_size);

traceback_t* traceback = static_cast<traceback_t*>(PyMem_RawMalloc(traceback_size));
if (traceback)
memcpy(traceback, traceback_buffer, traceback_size);

Expand Down
Loading