Skip to content

Commit daea1fe

Browse files
committed
range-diff: add configurable memory limit for cost matrix
When comparing large commit ranges (e.g., 250,000+ commits), range-diff attempts to allocate an n×n cost matrix that can exhaust available memory. For example, with 256,784 commits per range (n = 513,568, the two ranges combined), the matrix would require approximately 1TB of memory (513,568² × 4 bytes), causing either immediate segmentation faults due to integer overflow or system hangs. Add a memory limit check in get_correspondences() before allocating the cost matrix. This check uses the total size in bytes (n² × sizeof(int)) and compares it against a configurable maximum, preventing both excessive memory usage and integer overflow issues. The limit is configurable via a new --max-memory option that accepts human-readable sizes (e.g., "1G", "500M"). The default is 4GB, which allows comparing ranges totalling approximately 32,000 commits (both ranges combined, i.e. n < 32,768) - generous for real-world use cases while preventing impractical operations. When the limit is exceeded, range-diff now displays a clear error message showing both the requested memory size and the maximum allowed, formatted in human-readable units for better user experience. Example usage: `git range-diff --max-memory=1G branch1...branch2`; `git range-diff --max-memory=500M base..topic1 base..topic2`. This approach was chosen over alternatives: - Pre-counting commits: Would require spawning additional git processes and reading all commits twice - Limiting by commit count: Less precise than actual memory usage - Streaming approach: Would require significant refactoring of the current algorithm This issue was previously discussed in: https://lore.kernel.org/git/[email protected]/ Signed-off-by: Paulo Casaretto <[email protected]>
1 parent c44beea commit daea1fe

File tree

3 files changed

+50
-10
lines changed

3 files changed

+50
-10
lines changed

builtin/range-diff.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "parse-options.h"
77
#include "range-diff.h"
88
#include "config.h"
9+
#include "parse.h"
910

1011

1112
static const char * const builtin_range_diff_usage[] = {
@@ -15,6 +16,23 @@ N_("git range-diff [<options>] <base> <old-tip> <new-tip>"),
1516
NULL
1617
};
1718

19+
static int parse_max_memory(const struct option *opt, const char *arg, int unset)
20+
{
21+
size_t *max_memory = opt->value;
22+
uintmax_t val;
23+
24+
if (unset) {
25+
*max_memory = 0;
26+
return 0;
27+
}
28+
29+
if (!git_parse_unsigned(arg, &val, SIZE_MAX))
30+
return error(_("invalid max-memory value: %s"), arg);
31+
32+
*max_memory = (size_t)val;
33+
return 0;
34+
}
35+
1836
int cmd_range_diff(int argc,
1937
const char **argv,
2038
const char *prefix,
@@ -25,6 +43,7 @@ int cmd_range_diff(int argc,
2543
struct strvec diff_merges_arg = STRVEC_INIT;
2644
struct range_diff_options range_diff_opts = {
2745
.creation_factor = RANGE_DIFF_CREATION_FACTOR_DEFAULT,
46+
.max_memory = RANGE_DIFF_MAX_MEMORY_DEFAULT,
2847
.diffopt = &diffopt,
2948
.other_arg = &other_arg
3049
};
@@ -33,6 +52,10 @@ int cmd_range_diff(int argc,
3352
OPT_INTEGER(0, "creation-factor",
3453
&range_diff_opts.creation_factor,
3554
N_("percentage by which creation is weighted")),
55+
OPT_CALLBACK(0, "max-memory", &range_diff_opts.max_memory,
56+
N_("size"),
57+
N_("maximum memory for cost matrix (default 4G)"),
58+
parse_max_memory),
3659
OPT_BOOL(0, "no-dual-color", &simple_color,
3760
N_("use simple diff colors")),
3861
OPT_PASSTHRU_ARGV(0, "notes", &other_arg,

range-diff.c

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "apply.h"
2222
#include "revision.h"
2323

24+
2425
struct patch_util {
2526
/* For the search for an exact match */
2627
struct hashmap_entry e;
@@ -287,8 +288,8 @@ static void find_exact_matches(struct string_list *a, struct string_list *b)
287288
}
288289

289290
static int diffsize_consume(void *data,
290-
char *line UNUSED,
291-
unsigned long len UNUSED)
291+
char *line UNUSED,
292+
unsigned long len UNUSED)
292293
{
293294
(*(int *)data)++;
294295
return 0;
@@ -325,13 +326,24 @@ static int diffsize(const char *a, const char *b)
325326
}
326327

327328
static void get_correspondences(struct string_list *a, struct string_list *b,
328-
int creation_factor)
329+
int creation_factor, size_t max_memory)
329330
{
330331
int n = a->nr + b->nr;
331332
int *cost, c, *a2b, *b2a;
332333
int i, j;
333-
334-
ALLOC_ARRAY(cost, st_mult(n, n));
334+
size_t cost_size = st_mult(n, n);
335+
size_t cost_bytes = sizeof(int) * cost_size;
336+
if (cost_bytes >= max_memory) {
337+
struct strbuf cost_str = STRBUF_INIT;
338+
struct strbuf max_str = STRBUF_INIT;
339+
strbuf_humanise_bytes(&cost_str, cost_bytes);
340+
strbuf_humanise_bytes(&max_str, max_memory);
341+
die(_("range-diff: unable to compute the range-diff, since it "
342+
"exceeds the maximum memory for the cost matrix: %s "
343+
"(%zu bytes) needed, %s (%zu bytes) available"),
344+
cost_str.buf, cost_bytes, max_str.buf, max_memory);
345+
}
346+
ALLOC_ARRAY(cost, cost_size);
335347
ALLOC_ARRAY(a2b, n);
336348
ALLOC_ARRAY(b2a, n);
337349

@@ -351,7 +363,8 @@ static void get_correspondences(struct string_list *a, struct string_list *b,
351363
}
352364

353365
c = a_util->matching < 0 ?
354-
a_util->diffsize * creation_factor / 100 : COST_MAX;
366+
a_util->diffsize * creation_factor / 100 :
367+
COST_MAX;
355368
for (j = b->nr; j < n; j++)
356369
cost[i + n * j] = c;
357370
}
@@ -360,7 +373,8 @@ static void get_correspondences(struct string_list *a, struct string_list *b,
360373
struct patch_util *util = b->items[j].util;
361374

362375
c = util->matching < 0 ?
363-
util->diffsize * creation_factor / 100 : COST_MAX;
376+
util->diffsize * creation_factor / 100 :
377+
COST_MAX;
364378
for (i = a->nr; i < n; i++)
365379
cost[i + n * j] = c;
366380
}
@@ -539,7 +553,7 @@ static void output(struct string_list *a, struct string_list *b,
539553
if (i < a->nr && a_util->matching < 0) {
540554
if (!range_diff_opts->right_only)
541555
output_pair_header(&opts, patch_no_width,
542-
&buf, &dashes, a_util, NULL);
556+
&buf, &dashes, a_util, NULL);
543557
i++;
544558
continue;
545559
}
@@ -548,7 +562,7 @@ static void output(struct string_list *a, struct string_list *b,
548562
while (j < b->nr && b_util->matching < 0) {
549563
if (!range_diff_opts->left_only)
550564
output_pair_header(&opts, patch_no_width,
551-
&buf, &dashes, NULL, b_util);
565+
&buf, &dashes, NULL, b_util);
552566
b_util = ++j < b->nr ? b->items[j].util : NULL;
553567
}
554568

@@ -591,7 +605,8 @@ int show_range_diff(const char *range1, const char *range2,
591605
if (!res) {
592606
find_exact_matches(&branch1, &branch2);
593607
get_correspondences(&branch1, &branch2,
594-
range_diff_opts->creation_factor);
608+
range_diff_opts->creation_factor,
609+
range_diff_opts->max_memory);
595610
output(&branch1, &branch2, range_diff_opts);
596611
}
597612

range-diff.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "strvec.h"
66

77
#define RANGE_DIFF_CREATION_FACTOR_DEFAULT 60
8+
/*
 * Default upper bound, in bytes, for the range-diff cost matrix: 4GB
 * where pointers are at least 64 bits wide, 2GB otherwise.
 *
 * The arithmetic is done in size_t rather than unsigned long because
 * unsigned long is only 32 bits wide on some 64-bit platforms; there
 * "4UL * 1024 * 1024 * 1024" silently wraps to 0, and a limit of 0
 * rejects every comparison.  On narrower platforms 4GB is not
 * representable in size_t at all, hence the 2GB fallback.
 */
#define RANGE_DIFF_MAX_MEMORY_DEFAULT \
	(sizeof(void *) >= 8 ? \
		((size_t)(1024L * 1024L) * (size_t)(4L * 1024L)) : \
		((size_t)(1024L * 1024L) * (size_t)(2L * 1024L)))
89

910
/*
1011
* A much higher value than the default, when we KNOW we are comparing
@@ -17,6 +18,7 @@ struct range_diff_options {
1718
unsigned dual_color:1;
1819
unsigned left_only:1, right_only:1;
1920
unsigned include_merges:1;
21+
size_t max_memory;
2022
const struct diff_options *diffopt; /* may be NULL */
2123
const struct strvec *other_arg; /* may be NULL */
2224
};

0 commit comments

Comments
 (0)