From 04ae00062d2ec301828e8df9931817be4b2f8653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Wed, 2 Nov 2022 23:01:40 +0100 Subject: [PATCH 1/3] line-log: free diff queue when processing non-merge commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When processing a non-merge commit, the line-level log first asks the tree-diff machinery whether any of the files in the given line ranges were modified between the current commit and its parent, and if some of them were, then it loads the contents of those files from both commits to see whether their line ranges were modified and/or need to be adjusted. Alas, it doesn't free() the diff queue holding the results of that query and the contents of those files once its done. This can add up to a substantial amount of leaked memory, especially when the file in question is big and is frequently modified: a user reported "Out of memory, malloc failed" errors with a 2MB text file that was modified ~2800 times [1] (I estimate the leak would use up almost 11GB memory in that case). Free that diff queue to plug this memory leak. However, instead of simply open-coding the necessary three lines, add them as a helper function to the diff API, because it will be useful elsewhere as well. [1] https://public-inbox.org/git/CAFOPqVXz2XwzX8vGU7wLuqb2ZuwTuOFAzBLRM_QPk+NJa=eC-g@mail.gmail.com/ Signed-off-by: SZEDER Gábor Signed-off-by: Taylor Blau --- diff.c | 7 +++++++ diffcore.h | 1 + line-log.c | 1 + 3 files changed, 9 insertions(+) diff --git a/diff.c b/diff.c index 35e46dd968..ef94175163 100644 --- a/diff.c +++ b/diff.c @@ -5773,6 +5773,13 @@ void diff_free_filepair(struct diff_filepair *p) free(p); } +void diff_free_queue(struct diff_queue_struct *q) +{ + for (int i = 0; i < q->nr; i++) + diff_free_filepair(q->queue[i]); + free(q->queue); +} + const char *diff_aligned_abbrev(const struct object_id *oid, int len) { int abblen; diff --git a/diffcore.h b/diffcore.h index badc2261c2..9b588a1ee1 100644 --- a/diffcore.h +++ b/diffcore.h @@ -162,6 +162,7 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *, struct diff_filespec *, struct diff_filespec *); void diff_q(struct diff_queue_struct *, struct diff_filepair *); +void diff_free_queue(struct diff_queue_struct *q); /* dir_rename_relevance: the reason we want rename information for a dir */ enum dir_rename_relevance { diff --git a/line-log.c b/line-log.c index 51d93310a4..7a74daf2e8 100644 --- a/line-log.c +++ b/line-log.c @@ -1195,6 +1195,7 @@ static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *c if (parent) add_line_range(rev, parent, parent_range); free_line_log_data(parent_range); + diff_free_queue(&queue); return changed; } From ef84222fa9b3ed163bacd6dc59a1d855dfbfbd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Wed, 2 Nov 2022 23:01:41 +0100 Subject: [PATCH 2/3] line-log: free the diff queues' arrays when processing merge commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When processing merge commits, the line-level log first creates an array of diff queues, each comparing the merge commit with one of its parents, to check whether any of the files in the given line ranges were modified. Alas, when freeing these queues it only frees the filepairs in the queues, but not the queues' internal arrays holding pointers to those filepairs. Use the diff_free_queue() helper function introduced in the previous commit to free the diff queues' internal arrays as well. Signed-off-by: SZEDER Gábor Signed-off-by: Taylor Blau --- line-log.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/line-log.c b/line-log.c index 7a74daf2e8..a7f3e7f6ce 100644 --- a/line-log.c +++ b/line-log.c @@ -1089,10 +1089,8 @@ static struct diff_filepair *diff_filepair_dup(struct diff_filepair *pair) static void free_diffqueues(int n, struct diff_queue_struct *dq) { - int i, j; - for (i = 0; i < n; i++) - for (j = 0; j < dq[i].nr; j++) - diff_free_filepair(dq[i].queue[j]); + for (int i = 0; i < n; i++) + diff_free_queue(&dq[i]); free(dq); } From 586d8b5052f6b98c262c872f54216e39f3d56625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Wed, 2 Nov 2022 23:01:42 +0100 Subject: [PATCH 3/3] diff.c: use diff_free_queue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use diff_free_queue() instead of open-coding it. This shortens the code and make it less repetitive. Note that the second hunk in diff_flush() is interesting, because the 'free_queue' label separates the loop freeing the queue's filepairs from free()-ing the queue's internal array. This is somewhat suspicious, but it was not an issue before: there is only one place from where we jump to this label with a goto, and that is protected by an 'if (!q->nr && ...)' condition, i.e. we only skipped the loop freeing the filepairs when there were no filepairs in the queue to begin with. Signed-off-by: SZEDER Gábor Signed-off-by: Taylor Blau --- diff.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/diff.c b/diff.c index ef94175163..03e6ffb5e4 100644 --- a/diff.c +++ b/diff.c @@ -6337,13 +6337,9 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; - int i; int result = diff_get_patch_id(options, oid, diff_header_only); - for (i = 0; i < q->nr; i++) - diff_free_filepair(q->queue[i]); - - free(q->queue); + diff_free_queue(q); DIFF_QUEUE_CLEAR(q); return result; @@ -6612,10 +6608,8 @@ void diff_flush(struct diff_options *options) if (output_format & DIFF_FORMAT_CALLBACK) options->format_callback(q, options, options->format_callback_data); - for (i = 0; i < q->nr; i++) - diff_free_filepair(q->queue[i]); free_queue: - free(q->queue); + diff_free_queue(q); DIFF_QUEUE_CLEAR(q); diff_free(options);