зеркало из https://github.com/microsoft/git.git
Merge branch 'ds/blame-on-bloom'
"git blame" learns to take advantage of the "changed-paths" Bloom filter stored in the commit-graph file. * ds/blame-on-bloom: test-bloom: check that we have expected arguments test-bloom: fix some whitespace issues blame: drop unused parameter from maybe_changed_path blame: use changed-path Bloom filters tests: write commit-graph with Bloom filters revision: complicated pathspecs disable filters
This commit is contained in:
Коммит
6d56d4c7dc
131
blame.c
131
blame.c
|
@ -9,6 +9,8 @@
|
|||
#include "blame.h"
|
||||
#include "alloc.h"
|
||||
#include "commit-slab.h"
|
||||
#include "bloom.h"
|
||||
#include "commit-graph.h"
|
||||
|
||||
define_commit_slab(blame_suspects, struct blame_origin *);
|
||||
static struct blame_suspects blame_suspects;
|
||||
|
@ -1246,13 +1248,74 @@ static int fill_blob_sha1_and_mode(struct repository *r,
|
|||
return -1;
|
||||
}
|
||||
|
||||
struct blame_bloom_data {
|
||||
/*
|
||||
* Changed-path Bloom filter keys. These can help prevent
|
||||
* computing diffs against first parents, but we need to
|
||||
* expand the list as code is moved or files are renamed.
|
||||
*/
|
||||
struct bloom_filter_settings *settings;
|
||||
struct bloom_key **keys;
|
||||
int nr;
|
||||
int alloc;
|
||||
};
|
||||
|
||||
static int bloom_count_queries = 0;
|
||||
static int bloom_count_no = 0;
|
||||
static int maybe_changed_path(struct repository *r,
|
||||
struct blame_origin *origin,
|
||||
struct blame_bloom_data *bd)
|
||||
{
|
||||
int i;
|
||||
struct bloom_filter *filter;
|
||||
|
||||
if (!bd)
|
||||
return 1;
|
||||
|
||||
if (origin->commit->generation == GENERATION_NUMBER_INFINITY)
|
||||
return 1;
|
||||
|
||||
filter = get_bloom_filter(r, origin->commit, 0);
|
||||
|
||||
if (!filter)
|
||||
return 1;
|
||||
|
||||
bloom_count_queries++;
|
||||
for (i = 0; i < bd->nr; i++) {
|
||||
if (bloom_filter_contains(filter,
|
||||
bd->keys[i],
|
||||
bd->settings))
|
||||
return 1;
|
||||
}
|
||||
|
||||
bloom_count_no++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void add_bloom_key(struct blame_bloom_data *bd,
|
||||
const char *path)
|
||||
{
|
||||
if (!bd)
|
||||
return;
|
||||
|
||||
if (bd->nr >= bd->alloc) {
|
||||
bd->alloc *= 2;
|
||||
REALLOC_ARRAY(bd->keys, bd->alloc);
|
||||
}
|
||||
|
||||
bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
|
||||
fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
|
||||
bd->nr++;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have an origin -- check if the same path exists in the
|
||||
* parent and return an origin structure to represent it.
|
||||
*/
|
||||
static struct blame_origin *find_origin(struct repository *r,
|
||||
struct commit *parent,
|
||||
struct blame_origin *origin)
|
||||
struct blame_origin *origin,
|
||||
struct blame_bloom_data *bd)
|
||||
{
|
||||
struct blame_origin *porigin;
|
||||
struct diff_options diff_opts;
|
||||
|
@ -1286,10 +1349,18 @@ static struct blame_origin *find_origin(struct repository *r,
|
|||
|
||||
if (is_null_oid(&origin->commit->object.oid))
|
||||
do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
|
||||
else
|
||||
else {
|
||||
int compute_diff = 1;
|
||||
if (origin->commit->parents &&
|
||||
!oidcmp(&parent->object.oid,
|
||||
&origin->commit->parents->item->object.oid))
|
||||
compute_diff = maybe_changed_path(r, origin, bd);
|
||||
|
||||
if (compute_diff)
|
||||
diff_tree_oid(get_commit_tree_oid(parent),
|
||||
get_commit_tree_oid(origin->commit),
|
||||
"", &diff_opts);
|
||||
}
|
||||
diffcore_std(&diff_opts);
|
||||
|
||||
if (!diff_queued_diff.nr) {
|
||||
|
@ -1341,7 +1412,8 @@ static struct blame_origin *find_origin(struct repository *r,
|
|||
*/
|
||||
static struct blame_origin *find_rename(struct repository *r,
|
||||
struct commit *parent,
|
||||
struct blame_origin *origin)
|
||||
struct blame_origin *origin,
|
||||
struct blame_bloom_data *bd)
|
||||
{
|
||||
struct blame_origin *porigin = NULL;
|
||||
struct diff_options diff_opts;
|
||||
|
@ -1366,6 +1438,7 @@ static struct blame_origin *find_rename(struct repository *r,
|
|||
struct diff_filepair *p = diff_queued_diff.queue[i];
|
||||
if ((p->status == 'R' || p->status == 'C') &&
|
||||
!strcmp(p->two->path, origin->path)) {
|
||||
add_bloom_key(bd, p->one->path);
|
||||
porigin = get_origin(parent, p->one->path);
|
||||
oidcpy(&porigin->blob_oid, &p->one->oid);
|
||||
porigin->mode = p->one->mode;
|
||||
|
@ -2332,6 +2405,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl
|
|||
|
||||
#define MAXSG 16
|
||||
|
||||
typedef struct blame_origin *(*blame_find_alg)(struct repository *,
|
||||
struct commit *,
|
||||
struct blame_origin *,
|
||||
struct blame_bloom_data *);
|
||||
|
||||
static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
|
||||
{
|
||||
struct rev_info *revs = sb->revs;
|
||||
|
@ -2356,8 +2434,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
|
|||
* common cases, then we look for renames in the second pass.
|
||||
*/
|
||||
for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
|
||||
struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *);
|
||||
find = pass ? find_rename : find_origin;
|
||||
blame_find_alg find = pass ? find_rename : find_origin;
|
||||
|
||||
for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
|
||||
i < num_sg && sg;
|
||||
|
@ -2369,7 +2446,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
|
|||
continue;
|
||||
if (parse_commit(p))
|
||||
continue;
|
||||
porigin = find(sb->repo, p, origin);
|
||||
porigin = find(sb->repo, p, origin, sb->bloom_data);
|
||||
if (!porigin)
|
||||
continue;
|
||||
if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
|
||||
|
@ -2809,3 +2886,45 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head,
|
|||
blame_origin_incref(o);
|
||||
return new_head;
|
||||
}
|
||||
|
||||
void setup_blame_bloom_data(struct blame_scoreboard *sb,
|
||||
const char *path)
|
||||
{
|
||||
struct blame_bloom_data *bd;
|
||||
|
||||
if (!sb->repo->objects->commit_graph)
|
||||
return;
|
||||
|
||||
if (!sb->repo->objects->commit_graph->bloom_filter_settings)
|
||||
return;
|
||||
|
||||
bd = xmalloc(sizeof(struct blame_bloom_data));
|
||||
|
||||
bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings;
|
||||
|
||||
bd->alloc = 4;
|
||||
bd->nr = 0;
|
||||
ALLOC_ARRAY(bd->keys, bd->alloc);
|
||||
|
||||
add_bloom_key(bd, path);
|
||||
|
||||
sb->bloom_data = bd;
|
||||
}
|
||||
|
||||
void cleanup_scoreboard(struct blame_scoreboard *sb)
|
||||
{
|
||||
if (sb->bloom_data) {
|
||||
int i;
|
||||
for (i = 0; i < sb->bloom_data->nr; i++) {
|
||||
free(sb->bloom_data->keys[i]->hashes);
|
||||
free(sb->bloom_data->keys[i]);
|
||||
}
|
||||
free(sb->bloom_data->keys);
|
||||
FREE_AND_NULL(sb->bloom_data);
|
||||
|
||||
trace2_data_intmax("blame", sb->repo,
|
||||
"bloom/queries", bloom_count_queries);
|
||||
trace2_data_intmax("blame", sb->repo,
|
||||
"bloom/response-no", bloom_count_no);
|
||||
}
|
||||
}
|
||||
|
|
6
blame.h
6
blame.h
|
@ -100,6 +100,8 @@ struct blame_entry {
|
|||
int unblamable;
|
||||
};
|
||||
|
||||
struct blame_bloom_data;
|
||||
|
||||
/*
|
||||
* The current state of the blame assignment.
|
||||
*/
|
||||
|
@ -156,6 +158,7 @@ struct blame_scoreboard {
|
|||
void(*found_guilty_entry)(struct blame_entry *, void *);
|
||||
|
||||
void *found_guilty_entry_data;
|
||||
struct blame_bloom_data *bloom_data;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -180,6 +183,9 @@ void init_scoreboard(struct blame_scoreboard *sb);
|
|||
void setup_scoreboard(struct blame_scoreboard *sb,
|
||||
const char *path,
|
||||
struct blame_origin **orig);
|
||||
void setup_blame_bloom_data(struct blame_scoreboard *sb,
|
||||
const char *path);
|
||||
void cleanup_scoreboard(struct blame_scoreboard *sb);
|
||||
|
||||
struct blame_entry *blame_entry_prepend(struct blame_entry *head,
|
||||
long start, long end,
|
||||
|
|
|
@ -1061,6 +1061,14 @@ parse_done:
|
|||
string_list_clear(&ignore_revs_file_list, 0);
|
||||
string_list_clear(&ignore_rev_list, 0);
|
||||
setup_scoreboard(&sb, path, &o);
|
||||
|
||||
/*
|
||||
* Changed-path Bloom filters are disabled when looking
|
||||
* for copies.
|
||||
*/
|
||||
if (!(opt & PICKAXE_BLAME_COPY))
|
||||
setup_blame_bloom_data(&sb, path);
|
||||
|
||||
lno = sb.num_lines;
|
||||
|
||||
if (lno && !range_list.nr)
|
||||
|
@ -1164,5 +1172,7 @@ parse_done:
|
|||
printf("num get patch: %d\n", sb.num_get_patch);
|
||||
printf("num commits: %d\n", sb.num_commits);
|
||||
}
|
||||
|
||||
cleanup_scoreboard(&sb);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1700,9 +1700,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
|
|||
"new_index file. Check that disk is not full and quota is\n"
|
||||
"not exceeded, and then \"git restore --staged :/\" to recover."));
|
||||
|
||||
if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
|
||||
write_commit_graph_reachable(the_repository->objects->odb, 0, NULL))
|
||||
return 1;
|
||||
git_test_write_commit_graph_or_die();
|
||||
|
||||
repo_rerere(the_repository, 0);
|
||||
run_command_v_opt(argv_gc_auto, RUN_GIT_CMD);
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "branch.h"
|
||||
#include "commit-reach.h"
|
||||
#include "wt-status.h"
|
||||
#include "commit-graph.h"
|
||||
|
||||
#define DEFAULT_TWOHEAD (1<<0)
|
||||
#define DEFAULT_OCTOPUS (1<<1)
|
||||
|
@ -1700,9 +1701,11 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
|
|||
head_commit);
|
||||
}
|
||||
|
||||
if (squash)
|
||||
if (squash) {
|
||||
finish(head_commit, remoteheads, NULL, NULL);
|
||||
else
|
||||
|
||||
git_test_write_commit_graph_or_die();
|
||||
} else
|
||||
write_merge_state(remoteheads);
|
||||
|
||||
if (merge_was_ok)
|
||||
|
|
|
@ -19,6 +19,20 @@
|
|||
#include "bloom.h"
|
||||
#include "commit-slab.h"
|
||||
|
||||
void git_test_write_commit_graph_or_die(void)
|
||||
{
|
||||
int flags = 0;
|
||||
if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
|
||||
return;
|
||||
|
||||
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
|
||||
flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
|
||||
|
||||
if (write_commit_graph_reachable(the_repository->objects->odb,
|
||||
flags, NULL))
|
||||
die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
|
||||
}
|
||||
|
||||
#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
|
||||
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
|
||||
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
|
||||
|
|
|
@ -12,6 +12,15 @@
|
|||
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
|
||||
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
|
||||
|
||||
/*
|
||||
* This method is only used to enhance coverage of the commit-graph
|
||||
* feature in the test suite with the GIT_TEST_COMMIT_GRAPH and
|
||||
* GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS environment variables. Do not
|
||||
* call this method oustide of a builtin, and only if you know what
|
||||
* you are doing!
|
||||
*/
|
||||
void git_test_write_commit_graph_or_die(void);
|
||||
|
||||
struct commit;
|
||||
struct bloom_filter_settings;
|
||||
|
||||
|
|
17
revision.c
17
revision.c
|
@ -650,6 +650,20 @@ static void trace2_bloom_filter_statistics_atexit(void)
|
|||
jw_release(&jw);
|
||||
}
|
||||
|
||||
static int forbid_bloom_filters(struct pathspec *spec)
|
||||
{
|
||||
if (spec->has_wildcard)
|
||||
return 1;
|
||||
if (spec->nr > 1)
|
||||
return 1;
|
||||
if (spec->magic & ~PATHSPEC_LITERAL)
|
||||
return 1;
|
||||
if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
||||
{
|
||||
struct pathspec_item *pi;
|
||||
|
@ -661,6 +675,9 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
|||
if (!revs->commits)
|
||||
return;
|
||||
|
||||
if (forbid_bloom_filters(&revs->prune_data))
|
||||
return;
|
||||
|
||||
repo_parse_commit(revs->repo, revs->commits->item);
|
||||
|
||||
if (!revs->repo->objects->commit_graph)
|
||||
|
|
|
@ -43,10 +43,21 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
|
|||
print_bloom_filter(filter);
|
||||
}
|
||||
|
||||
static const char *bloom_usage = "\n"
|
||||
" test-tool bloom get_murmer3 <string>\n"
|
||||
" test-tool bloom generate_filter <string> [<string>...]\n"
|
||||
" test-tool get_filter_for_commit <commit-hex>\n";
|
||||
|
||||
int cmd__bloom(int argc, const char **argv)
|
||||
{
|
||||
if (argc < 2)
|
||||
usage(bloom_usage);
|
||||
|
||||
if (!strcmp(argv[1], "get_murmur3")) {
|
||||
uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
|
||||
uint32_t hashed;
|
||||
if (argc < 3)
|
||||
usage(bloom_usage);
|
||||
hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
|
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
||||
}
|
||||
|
||||
|
@ -56,9 +67,8 @@ int cmd__bloom(int argc, const char **argv)
|
|||
filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
|
||||
filter.data = xcalloc(filter.len, sizeof(unsigned char));
|
||||
|
||||
if (!argv[2]){
|
||||
die("at least one input string expected");
|
||||
}
|
||||
if (argc - 1 < i)
|
||||
usage(bloom_usage);
|
||||
|
||||
while (argv[i]) {
|
||||
add_string_to_filter(argv[i], &filter);
|
||||
|
@ -71,6 +81,8 @@ int cmd__bloom(int argc, const char **argv)
|
|||
if (!strcmp(argv[1], "get_filter_for_commit")) {
|
||||
struct object_id oid;
|
||||
const char *end;
|
||||
if (argc < 3)
|
||||
usage(bloom_usage);
|
||||
if (parse_oid_hex(argv[2], &oid, &end))
|
||||
die("cannot parse oid '%s'", argv[2]);
|
||||
init_bloom_filters();
|
||||
|
|
Загрузка…
Ссылка в новой задаче