git-pickaxe: get rid of wasteful find_origin().

After finding out which path in the parent to scan to pass
blames, using get_tree_entry() to extract the blob information
again was quite wasteful, since diff-tree already gave us that
information.  Separate the function to create an origin out as
get_origin().

You'll never know what is more efficient unless you try and/or
think hard.  I somehow thought that extracting one known path
out of commit's tree is cheaper than running a diff-tree for the
current path between the commit and its parent, but it is not
the case.  In real, non-toy projects, most commits do not touch
the path you are interested in, and if the path is a few levels
away from the toplevel, whole-subdirectory comparison logic
diff-tree allows us to skip opening lower subdirectories.

This commit rewrites find_origin() function to use a single-path
diff-tree to see if the parent has the same blob as the current
suspect, which is cheaper than extracting the blob information
using get_tree_entry() and comparing it with what the current
suspect has.  This shaves about 6% overhead when annotating
kernel/sched.c in the Linux kernel repository on my machine.
The saving rises to 25% for arch/i386/kernel/Makefile.

Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
Junio C Hamano 2006-10-21 02:56:33 -07:00
Родитель 46014766bd
Коммит f6c0e19102
1 изменённых файлов: 101 добавлений и 37 удалений

Просмотреть файл

@ -140,48 +140,103 @@ static void coalesce(struct scoreboard *sb)
}
}
static void free_origin(struct origin *o)
static struct origin *get_origin(struct scoreboard *sb,
struct commit *commit,
const char *path)
{
free(o);
}
static struct origin *find_origin(struct scoreboard *sb,
struct commit *commit,
const char *path)
{
struct blame_entry *ent;
struct blame_entry *e;
struct origin *o;
unsigned mode;
char type[10];
for (ent = sb->ent; ent; ent = ent->next) {
if (ent->suspect->commit == commit &&
!strcmp(ent->suspect->path, path))
return ent->suspect;
for (e = sb->ent; e; e = e->next) {
if (e->suspect->commit == commit &&
!strcmp(e->suspect->path, path))
return e->suspect;
}
o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
o->commit = commit;
strcpy(o->path, path);
if (get_tree_entry(commit->object.sha1, path, o->blob_sha1, &mode))
goto err_out;
if (sha1_object_info(o->blob_sha1, type, NULL) ||
strcmp(type, blob_type))
goto err_out;
return o;
err_out:
free_origin(o);
return NULL;
}
static struct origin *find_rename(struct scoreboard *sb,
static int fill_blob_sha1(struct origin *origin)
{
unsigned mode;
char type[10];
if (!is_null_sha1(origin->blob_sha1))
return 0;
if (get_tree_entry(origin->commit->object.sha1,
origin->path,
origin->blob_sha1, &mode))
goto error_out;
if (sha1_object_info(origin->blob_sha1, type, NULL) ||
strcmp(type, blob_type))
goto error_out;
return 0;
error_out:
hashclr(origin->blob_sha1);
return -1;
}
static struct origin *find_origin(struct scoreboard *sb,
struct commit *parent,
struct origin *origin)
{
struct origin *porigin = NULL;
struct diff_options diff_opts;
int i;
const char *paths[1];
const char *paths[2];
/* See if the origin->path is different between parent
* and origin first. Most of the time they are the
* same and diff-tree is fairly efficient about this.
*/
diff_setup(&diff_opts);
diff_opts.recursive = 1;
diff_opts.detect_rename = 0;
diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
paths[0] = origin->path;
paths[1] = NULL;
diff_tree_setup_paths(paths, &diff_opts);
if (diff_setup_done(&diff_opts) < 0)
die("diff-setup");
diff_tree_sha1(parent->tree->object.sha1,
origin->commit->tree->object.sha1,
"", &diff_opts);
diffcore_std(&diff_opts);
/* It is either one entry that says "modified", or "created",
* or nothing.
*/
if (!diff_queued_diff.nr) {
/* The path is the same as parent */
porigin = get_origin(sb, parent, origin->path);
hashcpy(porigin->blob_sha1, origin->blob_sha1);
}
else if (diff_queued_diff.nr != 1)
die("internal error in pickaxe::find_origin");
else {
struct diff_filepair *p = diff_queued_diff.queue[0];
switch (p->status) {
default:
die("internal error in pickaxe::find_origin (%c)",
p->status);
case 'M':
porigin = get_origin(sb, parent, origin->path);
hashcpy(porigin->blob_sha1, p->one->sha1);
break;
case 'A':
case 'T':
/* Did not exist in parent, or type changed */
break;
}
}
diff_flush(&diff_opts);
if (porigin)
return porigin;
/* Otherwise we would look for a rename */
diff_setup(&diff_opts);
diff_opts.recursive = 1;
@ -191,16 +246,17 @@ static struct origin *find_rename(struct scoreboard *sb,
diff_tree_setup_paths(paths, &diff_opts);
if (diff_setup_done(&diff_opts) < 0)
die("diff-setup");
diff_tree_sha1(origin->commit->tree->object.sha1,
parent->tree->object.sha1,
diff_tree_sha1(parent->tree->object.sha1,
origin->commit->tree->object.sha1,
"", &diff_opts);
diffcore_std(&diff_opts);
for (i = 0; i < diff_queued_diff.nr; i++) {
struct diff_filepair *p = diff_queued_diff.queue[i];
if ((p->status == 'R' || p->status == 'C') &&
!strcmp(p->one->path, origin->path)) {
porigin = find_origin(sb, parent, p->two->path);
!strcmp(p->two->path, origin->path)) {
porigin = get_origin(sb, parent, p->one->path);
hashcpy(porigin->blob_sha1, p->one->sha1);
break;
}
}
@ -705,6 +761,15 @@ static int find_copy_in_parent(struct scoreboard *sb,
"", &diff_opts);
diffcore_std(&diff_opts);
/*
* NEEDSWORK: This loop is wasteful in that it opens the same
* blob in the parent number of times. We should swap the
* loop inside out, which would require keeping track of
* "best blame so far" for blame entries that the current
* "target" is being suspected.
*/
for (e = sb->ent; e; e = e->next) {
struct blame_entry split[3];
if (e->guilty || cmp_suspect(e->suspect, target))
@ -724,8 +789,8 @@ static int find_copy_in_parent(struct scoreboard *sb,
if (porigin && !strcmp(p->one->path, porigin->path))
/* find_move already dealt with this path */
continue;
norigin = find_origin(sb, parent, p->one->path);
norigin = get_origin(sb, parent, p->one->path);
hashcpy(norigin->blob_sha1, p->one->sha1);
blob = read_sha1_file(norigin->blob_sha1, type,
(unsigned long *) &file_p.size);
file_p.ptr = blob;
@ -735,6 +800,7 @@ static int find_copy_in_parent(struct scoreboard *sb,
}
find_copy_in_blob(sb, e, norigin, this, &file_p);
copy_split_if_better(sb, split, this);
free(blob);
}
if (split[1].suspect &&
blame_copy_score < ent_score(sb, &split[1]))
@ -762,9 +828,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
if (parse_commit(p))
continue;
porigin = find_origin(sb, parent->item, origin->path);
if (!porigin)
porigin = find_rename(sb, parent->item, origin);
porigin = find_origin(sb, parent->item, origin);
if (!porigin)
continue;
if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
@ -1423,8 +1487,8 @@ int cmd_pickaxe(int argc, const char **argv, const char *prefix)
*/
prepare_revision_walk(&revs);
o = find_origin(&sb, sb.final, path);
if (!o)
o = get_origin(&sb, sb.final, path);
if (fill_blob_sha1(o))
die("no such path %s in %s", path, final_commit_name);
sb.final_buf = read_sha1_file(o->blob_sha1, type, &sb.final_buf_size);