2005-04-24 06:04:40 +04:00
|
|
|
#include "cache.h"
|
|
|
|
#include "commit.h"
|
2006-04-11 05:14:54 +04:00
|
|
|
#include "diff.h"
|
2006-02-26 03:19:46 +03:00
|
|
|
#include "revision.h"
|
2006-09-05 08:50:12 +04:00
|
|
|
#include "list-objects.h"
|
2006-05-19 01:19:20 +04:00
|
|
|
#include "builtin.h"
|
2007-10-22 09:47:56 +04:00
|
|
|
#include "log-tree.h"
|
2008-05-04 14:36:54 +04:00
|
|
|
#include "graph.h"
|
2009-03-26 07:55:24 +03:00
|
|
|
#include "bisect.h"
|
2005-05-31 05:46:32 +04:00
|
|
|
|
2005-05-26 05:29:09 +04:00
|
|
|
/*
 * Usage text for "git rev-list".  The options are grouped under four
 * headings: limiting output, ordering output, formatting output and
 * special purpose.  The text deliberately has no trailing newline.
 */
static const char rev_list_usage[] =
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
" limiting output:\n"
" --max-count=nr\n"
" --max-age=epoch\n"
" --min-age=epoch\n"
" --sparse\n"
" --no-merges\n"
" --remove-empty\n"
" --all\n"
" --branches\n"
" --tags\n"
" --remotes\n"
" --stdin\n"
" --quiet\n"
" ordering output:\n"
" --topo-order\n"
" --date-order\n"
" --reverse\n"
" formatting output:\n"
" --parents\n"
" --children\n"
" --objects | --objects-edge\n"
" --unpacked\n"
" --header | --pretty\n"
" --abbrev=nr | --no-abbrev\n"
" --abbrev-commit\n"
" --left-right\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
" --bisect-all"
;
|
2005-05-26 05:29:09 +04:00
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
static void finish_commit(struct commit *commit, void *data);
|
|
|
|
static void show_commit(struct commit *commit, void *data)
|
2005-06-02 20:19:53 +04:00
|
|
|
{
|
2009-04-07 00:28:00 +04:00
|
|
|
struct rev_list_info *info = data;
|
|
|
|
struct rev_info *revs = info->revs;
|
2009-04-06 23:28:36 +04:00
|
|
|
|
|
|
|
graph_show_commit(revs->graph);
|
2008-05-04 14:36:54 +04:00
|
|
|
|
2009-04-07 00:28:00 +04:00
|
|
|
if (info->show_timestamp)
|
2006-03-22 11:22:00 +03:00
|
|
|
printf("%lu ", commit->date);
|
2009-04-07 00:28:00 +04:00
|
|
|
if (info->header_prefix)
|
|
|
|
fputs(info->header_prefix, stdout);
|
2008-05-25 11:07:21 +04:00
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
if (!revs->graph) {
|
2008-05-25 11:07:21 +04:00
|
|
|
if (commit->object.flags & BOUNDARY)
|
|
|
|
putchar('-');
|
|
|
|
else if (commit->object.flags & UNINTERESTING)
|
|
|
|
putchar('^');
|
2009-04-06 23:28:36 +04:00
|
|
|
else if (revs->left_right) {
|
2008-05-25 11:07:21 +04:00
|
|
|
if (commit->object.flags & SYMMETRIC_LEFT)
|
|
|
|
putchar('<');
|
|
|
|
else
|
|
|
|
putchar('>');
|
|
|
|
}
|
2006-10-23 04:32:47 +04:00
|
|
|
}
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->abbrev_commit && revs->abbrev)
|
|
|
|
fputs(find_unique_abbrev(commit->object.sha1, revs->abbrev),
|
2006-04-16 10:48:27 +04:00
|
|
|
stdout);
|
2006-04-07 08:32:36 +04:00
|
|
|
else
|
|
|
|
fputs(sha1_to_hex(commit->object.sha1), stdout);
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->print_parents) {
|
2005-06-02 20:19:53 +04:00
|
|
|
struct commit_list *parents = commit->parents;
|
|
|
|
while (parents) {
|
2007-07-09 06:05:31 +04:00
|
|
|
printf(" %s", sha1_to_hex(parents->item->object.sha1));
|
2005-06-02 20:19:53 +04:00
|
|
|
parents = parents->next;
|
|
|
|
}
|
|
|
|
}
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->children.name) {
|
2008-04-04 10:01:47 +04:00
|
|
|
struct commit_list *children;
|
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
children = lookup_decoration(&revs->children, &commit->object);
|
2008-04-04 10:01:47 +04:00
|
|
|
while (children) {
|
|
|
|
printf(" %s", sha1_to_hex(children->item->object.sha1));
|
|
|
|
children = children->next;
|
|
|
|
}
|
|
|
|
}
|
2009-04-06 23:28:36 +04:00
|
|
|
show_decorations(revs, commit);
|
|
|
|
if (revs->commit_format == CMIT_FMT_ONELINE)
|
2005-08-09 09:15:40 +04:00
|
|
|
putchar(' ');
|
|
|
|
else
|
|
|
|
putchar('\n');
|
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->verbose_header && commit->buffer) {
|
2008-10-09 23:12:12 +04:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2009-04-06 23:28:36 +04:00
|
|
|
pretty_print_commit(revs->commit_format, commit,
|
|
|
|
&buf, revs->abbrev, NULL, NULL,
|
|
|
|
revs->date_mode, 0);
|
|
|
|
if (revs->graph) {
|
2008-05-04 14:36:54 +04:00
|
|
|
if (buf.len) {
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->commit_format != CMIT_FMT_ONELINE)
|
|
|
|
graph_show_oneline(revs->graph);
|
2008-05-04 14:36:54 +04:00
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
graph_show_commit_msg(revs->graph, &buf);
|
2008-05-04 14:36:54 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a newline after the commit message.
|
|
|
|
*
|
|
|
|
* Usually, this newline produces a blank
|
|
|
|
* padding line between entries, in which case
|
|
|
|
* we need to add graph padding on this line.
|
|
|
|
*
|
|
|
|
* However, the commit message may not end in a
|
|
|
|
* newline. In this case the newline simply
|
|
|
|
* ends the last line of the commit message,
|
|
|
|
* and we don't need any graph output. (This
|
|
|
|
* always happens with CMIT_FMT_ONELINE, and it
|
|
|
|
* happens with CMIT_FMT_USERFORMAT when the
|
|
|
|
* format doesn't explicitly end in a newline.)
|
|
|
|
*/
|
|
|
|
if (buf.len && buf.buf[buf.len - 1] == '\n')
|
2009-04-06 23:28:36 +04:00
|
|
|
graph_show_padding(revs->graph);
|
2008-05-04 14:36:54 +04:00
|
|
|
putchar('\n');
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* If the message buffer is empty, just show
|
|
|
|
* the rest of the graph output for this
|
|
|
|
* commit.
|
|
|
|
*/
|
2009-04-06 23:28:36 +04:00
|
|
|
if (graph_show_remainder(revs->graph))
|
2008-05-04 14:36:54 +04:00
|
|
|
putchar('\n');
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (buf.len)
|
2009-04-07 00:28:00 +04:00
|
|
|
printf("%s%c", buf.buf, info->hdr_termination);
|
2008-05-04 14:36:54 +04:00
|
|
|
}
|
2007-09-10 14:35:06 +04:00
|
|
|
strbuf_release(&buf);
|
2008-05-04 14:36:54 +04:00
|
|
|
} else {
|
2009-04-06 23:28:36 +04:00
|
|
|
if (graph_show_remainder(revs->graph))
|
2008-05-04 14:36:54 +04:00
|
|
|
putchar('\n');
|
2005-07-05 03:36:48 +04:00
|
|
|
}
|
2007-06-29 21:40:46 +04:00
|
|
|
maybe_flush_or_die(stdout, "stdout");
|
2009-04-06 23:28:36 +04:00
|
|
|
finish_commit(commit, data);
|
2007-11-11 10:29:41 +03:00
|
|
|
}
|
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
static void finish_commit(struct commit *commit, void *data)
|
2007-11-11 10:29:41 +03:00
|
|
|
{
|
2006-06-18 05:47:58 +04:00
|
|
|
if (commit->parents) {
|
|
|
|
free_commit_list(commit->parents);
|
|
|
|
commit->parents = NULL;
|
|
|
|
}
|
2006-08-28 08:19:39 +04:00
|
|
|
free(commit->buffer);
|
|
|
|
commit->buffer = NULL;
|
2005-06-06 19:39:40 +04:00
|
|
|
}
|
|
|
|
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 05:15:26 +04:00
|
|
|
static void finish_object(struct object *obj, const struct name_path *path, const char *name)
|
2007-11-11 10:29:41 +03:00
|
|
|
{
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 04:27:58 +04:00
|
|
|
if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1))
|
|
|
|
die("missing blob object '%s'", sha1_to_hex(obj->sha1));
|
2007-11-11 10:29:41 +03:00
|
|
|
}
|
|
|
|
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 05:15:26 +04:00
|
|
|
static void show_object(struct object *obj, const struct name_path *path, const char *component)
|
2005-06-25 09:56:58 +04:00
|
|
|
{
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 05:15:26 +04:00
|
|
|
char *name = path_name(path, component);
|
2006-09-05 08:50:12 +04:00
|
|
|
/* An object with name "foo\n0000000..." can be used to
|
2008-09-13 21:18:36 +04:00
|
|
|
* confuse downstream "git pack-objects" very badly.
|
2006-09-05 08:50:12 +04:00
|
|
|
*/
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 04:27:58 +04:00
|
|
|
const char *ep = strchr(name, '\n');
|
Make sure quickfetch is not fooled with a previous, incomplete fetch.
This updates git-rev-list --objects to be a bit more careful
when listing a blob object to make sure the blob actually
exists, and uses it to make sure the quick-fetch optimization we
introduced earlier is not fooled by a previous incomplete fetch.
The quick-fetch optimization works by running this command:
git rev-list --objects <<commit-list>> --not --all
where <<commit-list>> is a list of commits that we are going to
fetch from the other side. If there is any object missing to
complete the <<commit-list>>, the rev-list would fail and die
(say, the commit was in our repository, but its tree wasn't --
then it will barf while trying to list the blobs the tree
contains because it cannot read that tree).
Usually we do not have the objects (otherwise why would we
fetching?), but in one important special case we do: when the
remote repository is used as an alternate object store
(i.e. pointed by .git/objects/info/alternates). We could check
.git/objects/info/alternates to see if the remote we are
interacting with is one of them (or is used as an alternate,
recursively, by one of them), but that check is more cumbersome
than it is worth.
The above check however did not catch missing blob, because
object listing code did not read nor check blob objects, knowing
that blobs do not contain any further references to other
objects. This commit fixes it with practically unmeasurable
overhead.
I've benched this with
git rev-list --objects --all >/dev/null
in the kernel repository, with three different implementations
of the "check-blob".
- Checking with has_sha1_file() has negligible (unmeasurable)
performance penalty.
- Checking with sha1_object_info() makes it somewhat slower,
perhaps by 5%.
- Checking with read_sha1_file() to cause a fully re-validation
is prohibitively expensive (about 4 times as much runtime).
In my original patch, I had this as a command line option, but
the overhead is small enough that it is not really worth it.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-16 11:42:29 +04:00
|
|
|
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 05:15:26 +04:00
|
|
|
finish_object(obj, path, name);
|
2006-09-05 08:50:12 +04:00
|
|
|
if (ep) {
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 04:27:58 +04:00
|
|
|
printf("%s %.*s\n", sha1_to_hex(obj->sha1),
|
|
|
|
(int) (ep - name),
|
|
|
|
name);
|
2005-06-25 09:56:58 +04:00
|
|
|
}
|
2006-09-05 08:50:12 +04:00
|
|
|
else
|
process_{tree,blob}: show objects without buffering
Here's a less trivial thing, and slightly more dubious one.
I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.
Now, there are possible downsides to this:
- the "buffer using object_array" _can_ in theory result in at least
better I-cache usage (two tight loops rather than one more spread out
one). I don't think this is a real issue, but in theory..
- this _does_ change the order of the objects printed. Instead of doing a
"process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
over the commits (which puts all the root trees _first_ in the object
list, this patch just adds them to the list of pending objects, and
then we'll traverse them in that order (and thus show each root tree
object together with the objects we discover under it)
I _think_ the new ordering actually makes more sense, but the object
ordering is actually a subtle thing when it comes to packing
efficiency, so any change in order is going to have implications for
packing. Good or bad, I dunno.
- There may be some reason why we did it that odd way with the object
array, that I have simply forgotten.
Anyway, now that we don't buffer up the objects before showing them
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.
This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...
Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of obejcts are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.
Or maybe there _used_ to be a reason, and no longer is.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 04:27:58 +04:00
|
|
|
printf("%s %s\n", sha1_to_hex(obj->sha1), name);
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 05:15:26 +04:00
|
|
|
free(name);
|
2005-06-25 09:56:58 +04:00
|
|
|
}
|
|
|
|
|
2006-09-06 12:42:23 +04:00
|
|
|
static void show_edge(struct commit *commit)
|
|
|
|
{
|
|
|
|
printf("-%s\n", sha1_to_hex(commit->object.sha1));
|
|
|
|
}
|
|
|
|
|
2009-02-21 11:26:01 +03:00
|
|
|
/*
 * Integer base-2 logarithm, rounded down: the number of times "n" can
 * be halved while it is still greater than 1.  Returns 0 for n <= 1.
 */
static inline int log2i(int n)
{
	/* Not called "log2", to avoid shadowing the <math.h> function. */
	int bits = 0;

	for (; n > 1; n >>= 1)
		bits++;

	return bits;
}
|
|
|
|
|
|
|
|
/*
 * Integer power of two: returns 2^n.  "n" must be non-negative and
 * smaller than the number of value bits in an int, otherwise the shift
 * is undefined.
 */
static inline int exp2i(int n)
{
	return 1 << n;
}
|
|
|
|
|
|
|
|
/*
 * Estimate the number of bisect steps left (after the current step)
 *
 * For any x between 0 included and 2^n excluded, the probability for
 * n - 1 steps left looks like:
 *
 * P(2^n + x) == (2^n - x) / (2^n + x)
 *
 * and P(2^n + x) < 0.5 means 2^n < 3x
 *
 * "all" is split as 2^n + x with n = log2i(all).  The result is n when
 * 2^n < 3x, and n - 1 otherwise.  Fewer than 3 commits always yield 0.
 */
static int estimate_bisect_steps(int all)
{
	int n, x, e;

	if (all < 3)
		return 0;

	n = log2i(all);
	e = exp2i(n);
	x = all - e;

	return (e < 3 * x) ? n : n - 1;
}
|
|
|
|
|
2009-03-30 08:59:59 +04:00
|
|
|
static void show_tried_revs(struct commit_list *tried, int stringed)
|
2009-03-26 07:55:49 +03:00
|
|
|
{
|
|
|
|
printf("bisect_tried='");
|
|
|
|
for (;tried; tried = tried->next) {
|
|
|
|
char *format = tried->next ? "%s|" : "%s";
|
|
|
|
printf(format, sha1_to_hex(tried->item->object.sha1));
|
|
|
|
}
|
2009-03-30 08:59:59 +04:00
|
|
|
printf(stringed ? "' &&\n" : "'\n");
|
2009-03-26 07:55:49 +03:00
|
|
|
}
|
|
|
|
|
2009-04-07 07:08:42 +04:00
|
|
|
int show_bisect_vars(struct rev_list_info *info, int reaches, int all)
|
2009-03-26 07:55:30 +03:00
|
|
|
{
|
2009-04-07 07:08:42 +04:00
|
|
|
int cnt, flags = info->bisect_show_flags;
|
2009-03-30 08:59:59 +04:00
|
|
|
char hex[41] = "", *format;
|
2009-03-26 07:55:49 +03:00
|
|
|
struct commit_list *tried;
|
2009-04-07 00:28:00 +04:00
|
|
|
struct rev_info *revs = info->revs;
|
2009-03-26 07:55:30 +03:00
|
|
|
|
2009-03-29 13:55:43 +04:00
|
|
|
if (!revs->commits && !(flags & BISECT_SHOW_TRIED))
|
2009-03-26 07:55:30 +03:00
|
|
|
return 1;
|
|
|
|
|
2009-03-29 13:55:43 +04:00
|
|
|
revs->commits = filter_skipped(revs->commits, &tried, flags & BISECT_SHOW_ALL);
|
2009-03-26 07:55:49 +03:00
|
|
|
|
2009-03-26 07:55:30 +03:00
|
|
|
/*
|
2009-03-26 07:55:41 +03:00
|
|
|
* revs->commits can reach "reaches" commits among
|
2009-03-26 07:55:30 +03:00
|
|
|
* "all" commits. If it is good, then there are
|
|
|
|
* (all-reaches) commits left to be bisected.
|
|
|
|
* On the other hand, if it is bad, then the set
|
|
|
|
* to bisect is "reaches".
|
|
|
|
* A bisect set of size N has (N-1) commits further
|
|
|
|
* to test, as we already know one bad one.
|
|
|
|
*/
|
|
|
|
cnt = all - reaches;
|
|
|
|
if (cnt < reaches)
|
|
|
|
cnt = reaches;
|
2009-03-26 07:55:35 +03:00
|
|
|
|
2009-03-26 07:55:49 +03:00
|
|
|
if (revs->commits)
|
|
|
|
strcpy(hex, sha1_to_hex(revs->commits->item->object.sha1));
|
2009-03-26 07:55:30 +03:00
|
|
|
|
2009-03-29 13:55:43 +04:00
|
|
|
if (flags & BISECT_SHOW_ALL) {
|
2009-04-07 00:28:00 +04:00
|
|
|
traverse_commit_list(revs, show_commit, show_object, info);
|
2009-03-26 07:55:30 +03:00
|
|
|
printf("------\n");
|
|
|
|
}
|
|
|
|
|
2009-03-29 13:55:43 +04:00
|
|
|
if (flags & BISECT_SHOW_TRIED)
|
2009-03-30 08:59:59 +04:00
|
|
|
show_tried_revs(tried, flags & BISECT_SHOW_STRINGED);
|
|
|
|
format = (flags & BISECT_SHOW_STRINGED) ?
|
|
|
|
"bisect_rev=%s &&\n"
|
|
|
|
"bisect_nr=%d &&\n"
|
|
|
|
"bisect_good=%d &&\n"
|
|
|
|
"bisect_bad=%d &&\n"
|
|
|
|
"bisect_all=%d &&\n"
|
|
|
|
"bisect_steps=%d\n"
|
|
|
|
:
|
|
|
|
"bisect_rev=%s\n"
|
|
|
|
"bisect_nr=%d\n"
|
|
|
|
"bisect_good=%d\n"
|
|
|
|
"bisect_bad=%d\n"
|
|
|
|
"bisect_all=%d\n"
|
|
|
|
"bisect_steps=%d\n";
|
|
|
|
printf(format,
|
2009-03-26 07:55:30 +03:00
|
|
|
hex,
|
|
|
|
cnt - 1,
|
|
|
|
all - reaches - 1,
|
|
|
|
reaches - 1,
|
|
|
|
all,
|
|
|
|
estimate_bisect_steps(all));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-07-29 09:44:25 +04:00
|
|
|
int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
2005-04-24 06:04:40 +04:00
|
|
|
{
|
2009-04-06 23:28:36 +04:00
|
|
|
struct rev_info revs;
|
2009-04-07 00:28:00 +04:00
|
|
|
struct rev_list_info info;
|
2006-02-27 19:54:36 +03:00
|
|
|
int i;
|
2006-09-06 08:39:02 +04:00
|
|
|
int read_from_stdin = 0;
|
2009-03-26 07:55:17 +03:00
|
|
|
int bisect_list = 0;
|
2007-03-22 08:15:54 +03:00
|
|
|
int bisect_show_vars = 0;
|
2007-10-22 09:47:56 +04:00
|
|
|
int bisect_find_all = 0;
|
2007-11-11 10:29:41 +03:00
|
|
|
int quiet = 0;
|
2005-04-24 06:04:40 +04:00
|
|
|
|
2008-05-14 21:46:53 +04:00
|
|
|
git_config(git_default_config, NULL);
|
2006-07-29 09:44:25 +04:00
|
|
|
init_revisions(&revs, prefix);
|
2006-04-16 10:48:27 +04:00
|
|
|
revs.abbrev = 0;
|
|
|
|
revs.commit_format = CMIT_FMT_UNSPECIFIED;
|
2006-02-28 22:24:00 +03:00
|
|
|
argc = setup_revisions(argc, argv, &revs, NULL);
|
2006-02-26 03:19:46 +03:00
|
|
|
|
2009-04-07 00:28:00 +04:00
|
|
|
memset(&info, 0, sizeof(info));
|
|
|
|
info.revs = &revs;
|
|
|
|
|
2008-07-18 09:39:09 +04:00
|
|
|
quiet = DIFF_OPT_TST(&revs.diffopt, QUIET);
|
2005-05-06 12:00:11 +04:00
|
|
|
for (i = 1 ; i < argc; i++) {
|
2005-10-21 08:25:09 +04:00
|
|
|
const char *arg = argv[i];
|
2005-05-06 12:00:11 +04:00
|
|
|
|
2005-05-26 05:29:09 +04:00
|
|
|
if (!strcmp(arg, "--header")) {
|
2006-04-16 10:48:27 +04:00
|
|
|
revs.verbose_header = 1;
|
2005-06-01 19:42:22 +04:00
|
|
|
continue;
|
|
|
|
}
|
2006-03-22 11:22:00 +03:00
|
|
|
if (!strcmp(arg, "--timestamp")) {
|
2009-04-07 00:28:00 +04:00
|
|
|
info.show_timestamp = 1;
|
2006-03-22 11:22:00 +03:00
|
|
|
continue;
|
|
|
|
}
|
2005-06-18 09:54:50 +04:00
|
|
|
if (!strcmp(arg, "--bisect")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2007-10-22 09:47:56 +04:00
|
|
|
if (!strcmp(arg, "--bisect-all")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_find_all = 1;
|
2009-04-07 07:08:42 +04:00
|
|
|
info.bisect_show_flags = BISECT_SHOW_ALL;
|
2009-02-08 17:54:47 +03:00
|
|
|
revs.show_decorations = 1;
|
2007-10-22 09:47:56 +04:00
|
|
|
continue;
|
|
|
|
}
|
2007-03-22 08:15:54 +03:00
|
|
|
if (!strcmp(arg, "--bisect-vars")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_show_vars = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2006-09-06 08:39:02 +04:00
|
|
|
if (!strcmp(arg, "--stdin")) {
|
|
|
|
if (read_from_stdin++)
|
|
|
|
die("--stdin given twice?");
|
|
|
|
read_revisions_from_stdin(&revs);
|
|
|
|
continue;
|
|
|
|
}
|
2006-02-26 03:19:46 +03:00
|
|
|
usage(rev_list_usage);
|
2005-05-26 05:29:09 +04:00
|
|
|
|
2005-05-06 12:00:11 +04:00
|
|
|
}
|
2006-04-16 10:48:27 +04:00
|
|
|
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
|
|
|
|
/* The command line has a --pretty */
|
2009-04-07 00:28:00 +04:00
|
|
|
info.hdr_termination = '\n';
|
2006-04-16 10:48:27 +04:00
|
|
|
if (revs.commit_format == CMIT_FMT_ONELINE)
|
2009-04-07 00:28:00 +04:00
|
|
|
info.header_prefix = "";
|
2006-04-16 10:48:27 +04:00
|
|
|
else
|
2009-04-07 00:28:00 +04:00
|
|
|
info.header_prefix = "commit ";
|
2006-04-16 10:48:27 +04:00
|
|
|
}
|
2006-04-17 23:42:36 +04:00
|
|
|
else if (revs.verbose_header)
|
|
|
|
/* Only --header was specified */
|
|
|
|
revs.commit_format = CMIT_FMT_RAW;
|
2005-05-06 12:00:11 +04:00
|
|
|
|
2009-04-07 00:28:00 +04:00
|
|
|
if ((!revs.commits &&
|
2006-04-15 09:43:34 +04:00
|
|
|
(!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 04:42:35 +04:00
|
|
|
!revs.pending.nr)) ||
|
2006-04-15 09:43:34 +04:00
|
|
|
revs.diff)
|
2005-10-26 02:24:55 +04:00
|
|
|
usage(rev_list_usage);
|
|
|
|
|
2008-08-25 10:15:05 +04:00
|
|
|
save_commit_buffer = revs.verbose_header ||
|
|
|
|
revs.grep_filter.pattern_list;
|
rev-list --bisect: limit list before bisecting.
I noticed bisect does not work well without both good and bad.
Running this script in git.git repository would give you quite
different results:
#!/bin/sh
initial=e83c5163316f89bfbde7d9ab23ca2e25604af290
mid0=`git rev-list --bisect ^$initial --all`
git rev-list $mid0 | wc -l
git rev-list ^$mid0 --all | wc -l
mid1=`git rev-list --bisect --all`
git rev-list $mid1 | wc -l
git rev-list ^$mid1 --all | wc -l
The $initial commit is the very first commit you made. The
first midpoint bisects things evenly as designed, but the latter
does not.
The reason I got interested in this was because I was wondering
if something like the following would help people converting a
huge repository from foreign SCM, or preparing a repository to
be fetched over plain dumb HTTP only:
#!/bin/sh
N=4
P=.git/objects/pack
bottom=
while test 0 \< $N
do
N=$((N-1))
if test -z "$bottom"
then
newbottom=`git rev-list --bisect --all`
else
newbottom=`git rev-list --bisect ^$bottom --all`
fi
if test -z "$bottom"
then
rev_list="$newbottom"
elif test 0 = $N
then
rev_list="^$bottom --all"
else
rev_list="^$bottom $newbottom"
fi
p=$(git rev-list --unpacked --objects $rev_list |
git pack-objects $P/pack)
git show-index <$P/pack-$p.idx | wc -l
bottom=$newbottom
done
The idea is to pack older half of the history to one pack, then
older half of the remaining history to another, to continue a
few times, using finer granularity as we get closer to the tip.
This may not matter, since for a truly huge history, running
bisect number of times could be quite time consuming, and we
might be better off running "git rev-list --all" once into a
temporary file, and manually pick cut-off points from the
resulting list of commits. After all we are talking about
"approximately half" for such an usage, and older history does
not matter much.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 02:57:32 +04:00
|
|
|
if (bisect_list)
|
|
|
|
revs.limited = 1;
|
2006-03-29 05:28:04 +04:00
|
|
|
|
2008-02-18 10:31:56 +03:00
|
|
|
if (prepare_revision_walk(&revs))
|
|
|
|
die("revision walk setup failed");
|
2006-02-28 22:24:00 +03:00
|
|
|
if (revs.tree_objects)
|
2006-09-06 12:42:23 +04:00
|
|
|
mark_edges_uninteresting(revs.commits, &revs, show_edge);
|
2006-02-28 22:24:00 +03:00
|
|
|
|
2007-03-22 08:15:54 +03:00
|
|
|
if (bisect_list) {
|
|
|
|
int reaches = reaches, all = all;
|
|
|
|
|
2007-10-22 09:47:56 +04:00
|
|
|
revs.commits = find_bisection(revs.commits, &reaches, &all,
|
|
|
|
bisect_find_all);
|
2009-03-26 07:55:49 +03:00
|
|
|
|
2009-03-26 07:55:30 +03:00
|
|
|
if (bisect_show_vars)
|
2009-04-07 07:08:42 +04:00
|
|
|
return show_bisect_vars(&info, reaches, all);
|
2007-03-22 08:15:54 +03:00
|
|
|
}
|
2005-10-26 02:24:55 +04:00
|
|
|
|
2007-11-11 10:29:41 +03:00
|
|
|
traverse_commit_list(&revs,
|
2009-04-06 23:28:36 +04:00
|
|
|
quiet ? finish_commit : show_commit,
|
|
|
|
quiet ? finish_object : show_object,
|
2009-04-07 00:28:00 +04:00
|
|
|
&info);
|
2005-05-31 05:46:32 +04:00
|
|
|
|
2005-04-24 06:04:40 +04:00
|
|
|
return 0;
|
|
|
|
}
|