2005-04-24 06:04:40 +04:00
|
|
|
#include "cache.h"
|
2017-06-14 21:07:36 +03:00
|
|
|
#include "config.h"
|
2005-04-24 06:04:40 +04:00
|
|
|
#include "commit.h"
|
2006-04-11 05:14:54 +04:00
|
|
|
#include "diff.h"
|
2006-02-26 03:19:46 +03:00
|
|
|
#include "revision.h"
|
2006-09-05 08:50:12 +04:00
|
|
|
#include "list-objects.h"
|
2017-11-21 23:58:51 +03:00
|
|
|
#include "list-objects-filter.h"
|
|
|
|
#include "list-objects-filter-options.h"
|
2018-10-06 00:31:23 +03:00
|
|
|
#include "object.h"
|
2018-05-16 02:42:15 +03:00
|
|
|
#include "object-store.h"
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only things required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
#include "pack.h"
|
|
|
|
#include "pack-bitmap.h"
|
2006-05-19 01:19:20 +04:00
|
|
|
#include "builtin.h"
|
2007-10-22 09:47:56 +04:00
|
|
|
#include "log-tree.h"
|
2008-05-04 14:36:54 +04:00
|
|
|
#include "graph.h"
|
2009-03-26 07:55:24 +03:00
|
|
|
#include "bisect.h"
|
2016-07-20 16:28:09 +03:00
|
|
|
#include "progress.h"
|
2017-07-07 12:08:30 +03:00
|
|
|
#include "reflog-walk.h"
|
2017-11-21 23:58:51 +03:00
|
|
|
#include "oidset.h"
|
2017-12-08 18:27:15 +03:00
|
|
|
#include "packfile.h"
|
2018-06-07 22:04:13 +03:00
|
|
|
#include "object-store.h"
|
2005-05-31 05:46:32 +04:00
|
|
|
|
2005-05-26 05:29:09 +04:00
|
|
|
/*
 * Usage text for rev-list: a synopsis line followed by the options,
 * one per line, grouped under the "limiting output", "ordering output",
 * "formatting output" and "special purpose" headings.  The last line
 * carries no trailing newline.
 *
 * NOTE(review): confirm the leading whitespace inside each literal
 * matches the intended layout of the help output.
 */
static const char rev_list_usage[] =
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
" limiting output:\n"
" --max-count=<n>\n"
" --max-age=<epoch>\n"
" --min-age=<epoch>\n"
" --sparse\n"
" --no-merges\n"
" --min-parents=<n>\n"
" --no-min-parents\n"
" --max-parents=<n>\n"
" --no-max-parents\n"
" --remove-empty\n"
" --all\n"
" --branches\n"
" --tags\n"
" --remotes\n"
" --stdin\n"
" --quiet\n"
" ordering output:\n"
" --topo-order\n"
" --date-order\n"
" --reverse\n"
" formatting output:\n"
" --parents\n"
" --children\n"
" --objects | --objects-edge\n"
" --unpacked\n"
" --header | --pretty\n"
" --abbrev=<n> | --no-abbrev\n"
" --abbrev-commit\n"
" --left-right\n"
" --count\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
" --bisect-all"
;
|
2005-05-26 05:29:09 +04:00
|
|
|
|
2016-07-20 16:28:09 +03:00
|
|
|
static struct progress *progress;
|
|
|
|
static unsigned progress_counter;
|
|
|
|
|
2017-11-21 23:58:51 +03:00
|
|
|
static struct list_objects_filter_options filter_options;
|
|
|
|
static struct oidset omitted_objects;
|
|
|
|
static int arg_print_omitted; /* print objects omitted by filter */
|
|
|
|
|
|
|
|
static struct oidset missing_objects;
|
|
|
|
/*
 * Policy applied by finish_object__ma() when an object that should be
 * listed cannot be found.
 */
enum missing_action {
	MA_ERROR = 0,      /* fail if any missing objects are encountered */
	MA_ALLOW_ANY,      /* silently allow ALL missing objects */
	MA_PRINT,          /* print ALL missing objects in special section */
	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};

/* Zero-initialized at file scope, so the policy starts out as MA_ERROR. */
static enum missing_action arg_missing_action;

#define DEFAULT_OIDSET_SIZE (16*1024)
|
|
|
|
|
2019-05-10 00:32:03 +03:00
|
|
|
static void finish_commit(struct commit *commit);
|
2009-04-06 23:28:36 +04:00
|
|
|
static void show_commit(struct commit *commit, void *data)
|
2005-06-02 20:19:53 +04:00
|
|
|
{
|
2009-04-07 00:28:00 +04:00
|
|
|
struct rev_list_info *info = data;
|
|
|
|
struct rev_info *revs = info->revs;
|
2009-04-06 23:28:36 +04:00
|
|
|
|
2016-07-20 16:28:09 +03:00
|
|
|
display_progress(progress, ++progress_counter);
|
|
|
|
|
2012-02-28 18:00:00 +04:00
|
|
|
if (info->flags & REV_LIST_QUIET) {
|
2019-05-10 00:32:03 +03:00
|
|
|
finish_commit(commit);
|
2012-02-28 18:00:00 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
graph_show_commit(revs->graph);
|
2008-05-04 14:36:54 +04:00
|
|
|
|
2010-06-10 15:47:23 +04:00
|
|
|
if (revs->count) {
|
2011-04-26 12:24:29 +04:00
|
|
|
if (commit->object.flags & PATCHSAME)
|
|
|
|
revs->count_same++;
|
|
|
|
else if (commit->object.flags & SYMMETRIC_LEFT)
|
2010-06-10 15:47:23 +04:00
|
|
|
revs->count_left++;
|
|
|
|
else
|
|
|
|
revs->count_right++;
|
2019-05-10 00:32:03 +03:00
|
|
|
finish_commit(commit);
|
2010-06-10 15:47:23 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-04-07 00:28:00 +04:00
|
|
|
if (info->show_timestamp)
|
2017-04-21 13:45:48 +03:00
|
|
|
printf("%"PRItime" ", commit->date);
|
2009-04-07 00:28:00 +04:00
|
|
|
if (info->header_prefix)
|
|
|
|
fputs(info->header_prefix, stdout);
|
2008-05-25 11:07:21 +04:00
|
|
|
|
2011-03-07 15:31:39 +03:00
|
|
|
if (!revs->graph)
|
|
|
|
fputs(get_revision_mark(revs, commit), stdout);
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->abbrev_commit && revs->abbrev)
|
2018-03-12 05:27:30 +03:00
|
|
|
fputs(find_unique_abbrev(&commit->object.oid, revs->abbrev),
|
2006-04-16 10:48:27 +04:00
|
|
|
stdout);
|
2006-04-07 08:32:36 +04:00
|
|
|
else
|
2015-11-10 05:22:28 +03:00
|
|
|
fputs(oid_to_hex(&commit->object.oid), stdout);
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->print_parents) {
|
2005-06-02 20:19:53 +04:00
|
|
|
struct commit_list *parents = commit->parents;
|
|
|
|
while (parents) {
|
2015-11-10 05:22:28 +03:00
|
|
|
printf(" %s", oid_to_hex(&parents->item->object.oid));
|
2005-06-02 20:19:53 +04:00
|
|
|
parents = parents->next;
|
|
|
|
}
|
|
|
|
}
|
2009-04-06 23:28:36 +04:00
|
|
|
if (revs->children.name) {
|
2008-04-04 10:01:47 +04:00
|
|
|
struct commit_list *children;
|
|
|
|
|
2009-04-06 23:28:36 +04:00
|
|
|
children = lookup_decoration(&revs->children, &commit->object);
|
2008-04-04 10:01:47 +04:00
|
|
|
while (children) {
|
2015-11-10 05:22:28 +03:00
|
|
|
printf(" %s", oid_to_hex(&children->item->object.oid));
|
2008-04-04 10:01:47 +04:00
|
|
|
children = children->next;
|
|
|
|
}
|
|
|
|
}
|
2009-04-06 23:28:36 +04:00
|
|
|
show_decorations(revs, commit);
|
|
|
|
if (revs->commit_format == CMIT_FMT_ONELINE)
|
2005-08-09 09:15:40 +04:00
|
|
|
putchar(' ');
|
|
|
|
else
|
|
|
|
putchar('\n');
|
|
|
|
|
commit: drop uses of get_cached_commit_buffer()
The "--show-all" revision option shows UNINTERESTING
commits. Some of these commits may be unparsed when we try
to show them (since we may or may not need to walk their
parents to fulfill the request).
Commit 3131b71301 (Add "--show-all" revision walker flag for
debugging, 2008-02-09) resolved this by just skipping
pretty-printing for commits without their object contents
cached, saying:
Because we now end up listing commits we may not even have been parsed
at all "show_log" and "show_commit" need to protect against commits
that don't have a commit buffer entry.
That was the easy fix to avoid the pretty-printer segfaulting,
but:
1. It doesn't work for all formats. E.g., --oneline
prints the oid for each such commit but not a trailing
newline, leading to jumbled output.
2. It only affects some commits, depending on whether we
happened to parse them or not (so if they were at the
tip of an UNINTERESTING starting point, or if we
happened to traverse over them, you'd see more data).
3. It unncessarily ties the decision to show the verbose
header to whether the commit buffer was cached. That
makes it harder to change the logic around caching
(e.g., if we could traverse without actually loading
the full commit objects).
These days it's safe to feed such a commit to the
pretty-print code. Since be5c9fb904 (logmsg_reencode: lazily
load missing commit buffers, 2013-01-26), we'll load it on
demand in such a case. So let's just always show the verbose
headers.
This does change the behavior of plumbing, but:
a. The --show-all option was explicitly introduced as a
debugging aid, and was never documented (and has rarely
even been mentioned on the list by git devs).
b. Avoiding the commits was already not deterministic due
to (2) above. So the caller might have seen full
headers for these commits anyway, and would need to be
prepared for it.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-22 02:13:38 +03:00
|
|
|
if (revs->verbose_header) {
|
2008-10-09 23:12:12 +04:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2009-10-19 19:48:08 +04:00
|
|
|
struct pretty_print_context ctx = {0};
|
|
|
|
ctx.abbrev = revs->abbrev;
|
|
|
|
ctx.date_mode = revs->date_mode;
|
log: respect date_mode_explicit with --format:%gd
When we show a reflog selector (e.g., via "git log -g"), we
perform some DWIM magic: while we normally show the entry's
index (e.g., HEAD@{1}), if the user has given us a date
with "--date", then we show a date-based select (e.g.,
HEAD@{yesterday}).
However, we don't want to trigger this magic if the
alternate date format we got was from the "log.date"
configuration; that is not sufficiently strong context for
us to invoke this particular magic. To fix this, commit
f4ea32f (improve reflog date/number heuristic, 2009-09-24)
introduced a "date_mode_explicit" flag in rev_info. This
flag is set only when we see a "--date" option on the
command line, and we a vanilla date to the reflog code if
the date was not explicit.
Later, commit 8f8f547 (Introduce new pretty formats %g[sdD]
for reflog information, 2009-10-19) added another way to
show selectors, and it did not respect the date_mode_explicit
flag from f4ea32f.
This patch propagates the date_mode_explicit flag to the
pretty-print code, which can then use it to pass the
appropriate date field to the reflog code. This brings the
behavior of "%gd" in line with the other formats, and means
that its output is independent of any user configuration.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-05-04 09:25:18 +04:00
|
|
|
ctx.date_mode_explicit = revs->date_mode_explicit;
|
2011-05-27 02:27:49 +04:00
|
|
|
ctx.fmt = revs->commit_format;
|
2013-06-26 14:19:50 +04:00
|
|
|
ctx.output_encoding = get_log_output_encoding();
|
2017-07-13 18:07:30 +03:00
|
|
|
ctx.color = revs->diffopt.use_color;
|
2011-05-27 02:27:49 +04:00
|
|
|
pretty_print_commit(&ctx, commit, &buf);
|
2016-09-01 02:27:20 +03:00
|
|
|
if (buf.len) {
|
|
|
|
if (revs->commit_format != CMIT_FMT_ONELINE)
|
|
|
|
graph_show_oneline(revs->graph);
|
2008-05-04 14:36:54 +04:00
|
|
|
|
2016-09-01 02:27:20 +03:00
|
|
|
graph_show_commit_msg(revs->graph, stdout, &buf);
|
2008-05-04 14:36:54 +04:00
|
|
|
|
2016-09-01 02:27:20 +03:00
|
|
|
/*
|
|
|
|
* Add a newline after the commit message.
|
|
|
|
*
|
|
|
|
* Usually, this newline produces a blank
|
|
|
|
* padding line between entries, in which case
|
|
|
|
* we need to add graph padding on this line.
|
|
|
|
*
|
|
|
|
* However, the commit message may not end in a
|
|
|
|
* newline. In this case the newline simply
|
|
|
|
* ends the last line of the commit message,
|
|
|
|
* and we don't need any graph output. (This
|
|
|
|
* always happens with CMIT_FMT_ONELINE, and it
|
|
|
|
* happens with CMIT_FMT_USERFORMAT when the
|
|
|
|
* format doesn't explicitly end in a newline.)
|
|
|
|
*/
|
|
|
|
if (buf.len && buf.buf[buf.len - 1] == '\n')
|
|
|
|
graph_show_padding(revs->graph);
|
2016-10-20 23:41:00 +03:00
|
|
|
putchar(info->hdr_termination);
|
2008-05-04 14:36:54 +04:00
|
|
|
} else {
|
2016-09-01 02:27:20 +03:00
|
|
|
/*
|
|
|
|
* If the message buffer is empty, just show
|
|
|
|
* the rest of the graph output for this
|
|
|
|
* commit.
|
|
|
|
*/
|
|
|
|
if (graph_show_remainder(revs->graph))
|
|
|
|
putchar('\n');
|
|
|
|
if (revs->commit_format == CMIT_FMT_ONELINE)
|
|
|
|
putchar('\n');
|
2008-05-04 14:36:54 +04:00
|
|
|
}
|
2007-09-10 14:35:06 +04:00
|
|
|
strbuf_release(&buf);
|
2008-05-04 14:36:54 +04:00
|
|
|
} else {
|
2009-04-06 23:28:36 +04:00
|
|
|
if (graph_show_remainder(revs->graph))
|
2008-05-04 14:36:54 +04:00
|
|
|
putchar('\n');
|
2005-07-05 03:36:48 +04:00
|
|
|
}
|
2007-06-29 21:40:46 +04:00
|
|
|
maybe_flush_or_die(stdout, "stdout");
|
2019-05-10 00:32:03 +03:00
|
|
|
finish_commit(commit);
|
2007-11-11 10:29:41 +03:00
|
|
|
}
|
|
|
|
|
2019-05-10 00:32:03 +03:00
|
|
|
static void finish_commit(struct commit *commit)
|
2007-11-11 10:29:41 +03:00
|
|
|
{
|
2006-06-18 05:47:58 +04:00
|
|
|
if (commit->parents) {
|
|
|
|
free_commit_list(commit->parents);
|
|
|
|
commit->parents = NULL;
|
|
|
|
}
|
2018-12-15 03:09:40 +03:00
|
|
|
free_commit_buffer(the_repository->parsed_objects,
|
|
|
|
commit);
|
2005-06-06 19:39:40 +04:00
|
|
|
}
|
|
|
|
|
2017-11-21 23:58:51 +03:00
|
|
|
static inline void finish_object__ma(struct object *obj)
|
|
|
|
{
|
2017-12-08 18:27:15 +03:00
|
|
|
/*
|
|
|
|
* Whether or not we try to dynamically fetch missing objects
|
|
|
|
* from the server, we currently DO NOT have the object. We
|
|
|
|
* can either print, allow (ignore), or conditionally allow
|
|
|
|
* (ignore) them.
|
|
|
|
*/
|
2017-11-21 23:58:51 +03:00
|
|
|
switch (arg_missing_action) {
|
|
|
|
case MA_ERROR:
|
2018-10-06 00:31:23 +03:00
|
|
|
die("missing %s object '%s'",
|
|
|
|
type_name(obj->type), oid_to_hex(&obj->oid));
|
2017-11-21 23:58:51 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
case MA_ALLOW_ANY:
|
|
|
|
return;
|
|
|
|
|
|
|
|
case MA_PRINT:
|
|
|
|
oidset_insert(&missing_objects, &obj->oid);
|
|
|
|
return;
|
|
|
|
|
2017-12-08 18:27:15 +03:00
|
|
|
case MA_ALLOW_PROMISOR:
|
|
|
|
if (is_promisor_object(&obj->oid))
|
|
|
|
return;
|
2018-10-06 00:31:23 +03:00
|
|
|
die("unexpected missing %s object '%s'",
|
|
|
|
type_name(obj->type), oid_to_hex(&obj->oid));
|
2017-12-08 18:27:15 +03:00
|
|
|
return;
|
|
|
|
|
2017-11-21 23:58:51 +03:00
|
|
|
default:
|
|
|
|
BUG("unhandled missing_action");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-08 18:27:15 +03:00
|
|
|
static int finish_object(struct object *obj, const char *name, void *cb_data)
|
2007-11-11 10:29:41 +03:00
|
|
|
{
|
2012-02-28 18:00:00 +04:00
|
|
|
struct rev_list_info *info = cb_data;
|
rev-list: allow cached objects in existence check
This fixes a regression in 7c0fe330d5 (rev-list: handle missing tree
objects properly, 2018-10-05) where rev-list will now complain about the
empty tree when it doesn't physically exist on disk.
Before that commit, we relied on the traversal code in list-objects.c to
walk through the trees. Since it uses parse_tree(), we'd do a normal
object lookup that includes looking in the set of "cached" objects
(which is where our magic internal empty-tree kicks in).
After that commit, we instead tell list-objects.c not to die on any
missing trees, and we check them ourselves using has_object_file(). But
that function uses OBJECT_INFO_SKIP_CACHED, which means we won't use our
internal empty tree.
This normally wouldn't come up. For most operations, Git will try to
write out the empty tree object as it would any other object. And
pack-objects in a push or fetch will send the empty tree (even if it's
virtual on the sending side). However, there are cases where this can
matter. One I found in the wild:
1. The root tree of a commit became empty by deleting all files,
without using an index. In this case it was done using libgit2's
tree builder API, but as the included test shows, it can easily be
done with regular git using hash-object.
The resulting repo works OK, as we'd avoid walking over our own
reachable commits for a connectivity check.
2. Cloning with --reference pointing to the repository from (1) can
trigger the problem, because we tell the other side we already have
that commit (and hence the empty tree), but then walk over it
during the connectivity check (where we complain about it missing).
Arguably the workflow in step (1) should be more careful about writing
the empty tree object if we're referencing it. But this workflow did
work prior to 7c0fe330d5, so let's restore it.
This patch makes the minimal fix, which is to swap out a direct call to
oid_object_info_extended(), minus the SKIP_CACHED flag, instead of
calling has_object_file(). This is all that has_object_file() is doing
under the hood. And there's little danger of unrelated fallout from
other unexpected "cached" objects, since there's only one call site that
ends such a cached object, and it's in git-blame.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-04 20:40:54 +03:00
|
|
|
if (oid_object_info_extended(the_repository, &obj->oid, NULL, 0) < 0) {
|
2017-11-21 23:58:51 +03:00
|
|
|
finish_object__ma(obj);
|
2017-12-08 18:27:15 +03:00
|
|
|
return 1;
|
|
|
|
}
|
2012-02-28 18:00:00 +04:00
|
|
|
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
|
2018-06-29 04:21:51 +03:00
|
|
|
parse_object(the_repository, &obj->oid);
|
2017-12-08 18:27:15 +03:00
|
|
|
return 0;
|
2007-11-11 10:29:41 +03:00
|
|
|
}
|
|
|
|
|
list-objects: pass full pathname to callbacks
When we find a blob at "a/b/c", we currently pass this to
our show_object_fn callbacks as two components: "a/b/" and
"c". Callbacks which want the full value then call
path_name(), which concatenates the two. But this is an
inefficient interface; the path is a strbuf, and we could
simply append "c" to it temporarily, then roll back the
length, without creating a new copy.
So we could improve this by teaching the callsites of
path_name() this trick (and there are only 3). But we can
also notice that no callback actually cares about the
broken-down representation, and simply pass each callback
the full path "a/b/c" as a string. The callback code becomes
even simpler, then, as we do not have to worry about freeing
an allocated buffer, nor rolling back our modification to
the strbuf.
This is theoretically less efficient, as some callbacks
would not bother to format the final path component. But in
practice this is not measurable. Since we use the same
strbuf over and over, our work to grow it is amortized, and
we really only pay to memcpy a few bytes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-02-12 01:28:36 +03:00
|
|
|
static void show_object(struct object *obj, const char *name, void *cb_data)
|
2005-06-25 09:56:58 +04:00
|
|
|
{
|
2012-02-14 00:17:11 +04:00
|
|
|
struct rev_list_info *info = cb_data;
|
2017-12-08 18:27:15 +03:00
|
|
|
if (finish_object(obj, name, cb_data))
|
|
|
|
return;
|
2016-07-20 16:28:09 +03:00
|
|
|
display_progress(progress, ++progress_counter);
|
2012-02-28 18:00:00 +04:00
|
|
|
if (info->flags & REV_LIST_QUIET)
|
|
|
|
return;
|
list-objects: pass full pathname to callbacks
When we find a blob at "a/b/c", we currently pass this to
our show_object_fn callbacks as two components: "a/b/" and
"c". Callbacks which want the full value then call
path_name(), which concatenates the two. But this is an
inefficient interface; the path is a strbuf, and we could
simply append "c" to it temporarily, then roll back the
length, without creating a new copy.
So we could improve this by teaching the callsites of
path_name() this trick (and there are only 3). But we can
also notice that no callback actually cares about the
broken-down representation, and simply pass each callback
the full path "a/b/c" as a string. The callback code becomes
even simpler, then, as we do not have to worry about freeing
an allocated buffer, nor rolling back our modification to
the strbuf.
This is theoretically less efficient, as some callbacks
would not bother to format the final path component. But in
practice this is not measurable. Since we use the same
strbuf over and over, our work to grow it is amortized, and
we really only pay to memcpy a few bytes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-02-12 01:28:36 +03:00
|
|
|
show_object_with_name(stdout, obj, name);
|
2005-06-25 09:56:58 +04:00
|
|
|
}
|
|
|
|
|
2006-09-06 12:42:23 +04:00
|
|
|
static void show_edge(struct commit *commit)
|
|
|
|
{
|
2015-11-10 05:22:28 +03:00
|
|
|
printf("-%s\n", oid_to_hex(&commit->object.oid));
|
2006-09-06 12:42:23 +04:00
|
|
|
}
|
|
|
|
|
2009-04-21 09:54:10 +04:00
|
|
|
/*
 * Print a string assignment on stdout in the form var='val', followed by
 * a newline.  "val" is written as-is between the single quotes; no
 * escaping is performed.
 */
static void print_var_str(const char *var, const char *val)
{
	printf("%s='%s'\n", var, val);
}
|
|
|
|
|
2009-04-21 09:54:10 +04:00
|
|
|
/*
 * Print an integer assignment on stdout in the form var=val (decimal),
 * followed by a newline.
 */
static void print_var_int(const char *var, int val)
{
	printf("%s=%d\n", var, val);
}
|
|
|
|
|
2010-01-12 09:21:18 +03:00
|
|
|
static int show_bisect_vars(struct rev_list_info *info, int reaches, int all)
|
2009-03-26 07:55:30 +03:00
|
|
|
{
|
2012-02-28 18:00:00 +04:00
|
|
|
int cnt, flags = info->flags;
|
2017-03-26 19:01:24 +03:00
|
|
|
char hex[GIT_MAX_HEXSZ + 1] = "";
|
2009-03-26 07:55:49 +03:00
|
|
|
struct commit_list *tried;
|
2009-04-07 00:28:00 +04:00
|
|
|
struct rev_info *revs = info->revs;
|
2009-03-26 07:55:30 +03:00
|
|
|
|
2012-02-28 17:59:59 +04:00
|
|
|
if (!revs->commits)
|
2009-03-26 07:55:30 +03:00
|
|
|
return 1;
|
|
|
|
|
2009-06-06 08:41:33 +04:00
|
|
|
revs->commits = filter_skipped(revs->commits, &tried,
|
|
|
|
flags & BISECT_SHOW_ALL,
|
|
|
|
NULL, NULL);
|
2009-03-26 07:55:49 +03:00
|
|
|
|
2009-03-26 07:55:30 +03:00
|
|
|
/*
|
2009-03-26 07:55:41 +03:00
|
|
|
* revs->commits can reach "reaches" commits among
|
2009-03-26 07:55:30 +03:00
|
|
|
* "all" commits. If it is good, then there are
|
|
|
|
* (all-reaches) commits left to be bisected.
|
|
|
|
* On the other hand, if it is bad, then the set
|
|
|
|
* to bisect is "reaches".
|
|
|
|
* A bisect set of size N has (N-1) commits further
|
|
|
|
* to test, as we already know one bad one.
|
|
|
|
*/
|
|
|
|
cnt = all - reaches;
|
|
|
|
if (cnt < reaches)
|
|
|
|
cnt = reaches;
|
2009-03-26 07:55:35 +03:00
|
|
|
|
2009-03-26 07:55:49 +03:00
|
|
|
if (revs->commits)
|
2017-01-29 01:03:03 +03:00
|
|
|
oid_to_hex_r(hex, &revs->commits->item->object.oid);
|
2009-03-26 07:55:30 +03:00
|
|
|
|
2009-03-29 13:55:43 +04:00
|
|
|
if (flags & BISECT_SHOW_ALL) {
|
2009-04-07 00:28:00 +04:00
|
|
|
traverse_commit_list(revs, show_commit, show_object, info);
|
2009-03-26 07:55:30 +03:00
|
|
|
printf("------\n");
|
|
|
|
}
|
|
|
|
|
2009-04-21 09:54:10 +04:00
|
|
|
print_var_str("bisect_rev", hex);
|
|
|
|
print_var_int("bisect_nr", cnt - 1);
|
|
|
|
print_var_int("bisect_good", all - reaches - 1);
|
|
|
|
print_var_int("bisect_bad", reaches - 1);
|
|
|
|
print_var_int("bisect_all", all);
|
|
|
|
print_var_int("bisect_steps", estimate_bisect_steps(all));
|
2009-03-26 07:55:30 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only things required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
static int show_object_fast(
|
2017-10-16 01:07:00 +03:00
|
|
|
const struct object_id *oid,
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only things required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
enum object_type type,
|
|
|
|
int exclude,
|
|
|
|
uint32_t name_hash,
|
|
|
|
struct packed_git *found_pack,
|
|
|
|
off_t found_offset)
|
|
|
|
{
|
2017-10-16 01:07:00 +03:00
|
|
|
fprintf(stdout, "%s\n", oid_to_hex(oid));
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2017-11-21 23:58:51 +03:00
|
|
|
/*
 * Translate the textual argument of "--missing=<value>" into the
 * file-level arg_missing_action setting.
 *
 * Recognized values are "error", "allow-any", "print" and
 * "allow-promisor".  Every recognized value except "error" also
 * clears the global fetch_if_missing flag.
 *
 * Returns 1 when the value was recognized (and the globals were
 * updated), 0 when it was not; in the latter case nothing is modified.
 */
static inline int parse_missing_action_value(const char *value)
|
|
|
|
{
|
|
|
|
/* "error" only selects MA_ERROR; fetch_if_missing is left untouched. */
if (!strcmp(value, "error")) {
|
|
|
|
arg_missing_action = MA_ERROR;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The three remaining actions each also clear fetch_if_missing. */
if (!strcmp(value, "allow-any")) {
|
|
|
|
arg_missing_action = MA_ALLOW_ANY;
|
2017-12-08 18:27:15 +03:00
|
|
|
fetch_if_missing = 0;
|
2017-11-21 23:58:51 +03:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(value, "print")) {
|
|
|
|
arg_missing_action = MA_PRINT;
|
2017-12-08 18:27:15 +03:00
|
|
|
fetch_if_missing = 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!strcmp(value, "allow-promisor")) {
|
|
|
|
arg_missing_action = MA_ALLOW_PROMISOR;
|
|
|
|
fetch_if_missing = 0;
|
2017-11-21 23:58:51 +03:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Unrecognized value: report failure without touching any state. */
return 0;
|
|
|
|
}
|
|
|
|
|
2006-07-29 09:44:25 +04:00
|
|
|
int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
2005-04-24 06:04:40 +04:00
|
|
|
{
|
2009-04-06 23:28:36 +04:00
|
|
|
struct rev_info revs;
|
2009-04-07 00:28:00 +04:00
|
|
|
struct rev_list_info info;
|
2018-12-04 01:10:19 +03:00
|
|
|
struct setup_revision_opt s_r_opt = {
|
|
|
|
.allow_exclude_promisor_objects = 1,
|
|
|
|
};
|
2006-02-27 19:54:36 +03:00
|
|
|
int i;
|
2009-03-26 07:55:17 +03:00
|
|
|
int bisect_list = 0;
|
2007-03-22 08:15:54 +03:00
|
|
|
int bisect_show_vars = 0;
|
2007-10-22 09:47:56 +04:00
|
|
|
int bisect_find_all = 0;
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
int use_bitmap_index = 0;
|
2016-07-20 16:28:09 +03:00
|
|
|
const char *show_progress = NULL;
|
2005-04-24 06:04:40 +04:00
|
|
|
|
2017-06-01 07:38:16 +03:00
|
|
|
if (argc == 2 && !strcmp(argv[1], "-h"))
|
|
|
|
usage(rev_list_usage);
|
|
|
|
|
2008-05-14 21:46:53 +04:00
|
|
|
git_config(git_default_config, NULL);
|
2018-09-21 18:57:38 +03:00
|
|
|
repo_init_revisions(the_repository, &revs, prefix);
|
2010-03-22 16:36:30 +03:00
|
|
|
revs.abbrev = DEFAULT_ABBREV;
|
2006-04-16 10:48:27 +04:00
|
|
|
revs.commit_format = CMIT_FMT_UNSPECIFIED;
|
2017-12-08 18:27:15 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan the argument list before invoking setup_revisions(), so that we
|
|
|
|
* know if fetch_if_missing needs to be set to 0.
|
|
|
|
*
|
|
|
|
* "--exclude-promisor-objects" acts as a pre-filter on missing objects
|
|
|
|
* by not crossing the boundary from realized objects to promisor
|
|
|
|
* objects.
|
|
|
|
*
|
|
|
|
* Let "--missing" to conditionally set fetch_if_missing.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
if (!strcmp(arg, "--exclude-promisor-objects")) {
|
|
|
|
fetch_if_missing = 0;
|
|
|
|
revs.exclude_promisor_objects = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
if (skip_prefix(arg, "--missing=", &arg)) {
|
|
|
|
if (revs.exclude_promisor_objects)
|
|
|
|
die(_("cannot combine --exclude-promisor-objects and --missing"));
|
|
|
|
if (parse_missing_action_value(arg))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
rev-list: let traversal die when --missing is not in use
Commit 7c0fe330d5 (rev-list: handle missing tree objects properly,
2018-10-05) taught the traversal machinery used by git-rev-list to
ignore missing trees, so that rev-list could handle them itself.
However, it does so only by checking via oid_object_info_extended() that
the object exists at all. This can miss several classes of errors that
were previously detected by rev-list:
- type mismatches (e.g., we expected a tree but got a blob)
- failure to read the object data (e.g., due to bitrot on disk)
This is especially important because we use "rev-list --objects" as our
connectivity check to admit new objects to the repository, and it will
now miss these cases (though the bitrot one is less important here,
because we'd typically have just hashed and stored the object).
There are a few options to fix this:
1. we could check these properties in rev-list when we do the existence
check. This is probably too expensive in practice (perhaps even for
a type check, but definitely for checking the whole content again,
which implies loading each object into memory twice).
2. teach the traversal machinery to differentiate between a missing
object, and one that could not be loaded as expected. This probably
wouldn't be too hard to detect type mismatches, but detecting bitrot
versus a truly missing object would require deep changes to the
object-loading code.
3. have the traversal machinery communicate the failure to the caller,
so that it can decide how to proceed without re-evaluating the object
itself.
Of those, I think (3) is probably the best path forward. However, this
patch does none of them. In the name of expediently fixing the
regression to a normal "rev-list --objects" that we use for connectivity
checks, this simply restores the pre-7c0fe330d5 behavior of having the
traversal die as soon as it fails to load a tree (when --missing is set
to MA_ERROR, which is the default).
Note that we can't get rid of the object-existence check in
finish_object(), because this also handles blobs (which are not
otherwise checked at all by the traversal code).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-10 05:13:23 +03:00
|
|
|
if (arg_missing_action)
|
|
|
|
revs.do_not_die_on_missing_tree = 1;
|
|
|
|
|
2018-12-04 01:10:19 +03:00
|
|
|
argc = setup_revisions(argc, argv, &revs, &s_r_opt);
|
2006-02-26 03:19:46 +03:00
|
|
|
|
2009-04-07 00:28:00 +04:00
|
|
|
memset(&info, 0, sizeof(info));
|
|
|
|
info.revs = &revs;
|
2009-10-27 21:28:07 +03:00
|
|
|
if (revs.bisect)
|
|
|
|
bisect_list = 1;
|
2009-04-07 00:28:00 +04:00
|
|
|
|
2017-10-31 21:19:11 +03:00
|
|
|
if (revs.diffopt.flags.quick)
|
2012-02-28 18:00:00 +04:00
|
|
|
info.flags |= REV_LIST_QUIET;
|
2005-05-06 12:00:11 +04:00
|
|
|
for (i = 1 ; i < argc; i++) {
|
2005-10-21 08:25:09 +04:00
|
|
|
const char *arg = argv[i];
|
2005-05-06 12:00:11 +04:00
|
|
|
|
2005-05-26 05:29:09 +04:00
|
|
|
if (!strcmp(arg, "--header")) {
|
2006-04-16 10:48:27 +04:00
|
|
|
revs.verbose_header = 1;
|
2005-06-01 19:42:22 +04:00
|
|
|
continue;
|
|
|
|
}
|
2006-03-22 11:22:00 +03:00
|
|
|
if (!strcmp(arg, "--timestamp")) {
|
2009-04-07 00:28:00 +04:00
|
|
|
info.show_timestamp = 1;
|
2006-03-22 11:22:00 +03:00
|
|
|
continue;
|
|
|
|
}
|
2005-06-18 09:54:50 +04:00
|
|
|
if (!strcmp(arg, "--bisect")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2007-10-22 09:47:56 +04:00
|
|
|
if (!strcmp(arg, "--bisect-all")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_find_all = 1;
|
2012-02-28 18:00:00 +04:00
|
|
|
info.flags |= BISECT_SHOW_ALL;
|
2009-02-08 17:54:47 +03:00
|
|
|
revs.show_decorations = 1;
|
2007-10-22 09:47:56 +04:00
|
|
|
continue;
|
|
|
|
}
|
2007-03-22 08:15:54 +03:00
|
|
|
if (!strcmp(arg, "--bisect-vars")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_show_vars = 1;
|
|
|
|
continue;
|
|
|
|
}
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
if (!strcmp(arg, "--use-bitmap-index")) {
|
|
|
|
use_bitmap_index = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--test-bitmap")) {
|
|
|
|
test_bitmap_walk(&revs);
|
|
|
|
return 0;
|
|
|
|
}
|
2016-07-20 16:28:09 +03:00
|
|
|
if (skip_prefix(arg, "--progress=", &arg)) {
|
|
|
|
show_progress = arg;
|
|
|
|
continue;
|
|
|
|
}
|
2017-11-21 23:58:51 +03:00
|
|
|
|
|
|
|
if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) {
|
|
|
|
parse_list_objects_filter(&filter_options, arg);
|
|
|
|
if (filter_options.choice && !revs.blob_objects)
|
|
|
|
die(_("object filtering requires --objects"));
|
|
|
|
if (filter_options.choice == LOFC_SPARSE_OID &&
|
|
|
|
!filter_options.sparse_oid_value)
|
2019-06-28 01:54:10 +03:00
|
|
|
die(
|
|
|
|
_("invalid sparse value '%s'"),
|
|
|
|
list_objects_filter_spec(
|
|
|
|
&filter_options));
|
2017-11-21 23:58:51 +03:00
|
|
|
continue;
|
|
|
|
}
|
2017-12-05 19:50:14 +03:00
|
|
|
if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) {
|
2017-12-08 18:58:50 +03:00
|
|
|
list_objects_filter_set_no_filter(&filter_options);
|
2017-12-05 19:50:14 +03:00
|
|
|
continue;
|
|
|
|
}
|
2017-11-21 23:58:51 +03:00
|
|
|
if (!strcmp(arg, "--filter-print-omitted")) {
|
|
|
|
arg_print_omitted = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-12-08 18:27:15 +03:00
|
|
|
if (!strcmp(arg, "--exclude-promisor-objects"))
|
|
|
|
continue; /* already handled above */
|
|
|
|
if (skip_prefix(arg, "--missing=", &arg))
|
|
|
|
continue; /* already handled above */
|
2017-11-21 23:58:51 +03:00
|
|
|
|
2006-02-26 03:19:46 +03:00
|
|
|
usage(rev_list_usage);
|
2005-05-26 05:29:09 +04:00
|
|
|
|
2005-05-06 12:00:11 +04:00
|
|
|
}
|
2006-04-16 10:48:27 +04:00
|
|
|
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
|
|
|
|
/* The command line has a --pretty */
|
2009-04-07 00:28:00 +04:00
|
|
|
info.hdr_termination = '\n';
|
2006-04-16 10:48:27 +04:00
|
|
|
if (revs.commit_format == CMIT_FMT_ONELINE)
|
2009-04-07 00:28:00 +04:00
|
|
|
info.header_prefix = "";
|
2006-04-16 10:48:27 +04:00
|
|
|
else
|
2009-04-07 00:28:00 +04:00
|
|
|
info.header_prefix = "commit ";
|
2006-04-16 10:48:27 +04:00
|
|
|
}
|
2006-04-17 23:42:36 +04:00
|
|
|
else if (revs.verbose_header)
|
|
|
|
/* Only --header was specified */
|
|
|
|
revs.commit_format = CMIT_FMT_RAW;
|
2005-05-06 12:00:11 +04:00
|
|
|
|
2017-07-07 12:08:30 +03:00
|
|
|
if ((!revs.commits && reflog_walk_empty(revs.reflog_info) &&
|
2013-10-16 21:26:39 +04:00
|
|
|
(!(revs.tag_objects || revs.tree_objects || revs.blob_objects) &&
|
2017-08-03 01:26:06 +03:00
|
|
|
!revs.pending.nr) &&
|
rev-list: make empty --stdin not an error
When we originally did the series that contains 7ba826290a
(revision: add rev_input_given flag, 2017-08-02) the intent
was that "git rev-list --stdin </dev/null" would similarly
become a successful noop. However, an attempt at the time to
do that did not work[1]. The problem is that rev_input_given
serves two roles:
- it tells rev-list.c that it should not error out
- it tells revision.c that it should not have the "default"
ref kick (e.g., "HEAD" in "git log")
We want to trigger the former, but not the latter. This is
technically possible with a single flag, if we set the flag
only after revision.c's revs->def check. But this introduces
a rather subtle ordering dependency.
Instead, let's keep two flags: one to denote when we got
actual input (which triggers both roles) and one for when we
read stdin (which triggers only the first).
This does mean a caller interested in the first role has to
check both flags, but there's only one such caller. And any
future callers might want to make the distinction anyway
(e.g., if they care less about erroring out, and more about
whether revision.c soaked up our stdin).
In fact, we already keep such a flag internally in
revision.c for this purpose, so this is really just exposing
that to the caller (and the old function-local flag can go
away in favor of our new one).
[1] https://public-inbox.org/git/20170802223416.gwiezhbuxbdmbjzx@sigill.intra.peff.net/
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-23 00:37:23 +03:00
|
|
|
!revs.rev_input_given && !revs.read_from_stdin) ||
|
2006-04-15 09:43:34 +04:00
|
|
|
revs.diff)
|
2005-10-26 02:24:55 +04:00
|
|
|
usage(rev_list_usage);
|
|
|
|
|
2015-08-23 20:56:40 +03:00
|
|
|
if (revs.show_notes)
|
|
|
|
die(_("rev-list does not support display of notes"));
|
|
|
|
|
2017-11-21 23:58:51 +03:00
|
|
|
if (filter_options.choice && use_bitmap_index)
|
|
|
|
die(_("cannot combine --use-bitmap-index with object filtering"));
|
|
|
|
|
"log --author=me --grep=it" should find intersection, not union
Historically, any grep filter in the "git log" family of commands was taken
as restricting to commits with any of the words in the commit log message.
However, the user almost always wants to find commits "done by this person
on that topic". With "--all-match" option, a series of grep patterns can
be turned into a requirement that all of them must produce a match, but
that makes it impossible to ask for "done by me, on either this or that"
with:
log --author=me --committer=him --grep=this --grep=that
because it will require both "this" and "that" to appear.
Change the "header" parser of grep library to treat the headers specially,
and parse it as:
(all-match-OR (HEADER-AUTHOR me)
(HEADER-COMMITTER him)
(OR
(PATTERN this)
(PATTERN that) ) )
Even though the "log" command line parser doesn't give direct access to
the extended grep syntax to group terms with parentheses, this change will
cover the majority of the case the users would want.
This incidentally revealed that one test in t7002 was bogus. It ran:
log --author=Thor --grep=Thu --format='%s'
and expected (wrongly) "Thu" to match "Thursday" in the author/committer
date, but that would never match, as the timestamp in raw commit buffer
does not have the name of the day-of-the-week.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-18 07:09:06 +03:00
|
|
|
save_commit_buffer = (revs.verbose_header ||
|
|
|
|
revs.grep_filter.pattern_list ||
|
|
|
|
revs.grep_filter.header_list);
|
rev-list --bisect: limit list before bisecting.
I noticed bisect does not work well without both good and bad.
Running this script in git.git repository would give you quite
different results:
#!/bin/sh
initial=e83c5163316f89bfbde7d9ab23ca2e25604af290
mid0=`git rev-list --bisect ^$initial --all`
git rev-list $mid0 | wc -l
git rev-list ^$mid0 --all | wc -l
mid1=`git rev-list --bisect --all`
git rev-list $mid1 | wc -l
git rev-list ^$mid1 --all | wc -l
The $initial commit is the very first commit you made. The
first midpoint bisects things evenly as designed, but the latter
does not.
The reason I got interested in this was because I was wondering
if something like the following would help people converting a
huge repository from foreign SCM, or preparing a repository to
be fetched over plain dumb HTTP only:
#!/bin/sh
N=4
P=.git/objects/pack
bottom=
while test 0 \< $N
do
N=$((N-1))
if test -z "$bottom"
then
newbottom=`git rev-list --bisect --all`
else
newbottom=`git rev-list --bisect ^$bottom --all`
fi
if test -z "$bottom"
then
rev_list="$newbottom"
elif test 0 = $N
then
rev_list="^$bottom --all"
else
rev_list="^$bottom $newbottom"
fi
p=$(git rev-list --unpacked --objects $rev_list |
git pack-objects $P/pack)
git show-index <$P/pack-$p.idx | wc -l
bottom=$newbottom
done
The idea is to pack older half of the history to one pack, then
older half of the remaining history to another, to continue a
few times, using finer granularity as we get closer to the tip.
This may not matter, since for a truly huge history, running
bisect number of times could be quite time consuming, and we
might be better off running "git rev-list --all" once into a
temporary file, and manually pick cut-off points from the
resulting list of commits. After all we are talking about
"approximately half" for such a usage, and older history does
not matter much.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 02:57:32 +04:00
|
|
|
if (bisect_list)
|
|
|
|
revs.limited = 1;
|
2006-03-29 05:28:04 +04:00
|
|
|
|
2016-07-20 16:28:09 +03:00
|
|
|
if (show_progress)
|
progress: simplify "delayed" progress API
We used to expose the full power of the delayed progress API to the
callers, so that they can specify, not just the message to show and
expected total amount of work that is used to compute the percentage
of work performed so far, the percent-threshold parameter P and the
delay-seconds parameter N. The progress meter starts to show at N
seconds into the operation only if we have not yet completed P per-cent
of the total work.
Most callers used either (0%, 2s) or (50%, 1s) as (P, N), but there
are oddballs that chose more random-looking values like 95%.
For a smoother workload, (50%, 1s) would allow us to start showing
the progress meter earlier than (0%, 2s), while keeping the chance
of not showing progress meter for long running operation the same as
the latter. For a task that would take 2s or more to complete, it
is likely that less than half of it would complete within the first
second, if the workload is smooth. But for a spiky workload whose
earlier part is easier, such a setting is likely to fail to show the
progress meter entirely and (0%, 2s) is more appropriate.
But that is merely a theory. Realistically, it is of dubious value
to ask each codepath to carefully consider smoothness of their
workload and specify their own setting by passing two extra
parameters. Let's simplify the API by dropping both parameters and
have everybody use (0%, 2s).
Oh, by the way, the percent-threshold parameter and the structure
member were consistently misspelled, which also is now fixed ;-)
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-08-19 20:39:41 +03:00
|
|
|
progress = start_delayed_progress(show_progress, 0);
|
2016-07-20 16:28:09 +03:00
|
|
|
|
2015-07-01 21:42:17 +03:00
|
|
|
if (use_bitmap_index && !revs.prune) {
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only thing required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
if (revs.count && !revs.left_right && !revs.cherry_mark) {
|
|
|
|
uint32_t commit_count;
|
rev-list: "adjust" results of "--count --use-bitmap-index -n"
If you ask rev-list for:
git rev-list --count --use-bitmap-index HEAD
we optimize out the actual traversal and just give you the
number of bits set in the commit bitmap. This is faster,
which is good.
But if you ask to limit the size of the traversal, like:
git rev-list --count --use-bitmap-index -n 100 HEAD
we'll still output the full bitmapped number we found. On
the surface, that might even seem OK. You explicitly asked
to use the bitmap index, and it was cheap to compute the
real answer, so we gave it to you.
But there's something much more complicated going on under
the hood. If we don't have a bitmap directly for HEAD, then
we have to actually traverse backwards, looking for a
bitmapped commit. And _that_ traversal is bounded by our
`-n` count.
This is a good thing, because it bounds the work we have to
do, which is probably what the user wanted by asking for
`-n`. But now it makes the output quite confusing. You might
get many values:
- your `-n` value, if we walked back and never found a
bitmap (or fewer if there weren't that many commits)
- the actual full count, if we found a bitmap root for
every path of our traversal within the `-n` limit
- any number in between! We might have walked back and
found _some_ bitmaps, but then cut off the traversal
early with some commits not accounted for in the result.
So you cannot even see a value higher than your `-n` and say
"OK, bitmaps kicked in, this must be the real full count".
The only sane thing is for git to just clamp the value to a
maximum of the `-n` value, which means we should output the
exact same results whether bitmaps are in use or not.
The test in t5310 demonstrates this by using `-n 1`.
Without this patch we fail in the full-bitmap case (where we
do not have to traverse at all) but _not_ in the
partial-bitmap case (where we have to walk down to find an
actual bitmap). With this patch, both cases just work.
I didn't implement the crazy in-between case, just because
it's complicated to set up, and is really a subset of the
full-count case, which we do cover.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-03 10:07:34 +03:00
|
|
|
int max_count = revs.max_count;
|
2018-06-07 22:04:13 +03:00
|
|
|
struct bitmap_index *bitmap_git;
|
|
|
|
if ((bitmap_git = prepare_bitmap_walk(&revs))) {
|
|
|
|
count_bitmap_commit_list(bitmap_git, &commit_count, NULL, NULL, NULL);
|
rev-list: "adjust" results of "--count --use-bitmap-index -n"
If you ask rev-list for:
git rev-list --count --use-bitmap-index HEAD
we optimize out the actual traversal and just give you the
number of bits set in the commit bitmap. This is faster,
which is good.
But if you ask to limit the size of the traversal, like:
git rev-list --count --use-bitmap-index -n 100 HEAD
we'll still output the full bitmapped number we found. On
the surface, that might even seem OK. You explicitly asked
to use the bitmap index, and it was cheap to compute the
real answer, so we gave it to you.
But there's something much more complicated going on under
the hood. If we don't have a bitmap directly for HEAD, then
we have to actually traverse backwards, looking for a
bitmapped commit. And _that_ traversal is bounded by our
`-n` count.
This is a good thing, because it bounds the work we have to
do, which is probably what the user wanted by asking for
`-n`. But now it makes the output quite confusing. You might
get many values:
- your `-n` value, if we walked back and never found a
bitmap (or fewer if there weren't that many commits)
- the actual full count, if we found a bitmap root for
every path of our traversal within the `-n` limit
- any number in between! We might have walked back and
found _some_ bitmaps, but then cut off the traversal
early with some commits not accounted for in the result.
So you cannot even see a value higher than your `-n` and say
"OK, bitmaps kicked in, this must be the real full count".
The only sane thing is for git to just clamp the value to a
maximum of the `-n` value, which means we should output the
exact same results whether bitmaps are in use or not.
The test in t5310 demonstrates this by using `-n 1`.
Without this patch we fail in the full-bitmap case (where we
do not have to traverse at all) but _not_ in the
partial-bitmap case (where we have to walk down to find an
actual bitmap). With this patch, both cases just work.
I didn't implement the crazy in-between case, just because
it's complicated to set up, and is really a subset of the
full-count case, which we do cover.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-03 10:07:34 +03:00
|
|
|
if (max_count >= 0 && max_count < commit_count)
|
|
|
|
commit_count = max_count;
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only things required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
printf("%d\n", commit_count);
|
2018-06-07 22:04:14 +03:00
|
|
|
free_bitmap_index(bitmap_git);
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only things required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
return 0;
|
|
|
|
}
|
rev-list: disable bitmaps when "-n" is used with listing objects
You can ask rev-list to use bitmaps to speed up an --objects
traversal, which should generally give you your answers much
faster.
Likewise, you can ask rev-list to limit such a traversal
with `-n`, in which case we'll show only a limited set of
commits (and only the tree and commit objects directly
reachable from those commits).
But if you do both together, the results are nonsensical. We
end up limiting any fallback traversal we do to _find_ the
bitmaps, but the actual set of objects we output will be
picked arbitrarily from the union of any bitmaps we do find,
and will involve the objects of many more commits.
It's possible that somebody might want this as a "show me
what you can, but limit the amount of work you do" flag.
But as with the prior commit clamping "--count", the results
are basically non-deterministic; you'll get the values from
some commits between `n` and the total number, and you can't
tell which.
And unlike the `--count` case, we can't easily generate the
"real" value from the bitmap values (you can't just walk
back `-n` commits and subtract out the reachable objects
from the boundary commits; the bitmaps for `X` record its
total reachability, so you don't know which objects are
directly from `X` itself, which from `X^`, and so on).
So let's just fall back to the non-bitmap code path in this
case, so we always give a sane answer.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-03 10:08:05 +03:00
|
|
|
} else if (revs.max_count < 0 &&
|
|
|
|
revs.tag_objects && revs.tree_objects && revs.blob_objects) {
|
2018-06-07 22:04:13 +03:00
|
|
|
struct bitmap_index *bitmap_git;
|
|
|
|
if ((bitmap_git = prepare_bitmap_walk(&revs))) {
|
|
|
|
traverse_bitmap_commit_list(bitmap_git, &show_object_fast);
|
2018-06-07 22:04:14 +03:00
|
|
|
free_bitmap_index(bitmap_git);
|
rev-list: add bitmap mode to speed up object lists
The bitmap reachability index used to speed up the counting objects
phase during `pack-objects` can also be used to optimize a normal
rev-list if the only things required are the SHA1s of the objects during
the list (i.e., not the path names at which trees and blobs were found).
Calling `git rev-list --objects --use-bitmap-index [committish]` will
perform an object iteration based on a bitmap result instead of actually
walking the object graph.
These are some example timings for `torvalds/linux` (warm cache,
best-of-five):
$ time git rev-list --objects master > /dev/null
real 0m34.191s
user 0m33.904s
sys 0m0.268s
$ time git rev-list --objects --use-bitmap-index master > /dev/null
real 0m1.041s
user 0m0.976s
sys 0m0.064s
Likewise, using `git rev-list --count --use-bitmap-index` will speed up
the counting operation by building the resulting bitmap and performing a
fast popcount (number of bits set on the bitmap) on the result.
Here are some sample timings of different ways to count commits in
`torvalds/linux`:
$ time git rev-list master | wc -l
399882
real 0m6.524s
user 0m6.060s
sys 0m3.284s
$ time git rev-list --count master
399882
real 0m4.318s
user 0m4.236s
sys 0m0.076s
$ time git rev-list --use-bitmap-index --count master
399882
real 0m0.217s
user 0m0.176s
sys 0m0.040s
This also respects negative refs, so you can use it to count
a slice of history:
$ time git rev-list --count v3.0..master
144843
real 0m1.971s
user 0m1.932s
sys 0m0.036s
$ time git rev-list --use-bitmap-index --count v3.0..master
real 0m0.280s
user 0m0.220s
sys 0m0.056s
Though note that the closer the endpoints, the less it helps. In the
traversal case, we have fewer commits to cross, so we take less time.
But the bitmap time is dominated by generating the pack revindex, which
is constant with respect to the refs given.
Note that you cannot yet get a fast --left-right count of a symmetric
difference (e.g., "--count --left-right master...topic"). The slow part
of that walk actually happens during the merge-base determination when
we parse "master...topic". Even though a count does not actually need to
know the real merge base (it only needs to take the symmetric difference
of the bitmaps), the revision code would require some refactoring to
handle this case.
Additionally, a `--test-bitmap` flag has been added that will perform
the same rev-list manually (i.e. using a normal revwalk) and using
bitmaps, and verify that the results are the same. This can be used to
exercise the bitmap code, and also to verify that the contents of the
.bitmap file are sane.
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 18:00:12 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-02-18 10:31:56 +03:00
|
|
|
if (prepare_revision_walk(&revs))
|
|
|
|
die("revision walk setup failed");
|
2006-02-28 22:24:00 +03:00
|
|
|
if (revs.tree_objects)
|
2019-01-16 21:25:58 +03:00
|
|
|
mark_edges_uninteresting(&revs, show_edge, 0);
|
2006-02-28 22:24:00 +03:00
|
|
|
|
2007-03-22 08:15:54 +03:00
|
|
|
if (bisect_list) {
|
-Wuninitialized: remove some 'init-self' workarounds
The 'self-initialised' variables construct (ie <type> var = var;) has
been used to silence gcc '-W[maybe-]uninitialized' warnings. This has,
unfortunately, caused MSVC to issue 'uninitialized variable' warnings.
Also, using clang static analysis causes complaints about an 'Assigned
value is garbage or undefined'.
There are six such constructs in the current codebase. Only one of the
six causes gcc to issue a '-Wmaybe-uninitialized' warning (which will
be addressed elsewhere). The remaining five 'init-self' gcc workarounds
are noted below, along with the commit which introduced them:
1. builtin/rev-list.c: 'reaches' and 'all', see commit 457f08a030
("git-rev-list: add --bisect-vars option.", 2007-03-21).
2. merge-recursive.c:2064 'mrtree', see commit f120ae2a8e ("merge-
recursive.c: mrtree in merge() is not used before set", 2007-10-29).
3. fast-import.c:3023 'oe', see commit 85c62395b1 ("fast-import: let
importers retrieve blobs", 2010-11-28).
4. fast-import.c:3006 'oe', see commit 28c7b1f7b7 ("fast-import: add a
get-mark command", 2015-07-01).
Remove the 'self-initialised' variable constructs noted above.
Signed-off-by: Ramsay Jones <ramsay@ramsayjones.plus.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-03-19 20:54:35 +03:00
|
|
|
int reaches, all;
|
2007-03-22 08:15:54 +03:00
|
|
|
|
2017-11-05 23:24:28 +03:00
|
|
|
find_bisection(&revs.commits, &reaches, &all, bisect_find_all);
|
2009-03-26 07:55:49 +03:00
|
|
|
|
2009-03-26 07:55:30 +03:00
|
|
|
if (bisect_show_vars)
|
2009-04-07 07:08:42 +04:00
|
|
|
return show_bisect_vars(&info, reaches, all);
|
2007-03-22 08:15:54 +03:00
|
|
|
}
|
2005-10-26 02:24:55 +04:00
|
|
|
|
2017-11-21 23:58:51 +03:00
|
|
|
if (arg_print_omitted)
|
|
|
|
oidset_init(&omitted_objects, DEFAULT_OIDSET_SIZE);
|
|
|
|
if (arg_missing_action == MA_PRINT)
|
|
|
|
oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE);
|
|
|
|
|
|
|
|
traverse_commit_list_filtered(
|
|
|
|
&filter_options, &revs, show_commit, show_object, &info,
|
|
|
|
(arg_print_omitted ? &omitted_objects : NULL));
|
|
|
|
|
|
|
|
if (arg_print_omitted) {
|
|
|
|
struct oidset_iter iter;
|
|
|
|
struct object_id *oid;
|
|
|
|
oidset_iter_init(&omitted_objects, &iter);
|
|
|
|
while ((oid = oidset_iter_next(&iter)))
|
|
|
|
printf("~%s\n", oid_to_hex(oid));
|
|
|
|
oidset_clear(&omitted_objects);
|
|
|
|
}
|
|
|
|
if (arg_missing_action == MA_PRINT) {
|
|
|
|
struct oidset_iter iter;
|
|
|
|
struct object_id *oid;
|
|
|
|
oidset_iter_init(&missing_objects, &iter);
|
|
|
|
while ((oid = oidset_iter_next(&iter)))
|
|
|
|
printf("?%s\n", oid_to_hex(oid));
|
|
|
|
oidset_clear(&missing_objects);
|
|
|
|
}
|
2005-05-31 05:46:32 +04:00
|
|
|
|
2016-07-20 16:28:09 +03:00
|
|
|
stop_progress(&progress);
|
|
|
|
|
2010-06-10 15:47:23 +04:00
|
|
|
if (revs.count) {
|
2011-04-26 12:24:29 +04:00
|
|
|
if (revs.left_right && revs.cherry_mark)
|
|
|
|
printf("%d\t%d\t%d\n", revs.count_left, revs.count_right, revs.count_same);
|
|
|
|
else if (revs.left_right)
|
2010-06-10 15:47:23 +04:00
|
|
|
printf("%d\t%d\n", revs.count_left, revs.count_right);
|
2011-04-26 12:24:29 +04:00
|
|
|
else if (revs.cherry_mark)
|
|
|
|
printf("%d\t%d\n", revs.count_left + revs.count_right, revs.count_same);
|
2010-06-10 15:47:23 +04:00
|
|
|
else
|
|
|
|
printf("%d\n", revs.count_left + revs.count_right);
|
|
|
|
}
|
|
|
|
|
2005-04-24 06:04:40 +04:00
|
|
|
return 0;
|
|
|
|
}
|