2006-07-06 21:16:22 +04:00
|
|
|
#include "cache.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "revision.h"
|
|
|
|
#include "builtin.h"
|
2007-01-06 13:16:17 +03:00
|
|
|
#include "reachable.h"
|
2008-03-23 23:50:29 +03:00
|
|
|
#include "parse-options.h"
|
2011-11-05 16:00:08 +04:00
|
|
|
#include "progress.h"
|
2018-05-16 02:42:15 +03:00
|
|
|
#include "object-store.h"
|
2006-07-06 21:16:22 +04:00
|
|
|
|
2008-03-23 23:50:29 +03:00
|
|
|
static const char * const prune_usage[] = {
|
2017-11-21 18:51:52 +03:00
|
|
|
N_("git prune [-n] [-v] [--progress] [--expire <time>] [--] [<head>...]"),
|
2008-03-23 23:50:29 +03:00
|
|
|
NULL
|
|
|
|
};
|
2006-08-15 21:23:48 +04:00
|
|
|
static int show_only;
|
2008-09-29 20:49:52 +04:00
|
|
|
static int verbose;
|
2017-04-26 22:29:31 +03:00
|
|
|
static timestamp_t expire;
|
2011-11-08 09:34:08 +04:00
|
|
|
static int show_progress = -1;
|
2006-07-06 21:16:22 +04:00
|
|
|
|
2013-12-18 03:22:31 +04:00
|
|
|
static int prune_tmp_file(const char *fullpath)
|
2008-07-25 02:41:12 +04:00
|
|
|
{
|
2010-02-27 06:50:02 +03:00
|
|
|
struct stat st;
|
|
|
|
if (lstat(fullpath, &st))
|
|
|
|
return error("Could not stat '%s'", fullpath);
|
|
|
|
if (st.st_mtime > expire)
|
|
|
|
return 0;
|
2012-08-07 09:01:49 +04:00
|
|
|
if (show_only || verbose)
|
|
|
|
printf("Removing stale temporary file %s\n", fullpath);
|
2008-07-25 02:41:12 +04:00
|
|
|
if (!show_only)
|
2009-04-30 01:22:56 +04:00
|
|
|
unlink_or_warn(fullpath);
|
2008-07-25 02:41:12 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
static void perform_reachability_traversal(struct rev_info *revs)
|
2006-07-06 21:16:22 +04:00
|
|
|
{
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
static int initialized;
|
|
|
|
struct progress *progress = NULL;
|
|
|
|
|
|
|
|
if (initialized)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (show_progress)
|
|
|
|
progress = start_delayed_progress(_("Checking connectivity"), 0);
|
|
|
|
mark_reachable_objects(revs, 1, expire, progress);
|
|
|
|
stop_progress(&progress);
|
|
|
|
initialized = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int is_object_reachable(const struct object_id *oid,
|
|
|
|
struct rev_info *revs)
|
|
|
|
{
|
2019-02-14 07:38:21 +03:00
|
|
|
struct object *obj;
|
|
|
|
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
perform_reachability_traversal(revs);
|
2014-10-16 02:38:55 +04:00
|
|
|
|
2019-06-20 10:41:14 +03:00
|
|
|
obj = lookup_object(the_repository, oid);
|
2019-02-14 07:38:21 +03:00
|
|
|
return obj && (obj->flags & SEEN);
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int prune_object(const struct object_id *oid, const char *fullpath,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
struct rev_info *revs = data;
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (is_object_reachable(oid, revs))
|
2014-10-16 02:38:55 +04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (lstat(fullpath, &st)) {
|
|
|
|
/* report errors, but do not stop pruning */
|
|
|
|
error("Could not stat '%s'", fullpath);
|
|
|
|
return 0;
|
|
|
|
}
|
2010-02-27 06:50:02 +03:00
|
|
|
if (st.st_mtime > expire)
|
|
|
|
return 0;
|
2008-09-29 20:49:52 +04:00
|
|
|
if (show_only || verbose) {
|
2018-04-25 21:20:59 +03:00
|
|
|
enum object_type type = oid_object_info(the_repository, oid,
|
|
|
|
NULL);
|
2017-02-22 02:47:35 +03:00
|
|
|
printf("%s %s\n", oid_to_hex(oid),
|
2018-02-14 21:59:24 +03:00
|
|
|
(type > 0) ? type_name(type) : "unknown");
|
2008-09-29 20:49:52 +04:00
|
|
|
}
|
|
|
|
if (!show_only)
|
2009-04-30 01:22:56 +04:00
|
|
|
unlink_or_warn(fullpath);
|
2006-07-06 21:16:22 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-10-16 02:38:55 +04:00
|
|
|
/*
 * Callback for entries in the object directory that are not loose
 * objects.  Names starting with "tmp_obj_" are handed to
 * prune_tmp_file(); everything else is only reported on stderr.
 * "data" is unused.  Always returns 0.
 */
static int prune_cruft(const char *basename, const char *path, void *data)
{
	if (!starts_with(basename, "tmp_obj_")) {
		fprintf(stderr, "bad sha1 file: %s\n", path);
		return 0;
	}

	prune_tmp_file(path);
	return 0;
}
|
|
|
|
|
2017-06-24 17:09:39 +03:00
|
|
|
static int prune_subdir(unsigned int nr, const char *path, void *data)
|
2006-07-06 21:16:22 +04:00
|
|
|
{
|
2014-10-16 02:38:55 +04:00
|
|
|
if (!show_only)
|
|
|
|
rmdir(path);
|
|
|
|
return 0;
|
2006-07-06 21:16:22 +04:00
|
|
|
}
|
|
|
|
|
2008-02-07 05:55:14 +03:00
|
|
|
/*
 * Write errors (particularly out of space) can result in
 * failed temporary packs (and more rarely indexes and other
 * files beginning with "tmp_") accumulating in the object
 * and the pack directories.
 *
 * Scan the directory "path" (not recursively) and pass every entry
 * whose name starts with "tmp_" to prune_tmp_file(), which applies the
 * expiry cutoff and the dry-run/verbose settings.
 *
 * A directory that does not exist has nothing to clean up, so ENOENT
 * is silently ignored; any other opendir() failure is reported on
 * stderr together with the reason.
 */
static void remove_temporary_files(const char *path)
{
	DIR *dir;
	struct dirent *de;

	dir = opendir(path);
	if (!dir) {
		if (errno != ENOENT)
			fprintf(stderr, "Unable to open directory %s: %s\n",
				path, strerror(errno));
		return;
	}

	while ((de = readdir(dir)) != NULL) {
		if (starts_with(de->d_name, "tmp_"))
			prune_tmp_file(mkpath("%s/%s", path, de->d_name));
	}

	closedir(dir);
}
|
|
|
|
|
2006-07-29 09:44:25 +04:00
|
|
|
int cmd_prune(int argc, const char **argv, const char *prefix)
|
2006-07-06 21:16:22 +04:00
|
|
|
{
|
2007-01-06 13:16:10 +03:00
|
|
|
struct rev_info revs;
|
2017-12-08 18:27:16 +03:00
|
|
|
int exclude_promisor_objects = 0;
|
2008-03-23 23:50:29 +03:00
|
|
|
const struct option options[] = {
|
2012-08-20 16:32:32 +04:00
|
|
|
OPT__DRY_RUN(&show_only, N_("do not remove, show only")),
|
|
|
|
OPT__VERBOSE(&verbose, N_("report pruned objects")),
|
|
|
|
OPT_BOOL(0, "progress", &show_progress, N_("show progress")),
|
2013-04-25 22:13:49 +04:00
|
|
|
OPT_EXPIRY_DATE(0, "expire", &expire,
|
|
|
|
N_("expire objects older than <time>")),
|
2017-12-08 18:27:16 +03:00
|
|
|
OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
|
|
|
|
N_("limit traversal to objects outside promisor packfiles")),
|
2008-03-23 23:50:29 +03:00
|
|
|
OPT_END()
|
|
|
|
};
|
2008-09-23 03:34:26 +04:00
|
|
|
char *s;
|
2006-07-06 21:16:22 +04:00
|
|
|
|
2017-04-26 22:29:31 +03:00
|
|
|
expire = TIME_MAX;
|
2007-01-06 00:31:43 +03:00
|
|
|
save_commit_buffer = 0;
|
2018-07-18 23:45:20 +03:00
|
|
|
read_replace_refs = 0;
|
2015-03-20 21:43:09 +03:00
|
|
|
ref_paranoia = 1;
|
2018-09-21 18:57:38 +03:00
|
|
|
repo_init_revisions(the_repository, &revs, prefix);
|
2006-07-06 21:16:22 +04:00
|
|
|
|
2009-05-23 22:53:12 +04:00
|
|
|
argc = parse_options(argc, argv, prefix, options, prune_usage, 0);
|
2014-11-30 11:24:48 +03:00
|
|
|
|
2015-06-23 13:54:11 +03:00
|
|
|
if (repository_format_precious_objects)
|
|
|
|
die(_("cannot prune in a precious-objects repo"));
|
|
|
|
|
2008-03-25 09:20:51 +03:00
|
|
|
while (argc--) {
|
2017-05-01 05:28:58 +03:00
|
|
|
struct object_id oid;
|
2008-03-25 09:20:51 +03:00
|
|
|
const char *name = *argv++;
|
|
|
|
|
2017-05-01 05:28:58 +03:00
|
|
|
if (!get_oid(name, &oid)) {
|
object: convert parse_object* to take struct object_id
Make parse_object, parse_object_or_die, and parse_object_buffer take a
pointer to struct object_id. Remove the temporary variables inserted
earlier, since they are no longer necessary. Transform all of the
callers using the following semantic patch:
@@
expression E1;
@@
- parse_object(E1.hash)
+ parse_object(&E1)
@@
expression E1;
@@
- parse_object(E1->hash)
+ parse_object(E1)
@@
expression E1, E2;
@@
- parse_object_or_die(E1.hash, E2)
+ parse_object_or_die(&E1, E2)
@@
expression E1, E2;
@@
- parse_object_or_die(E1->hash, E2)
+ parse_object_or_die(E1, E2)
@@
expression E1, E2, E3, E4, E5;
@@
- parse_object_buffer(E1.hash, E2, E3, E4, E5)
+ parse_object_buffer(&E1, E2, E3, E4, E5)
@@
expression E1, E2, E3, E4, E5;
@@
- parse_object_buffer(E1->hash, E2, E3, E4, E5)
+ parse_object_buffer(E1, E2, E3, E4, E5)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-07 01:10:38 +03:00
|
|
|
struct object *object = parse_object_or_die(&oid,
|
|
|
|
name);
|
2008-03-25 09:20:51 +03:00
|
|
|
add_pending_object(&revs, object, "");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
die("unrecognized argument: %s", name);
|
|
|
|
}
|
2011-11-08 09:34:08 +04:00
|
|
|
|
|
|
|
if (show_progress == -1)
|
|
|
|
show_progress = isatty(2);
|
2017-12-08 18:27:16 +03:00
|
|
|
if (exclude_promisor_objects) {
|
|
|
|
fetch_if_missing = 0;
|
|
|
|
revs.exclude_promisor_objects = 1;
|
|
|
|
}
|
2011-11-08 09:34:08 +04:00
|
|
|
|
2014-10-16 02:38:55 +04:00
|
|
|
for_each_loose_file_in_objdir(get_object_directory(), prune_object,
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
prune_cruft, prune_subdir, &revs);
|
2006-07-06 21:16:22 +04:00
|
|
|
|
2013-05-27 15:18:47 +04:00
|
|
|
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
|
2008-09-23 03:34:26 +04:00
|
|
|
remove_temporary_files(get_object_directory());
|
2012-09-04 21:31:14 +04:00
|
|
|
s = mkpathdup("%s/pack", get_object_directory());
|
2008-09-23 03:34:26 +04:00
|
|
|
remove_temporary_files(s);
|
|
|
|
free(s);
|
2013-12-05 17:02:54 +04:00
|
|
|
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
if (is_repository_shallow(the_repository)) {
|
|
|
|
perform_reachability_traversal(&revs);
|
2018-10-24 18:56:12 +03:00
|
|
|
prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
|
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk,
then for each loose object see if we found it in our walk. But if we
don't have any loose objects, we don't need to do the expensive walk in
the first place.
This patch postpones that walk until the first time we need to see its
results.
Note that this is really a specific case of a more general optimization,
which is that we could traverse only far enough to find the object under
consideration (i.e., stop the traversal when we find it, then pick up
again when asked about the next object, etc). That could save us in some
instances from having to do a full walk. But it's actually a bit tricky
to do with our traversal code, and you'd need to do a full walk anyway
if you have even a single unreachable object (which you generally do, if
any objects are actually left after running git-repack).
So in practice this lazy-load of the full walk catches one easy but
common case (i.e., you've just repacked via git-gc, and there's nothing
unreachable).
The perf script is fairly contrived, but it does show off the
improvement:
Test HEAD^ HEAD
-------------------------------------------------------------------------
5304.4: prune with no objects 3.66(3.60+0.05) 0.00(0.00+0.00) -100.0%
and would let us know if we accidentally regress this optimization.
Note also that we need to take special care with prune_shallow(), which
relies on us having performed the traversal. So this optimization can
only kick in for a non-shallow repository. Since this is easy to get
wrong and is not covered by existing tests, let's add an extra test to
t5304 that covers this case explicitly.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-02-14 07:35:22 +03:00
|
|
|
}
|
2013-12-05 17:02:54 +04:00
|
|
|
|
2006-07-06 21:16:22 +04:00
|
|
|
return 0;
|
|
|
|
}
|