Improve the safety check used in fetch.c

The recent safety check to trust only the commits we have made
things impossibly slow and turn out to waste a lot of memory.

This commit fixes it with the following improvements:

 - mark already scanned objects and avoid rescanning the same
   object again;

 - free the tree entries when we have scanned the tree entries;
   this is the same as b0d8923ec0
   which reduced memory usage by rev-list;

 - plug memory leak from the object_list dequeuing code;

 - use the process_queue not just for fetching but for scanning,
   to make things tail recursive to avoid deep recursion; the
   deep recursion was especially prominent when we cloned a big
   pack.

 - avoid has_sha1_file() call when we already know we do not have
   that object.

Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
Junio C Hamano 2005-09-18 01:01:07 -07:00
Родитель 490e23d22a
Коммит 85d106c267
1 изменённых файлов: 44 добавлений и 32 удалений

76
fetch.c
Просмотреть файл

@ -33,36 +33,33 @@ static void report_missing(const char *what, const unsigned char *missing)
what, missing_hex, sha1_to_hex(current_commit_sha1));
}
static int make_sure_we_have_it(const char *what, unsigned char *sha1)
{
int status = 0;
if (!has_sha1_file(sha1)) {
status = fetch(sha1);
if (status && what)
report_missing(what, sha1);
}
return status;
}
static int process(unsigned char *sha1, const char *type);
static int process_tree(struct tree *tree)
{
struct tree_entry_list *entries;
struct tree_entry_list *entry;
if (parse_tree(tree))
return -1;
for (entries = tree->entries; entries; entries = entries->next) {
if (process(entries->item.any->sha1,
entries->directory ? tree_type : blob_type))
entry = tree->entries;
tree->entries = NULL;
while (entry) {
struct tree_entry_list *next = entry->next;
if (process(entry->item.any->sha1,
entry->directory ? tree_type : blob_type))
return -1;
free(entry);
entry = next;
}
return 0;
}
#define COMPLETE 1U
#define TO_FETCH 2U
#define TO_SCAN 4U
#define SCANNED 8U
static struct commit_list *complete = NULL;
static int process_commit(struct commit *commit)
@ -74,12 +71,13 @@ static int process_commit(struct commit *commit)
pop_most_recent_commit(&complete, COMPLETE);
}
if (commit->object.flags & COMPLETE)
return 0;
memcpy(current_commit_sha1, commit->object.sha1, 20);
pull_say("walk %s\n", sha1_to_hex(commit->object.sha1));
if (get_tree) {
if (process(commit->tree->object.sha1, tree_type))
return -1;
@ -89,8 +87,7 @@ static int process_commit(struct commit *commit)
if (get_history) {
struct commit_list *parents = commit->parents;
for (; parents; parents = parents->next) {
if (process(parents->item->object.sha1,
commit_type))
if (process(parents->item->object.sha1, commit_type))
return -1;
}
}
@ -109,6 +106,10 @@ static struct object_list **process_queue_end = &process_queue;
static int process_object(struct object *obj)
{
if (obj->flags & SCANNED)
return 0;
obj->flags |= SCANNED;
if (obj->type == commit_type) {
if (process_commit((struct commit *)obj))
return -1;
@ -139,14 +140,19 @@ static int process(unsigned char *sha1, const char *type)
if (has_sha1_file(sha1)) {
parse_object(sha1);
/* We already have it, so we should scan it now. */
return process_object(obj);
if (obj->flags & (SCANNED | TO_SCAN))
return 0;
object_list_insert(obj, process_queue_end);
process_queue_end = &(*process_queue_end)->next;
obj->flags |= TO_SCAN;
return 0;
}
if (object_list_contains(process_queue, obj))
if (obj->flags & (COMPLETE | TO_FETCH))
return 0;
object_list_insert(obj, process_queue_end);
process_queue_end = &(*process_queue_end)->next;
obj->flags |= TO_FETCH;
//fprintf(stderr, "prefetch %s\n", sha1_to_hex(sha1));
prefetch(sha1);
return 0;
@ -154,21 +160,27 @@ static int process(unsigned char *sha1, const char *type)
static int loop(void)
{
struct object_list *elem;
while (process_queue) {
struct object *obj = process_queue->item;
/*
fprintf(stderr, "%d objects to pull\n",
object_list_length(process_queue));
*/
process_queue = process_queue->next;
elem = process_queue;
process_queue = elem->next;
free(elem);
if (!process_queue)
process_queue_end = &process_queue;
//fprintf(stderr, "fetch %s\n", sha1_to_hex(obj->sha1));
if (make_sure_we_have_it(obj->type ? obj->type : "object",
obj->sha1))
return -1;
/* If we are not scanning this object, we placed it in
* the queue because we needed to fetch it first.
*/
if (! (obj->flags & TO_SCAN)) {
if (fetch(obj->sha1)) {
report_missing(obj->type
? obj->type
: "object", obj->sha1);
return -1;
}
}
if (!obj->type)
parse_object(obj->sha1);
if (process_object(obj))