From b41860bf28a16ad6015eb0c55dc95602f1235640 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 25 Feb 2008 22:46:11 +0100 Subject: [PATCH 1/5] unpack-objects: prevent writing of inconsistent objects This patch introduces a strict mode, which ensures that: - no malformed object will be written - no object with broken links will be written The patch ensures this by delaying the write of all non blob object. These object are written, after all objects they link to are written. An error can only result in unreferenced objects. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Documentation/git-unpack-objects.txt | 3 + builtin-unpack-objects.c | 110 +++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt index b79be3fd4c..3697896a06 100644 --- a/Documentation/git-unpack-objects.txt +++ b/Documentation/git-unpack-objects.txt @@ -40,6 +40,9 @@ OPTIONS and make the best effort to recover as many objects as possible. +--strict:: + Don't write objects with broken content or links. + Author ------ diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 50e07faa12..9d2a854950 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -7,11 +7,13 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include "tree-walk.h" #include "progress.h" #include "decorate.h" +#include "fsck.h" -static int dry_run, quiet, recover, has_errors; -static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file"; +static int dry_run, quiet, recover, has_errors, strict; +static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file"; /* We always read in 4kB chunks. */ static unsigned char buffer[4096]; @@ -31,6 +33,16 @@ static struct obj_buffer *lookup_object_buffer(struct object *base) return lookup_decoration(&obj_decorate, base); } +static void add_object_buffer(struct object *object, char *buffer, unsigned long size) +{ + struct obj_buffer *obj; + obj = xcalloc(1, sizeof(struct obj_buffer)); + obj->buffer = buffer; + obj->size = size; + if (add_decoration(&obj_decorate, object, obj)) + die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1)); +} + /* * Make sure at least "min" bytes are available in the buffer, and * return the pointer to the buffer. @@ -134,9 +146,58 @@ static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1, struct obj_info { off_t offset; unsigned char sha1[20]; + struct object *obj; }; +#define FLAG_OPEN (1u<<20) +#define FLAG_WRITTEN (1u<<21) + static struct obj_info *obj_list; +unsigned nr_objects; + +static void write_cached_object(struct object *obj) +{ + unsigned char sha1[20]; + struct obj_buffer *obj_buf = lookup_object_buffer(obj); + if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0) + die("failed to write object %s", sha1_to_hex(obj->sha1)); + obj->flags |= FLAG_WRITTEN; +} + +static int check_object(struct object *obj, int type, void *data) +{ + if (!obj) + return 0; + + if (obj->flags & FLAG_WRITTEN) + return 1; + + if (type != OBJ_ANY && obj->type != type) + die("object type mismatch"); + + if (!(obj->flags & FLAG_OPEN)) { + unsigned long size; + int type = sha1_object_info(obj->sha1, &size); + if (type != obj->type || type <= 0) + die("object of unexpected type"); + obj->flags |= FLAG_WRITTEN; + return 1; + } + + if (fsck_object(obj, 1, fsck_error_function)) + die("Error in object"); + if (!fsck_walk(obj, check_object, 0)) + die("Error on reachable objects of %s", sha1_to_hex(obj->sha1)); + write_cached_object(obj); + return 1; +} + +static void write_rest(void) +{ + unsigned i; + for (i = 0; i < nr_objects; i++) + check_object(obj_list[i].obj, OBJ_ANY, 0); +} static void added_object(unsigned nr, enum object_type type, void *data, unsigned long size); @@ -144,9 +205,36 @@ static void added_object(unsigned nr, enum object_type type, static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { - if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) - die("failed to write object"); added_object(nr, type, buf, size); + if (!strict) { + if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + die("failed to write object"); + free(buf); + obj_list[nr].obj = 0; + } else if (type == OBJ_BLOB) { + struct blob *blob; + if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + die("failed to write object"); + free(buf); + + blob = lookup_blob(obj_list[nr].sha1); + if (blob) + blob->object.flags |= FLAG_WRITTEN; + else + die("invalid blob object"); + obj_list[nr].obj = 0; + } else { + struct object *obj; + int eaten; + hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1); + obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten); + if (!obj) + die("invalid %s", typename(type)); + /* buf is stored via add_object_buffer and in obj, if its a tree or commit */ + add_object_buffer(obj, buf, size); + obj->flags |= FLAG_OPEN; + obj_list[nr].obj = obj; + } } static void resolve_delta(unsigned nr, enum object_type type, @@ -163,7 +251,6 @@ static void resolve_delta(unsigned nr, enum object_type type, die("failed to apply delta"); free(delta); write_object(nr, type, result, result_size); - free(result); } static void added_object(unsigned nr, enum object_type type, @@ -193,7 +280,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, if (!dry_run && buf) write_object(nr, type, buf, size); - free(buf); + else + free(buf); } static void unpack_delta_entry(enum object_type type, unsigned long delta_size, @@ -336,7 +424,8 @@ static void unpack_all(void) int i; struct progress *progress = NULL; struct pack_header *hdr = fill(sizeof(struct pack_header)); - unsigned nr_objects = ntohl(hdr->hdr_entries); + + nr_objects = ntohl(hdr->hdr_entries); if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE) die("bad pack file"); @@ -347,6 +436,7 @@ static void unpack_all(void) if (!quiet) progress = start_progress("Unpacking objects", nr_objects); obj_list = xmalloc(nr_objects * sizeof(*obj_list)); + memset(obj_list, 0, nr_objects * sizeof(*obj_list)); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); @@ -382,6 +472,10 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) recover = 1; continue; } + if (!strcmp(arg, "--strict")) { + strict = 1; + continue; + } if (!prefixcmp(arg, "--pack_header=")) { struct pack_header *hdr; char *c; @@ -407,6 +501,8 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); SHA1_Update(&ctx, buffer, offset); SHA1_Final(sha1, &ctx); + if (strict) + write_rest(); if (hashcmp(fill(20), sha1)) die("final sha1 did not match"); use(20); From c0e809e5c027443d6f77714a1c5f8c6233ca0a20 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 5 Mar 2008 00:14:32 -0800 Subject: [PATCH 2/5] t5300: add test for "unpack-objects --strict" This adds test for unpacking deltified objects with --strict option. - unpacking full trees with --strict should pass; - unpacking only trees with --strict should be rejected due to missing blobs; - unpacking only trees with --strict into an existing repository with necessary blobs should succeed. Signed-off-by: Junio C Hamano --- t/t5300-pack-object.sh | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index cd3c149800..ffc638ab2c 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -274,4 +274,52 @@ test_expect_success \ packname_4=$(git pack-objects test-4 LIST && + rm -f .git/index && + git update-index --index-info Date: Tue, 4 Mar 2008 23:46:51 -0800 Subject: [PATCH 3/5] unpack-objects: fix --strict handling Earlier attempt (which was reverted) called added_object() (by the way, the function should be renamed to resolve_dependents() --- it is called when we have a complete object data, and is responsible to resolve pending deltified objects that use this object as their delta base object) without updating obj_list[nr].sha1 with the correct value. Signed-off-by: Junio C Hamano --- builtin-unpack-objects.c | 73 +++++++++++++++++++++++++++++++--------- t/t5300-pack-object.sh | 2 +- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 9d2a854950..fecf0be779 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -21,6 +21,11 @@ static unsigned int offset, len; static off_t consumed_bytes; static SHA_CTX ctx; +/* + * When running under --strict mode, objects whose reachability are + * suspect are kept in core without getting written in the object + * store. + */ struct obj_buffer { char *buffer; unsigned long size; @@ -155,6 +160,10 @@ struct obj_info { static struct obj_info *obj_list; unsigned nr_objects; +/* + * Called only from check_object() after it verified this object + * is Ok. + */ static void write_cached_object(struct object *obj) { unsigned char sha1[20]; @@ -164,6 +173,11 @@ static void write_cached_object(struct object *obj) obj->flags |= FLAG_WRITTEN; } +/* + * At the very end of the processing, write_rest() scans the objects + * that have reachability requirements and calls this function. + * Verify its reachability and validity recursively and write it out. + */ static int check_object(struct object *obj, int type, void *data) { if (!obj) @@ -202,19 +216,25 @@ static void write_rest(void) static void added_object(unsigned nr, enum object_type type, void *data, unsigned long size); +/* + * Write out nr-th object from the list, now we know the contents + * of it. Under --strict, this buffers structured objects in-core, + * to be checked at the end. + */ static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { - added_object(nr, type, buf, size); if (!strict) { if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) die("failed to write object"); + added_object(nr, type, buf, size); free(buf); - obj_list[nr].obj = 0; + obj_list[nr].obj = NULL; } else if (type == OBJ_BLOB) { struct blob *blob; if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) die("failed to write object"); + added_object(nr, type, buf, size); free(buf); blob = lookup_blob(obj_list[nr].sha1); @@ -222,15 +242,15 @@ static void write_object(unsigned nr, enum object_type type, blob->object.flags |= FLAG_WRITTEN; else die("invalid blob object"); - obj_list[nr].obj = 0; + obj_list[nr].obj = NULL; } else { struct object *obj; int eaten; hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1); + added_object(nr, type, buf, size); obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten); if (!obj) die("invalid %s", typename(type)); - /* buf is stored via add_object_buffer and in obj, if its a tree or commit */ add_object_buffer(obj, buf, size); obj->flags |= FLAG_OPEN; obj_list[nr].obj = obj; @@ -253,6 +273,10 @@ static void resolve_delta(unsigned nr, enum object_type type, write_object(nr, type, result, result_size); } +/* + * We now know the contents of an object (which is nr-th in the pack); + * resolve all the deltified objects that are based on it. + */ static void added_object(unsigned nr, enum object_type type, void *data, unsigned long size) { @@ -284,13 +308,28 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, free(buf); } +static int resolve_against_held(unsigned nr, const unsigned char *base, + void *delta_data, unsigned long delta_size) +{ + struct object *obj; + struct obj_buffer *obj_buffer; + obj = lookup_object(base); + if (!obj) + return 0; + obj_buffer = lookup_object_buffer(obj); + if (!obj_buffer) + return 0; + resolve_delta(nr, obj->type, obj_buffer->buffer, + obj_buffer->size, delta_data, delta_size); + return 1; +} + static void unpack_delta_entry(enum object_type type, unsigned long delta_size, unsigned nr) { void *delta_data, *base; unsigned long base_size; unsigned char base_sha1[20]; - struct object *obj; if (type == OBJ_REF_DELTA) { hashcpy(base_sha1, fill(20)); @@ -300,7 +339,13 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, free(delta_data); return; } - if (!has_sha1_file(base_sha1)) { + if (has_sha1_file(base_sha1)) + ; /* Ok we have this one */ + else if (resolve_against_held(nr, base_sha1, + delta_data, delta_size)) + return; /* we are done */ + else { + /* cannot resolve yet --- queue it */ hashcpy(obj_list[nr].sha1, null_sha1); add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size); return; @@ -346,22 +391,18 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, } } if (!base_found) { - /* The delta base object is itself a delta that - has not been resolved yet. */ + /* + * The delta base object is itself a delta that + * has not been resolved yet. + */ hashcpy(obj_list[nr].sha1, null_sha1); add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size); return; } } - obj = lookup_object(base_sha1); - if (obj) { - struct obj_buffer *obj_buf = lookup_object_buffer(obj); - if (obj_buf) { - resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size); - return; - } - } + if (resolve_against_held(nr, base_sha1, delta_data, delta_size)) + return; base = read_sha1_file(base_sha1, &type, &base_size); if (!base) { diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index ffc638ab2c..7647f2142b 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -274,7 +274,7 @@ test_expect_success \ packname_4=$(git pack-objects test-4 Date: Mon, 25 Feb 2008 22:46:13 +0100 Subject: [PATCH 4/5] receive-pack: allow using --strict mode for unpacking objects When a configuration variable receive.fsckobjects is set, receive-pack runs unpack-objects with --strict mode to check all received objects. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- Documentation/config.txt | 6 ++++++ receive-pack.c | 36 +++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 4027726f2e..74a092c5e8 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -939,6 +939,12 @@ imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. +receive.fsckObjects:: + If it is set to true, git-receive-pack will check all received + objects. It will abort in the case of a malformed object or a + broken link. The result of an abort are only dangling objects. + Defaults to false. + receive.unpackLimit:: If the number of objects received in a push is below this limit then the objects will be unpacked into loose object diff --git a/receive-pack.c b/receive-pack.c index f83ae87e15..828d49001d 100644 --- a/receive-pack.c +++ b/receive-pack.c @@ -10,6 +10,7 @@ static const char receive_pack_usage[] = "git-receive-pack "; static int deny_non_fast_forwards = 0; +static int receive_fsck_objects; static int receive_unpack_limit = -1; static int transfer_unpack_limit = -1; static int unpack_limit = 100; @@ -35,6 +36,11 @@ static int receive_pack_config(const char *var, const char *value) return 0; } + if (strcmp(var, "receive.fsckobjects") == 0) { + receive_fsck_objects = git_config_bool(var, value); + return 0; + } + return git_default_config(var, value); } @@ -368,11 +374,13 @@ static const char *unpack(void) ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries)); if (ntohl(hdr.hdr_entries) < unpack_limit) { - int code; - const char *unpacker[3]; - unpacker[0] = "unpack-objects"; - unpacker[1] = hdr_arg; - unpacker[2] = NULL; + int code, i = 0; + const char *unpacker[4]; + unpacker[i++] = "unpack-objects"; + if (receive_fsck_objects) + unpacker[i++] = "--strict"; + unpacker[i++] = hdr_arg; + unpacker[i++] = NULL; code = run_command_v_opt(unpacker, RUN_GIT_CMD); switch (code) { case 0: @@ -393,8 +401,8 @@ static const char *unpack(void) return "unpacker exited with error code"; } } else { - const char *keeper[6]; - int s, status; + const char *keeper[7]; + int s, status, i = 0; char keep_arg[256]; struct child_process ip; @@ -402,12 +410,14 @@ static const char *unpack(void) if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) strcpy(keep_arg + s, "localhost"); - keeper[0] = "index-pack"; - keeper[1] = "--stdin"; - keeper[2] = "--fix-thin"; - keeper[3] = hdr_arg; - keeper[4] = keep_arg; - keeper[5] = NULL; + keeper[i++] = "index-pack"; + keeper[i++] = "--stdin"; + if (receive_fsck_objects) + keeper[i++] = "--strict"; + keeper[i++] = "--fix-thin"; + keeper[i++] = hdr_arg; + keeper[i++] = keep_arg; + keeper[i++] = NULL; memset(&ip, 0, sizeof(ip)); ip.argv = keeper; ip.out = -1; From 38a5739dfadd41c9e45bb05ed6fe1559895360cb Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Fri, 7 Mar 2008 08:39:53 +0100 Subject: [PATCH 5/5] t5300: add test for "index-pack --strict" This adds test for indexing packs with --strict option, basically the same as c0e809e (t5300: add test for "unpack-objects --strict") has done for unpack-objects. Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- t/t5300-pack-object.sh | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 7647f2142b..007b136fe6 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -322,4 +322,51 @@ test_expect_success 'unpacking with --strict' ' ) ' +test_expect_success 'index-pack with --strict' ' + + for j in a b c d e f g + do + for i in 0 1 2 3 4 5 6 7 8 9 + do + o=$(echo $j$i | git hash-object -w --stdin) && + echo "100644 $o 0 $j$i" + done + done >LIST && + rm -f .git/index && + git update-index --index-info