зеркало из https://github.com/microsoft/git.git
Merge branch 'jk/submodule-fsck-loose' into maint
* jk/submodule-fsck-loose: fsck: complain when .gitmodules is a symlink index-pack: check .gitmodules files with --strict unpack-objects: call fsck_finish() after fscking objects fsck: call fsck_finish() after fscking objects fsck: check .gitmodules content fsck: handle promisor objects in .gitmodules check fsck: detect gitmodules files fsck: actually fsck blob data fsck: simplify ".git" check index-pack: make fsck error message more specific
This commit is contained in:
Коммит
9e84a6d758
|
@ -337,7 +337,7 @@ static void check_connectivity(void)
|
|||
}
|
||||
}
|
||||
|
||||
static int fsck_obj(struct object *obj)
|
||||
static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
|
||||
{
|
||||
int err;
|
||||
|
||||
|
@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj)
|
|||
|
||||
if (fsck_walk(obj, NULL, &fsck_obj_options))
|
||||
objerror(obj, "broken links");
|
||||
err = fsck_object(obj, NULL, 0, &fsck_obj_options);
|
||||
err = fsck_object(obj, buffer, size, &fsck_obj_options);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
|
@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type,
|
|||
}
|
||||
obj->flags &= ~(REACHABLE | SEEN);
|
||||
obj->flags |= HAS_OBJ;
|
||||
return fsck_obj(obj);
|
||||
return fsck_obj(obj, buffer, size);
|
||||
}
|
||||
|
||||
static int default_refs;
|
||||
|
@ -504,44 +504,42 @@ static void get_default_heads(void)
|
|||
}
|
||||
}
|
||||
|
||||
static struct object *parse_loose_object(const struct object_id *oid,
|
||||
const char *path)
|
||||
{
|
||||
struct object *obj;
|
||||
void *contents;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
int eaten;
|
||||
|
||||
if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0)
|
||||
return NULL;
|
||||
|
||||
if (!contents && type != OBJ_BLOB)
|
||||
die("BUG: read_loose_object streamed a non-blob");
|
||||
|
||||
obj = parse_object_buffer(oid, type, size, contents, &eaten);
|
||||
|
||||
if (!eaten)
|
||||
free(contents);
|
||||
return obj;
|
||||
}
|
||||
|
||||
static int fsck_loose(const struct object_id *oid, const char *path, void *data)
|
||||
{
|
||||
struct object *obj = parse_loose_object(oid, path);
|
||||
struct object *obj;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
void *contents;
|
||||
int eaten;
|
||||
|
||||
if (!obj) {
|
||||
if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) {
|
||||
errors_found |= ERROR_OBJECT;
|
||||
error("%s: object corrupt or missing: %s",
|
||||
oid_to_hex(oid), path);
|
||||
return 0; /* keep checking other objects */
|
||||
}
|
||||
|
||||
if (!contents && type != OBJ_BLOB)
|
||||
BUG("read_loose_object streamed a non-blob");
|
||||
|
||||
obj = parse_object_buffer(oid, type, size, contents, &eaten);
|
||||
if (!obj) {
|
||||
errors_found |= ERROR_OBJECT;
|
||||
error("%s: object could not be parsed: %s",
|
||||
oid_to_hex(oid), path);
|
||||
if (!eaten)
|
||||
free(contents);
|
||||
return 0; /* keep checking other objects */
|
||||
}
|
||||
|
||||
obj->flags &= ~(REACHABLE | SEEN);
|
||||
obj->flags |= HAS_OBJ;
|
||||
if (fsck_obj(obj))
|
||||
if (fsck_obj(obj, contents, size))
|
||||
errors_found |= ERROR_OBJECT;
|
||||
return 0;
|
||||
|
||||
if (!eaten)
|
||||
free(contents);
|
||||
return 0; /* keep checking other objects, even if we saw an error */
|
||||
}
|
||||
|
||||
static int fsck_cruft(const char *basename, const char *path, void *data)
|
||||
|
@ -750,6 +748,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
|
|||
}
|
||||
stop_progress(&progress);
|
||||
}
|
||||
|
||||
if (fsck_finish(&fsck_obj_options))
|
||||
errors_found |= ERROR_OBJECT;
|
||||
}
|
||||
|
||||
for (i = 0; i < argc; i++) {
|
||||
|
|
|
@ -836,6 +836,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
|
|||
blob->object.flags |= FLAG_CHECKED;
|
||||
else
|
||||
die(_("invalid blob object %s"), oid_to_hex(oid));
|
||||
if (do_fsck_object &&
|
||||
fsck_object(&blob->object, (void *)data, size, &fsck_options))
|
||||
die(_("fsck error in packed object"));
|
||||
} else {
|
||||
struct object *obj;
|
||||
int eaten;
|
||||
|
@ -853,7 +856,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
|
|||
die(_("invalid %s"), type_name(type));
|
||||
if (do_fsck_object &&
|
||||
fsck_object(obj, buf, size, &fsck_options))
|
||||
die(_("Error in object"));
|
||||
die(_("fsck error in packed object"));
|
||||
if (strict && fsck_walk(obj, NULL, &fsck_options))
|
||||
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
|
||||
|
||||
|
@ -1477,6 +1480,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
|
|||
} else
|
||||
chmod(final_index_name, 0444);
|
||||
|
||||
if (do_fsck_object)
|
||||
add_packed_git(final_index_name, strlen(final_index_name), 0);
|
||||
|
||||
if (!from_stdin) {
|
||||
printf("%s\n", sha1_to_hex(hash));
|
||||
} else {
|
||||
|
@ -1818,6 +1824,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
|
|||
pack_hash);
|
||||
else
|
||||
close(input_fd);
|
||||
|
||||
if (do_fsck_object && fsck_finish(&fsck_options))
|
||||
die(_("fsck error in pack objects"));
|
||||
|
||||
free(objects);
|
||||
strbuf_release(&index_name_buf);
|
||||
if (pack_name == NULL)
|
||||
|
|
|
@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op
|
|||
if (!obj_buf)
|
||||
die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
|
||||
if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
|
||||
die("Error in object");
|
||||
die("fsck error in packed object");
|
||||
fsck_options.walk = check_object;
|
||||
if (fsck_walk(obj, NULL, &fsck_options))
|
||||
die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
|
||||
|
@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
|
|||
unpack_all();
|
||||
the_hash_algo->update_fn(&ctx, buffer, offset);
|
||||
the_hash_algo->final_fn(oid.hash, &ctx);
|
||||
if (strict)
|
||||
if (strict) {
|
||||
write_rest();
|
||||
if (fsck_finish(&fsck_options))
|
||||
die(_("fsck error in pack objects"));
|
||||
}
|
||||
if (hashcmp(fill(the_hash_algo->rawsz), oid.hash))
|
||||
die("final sha1 did not match");
|
||||
use(the_hash_algo->rawsz);
|
||||
|
|
138
fsck.c
138
fsck.c
|
@ -10,6 +10,13 @@
|
|||
#include "utf8.h"
|
||||
#include "sha1-array.h"
|
||||
#include "decorate.h"
|
||||
#include "oidset.h"
|
||||
#include "packfile.h"
|
||||
#include "submodule-config.h"
|
||||
#include "config.h"
|
||||
|
||||
static struct oidset gitmodules_found = OIDSET_INIT;
|
||||
static struct oidset gitmodules_done = OIDSET_INIT;
|
||||
|
||||
#define FSCK_FATAL -1
|
||||
#define FSCK_INFO -2
|
||||
|
@ -44,6 +51,7 @@
|
|||
FUNC(MISSING_TAG_ENTRY, ERROR) \
|
||||
FUNC(MISSING_TAG_OBJECT, ERROR) \
|
||||
FUNC(MISSING_TREE, ERROR) \
|
||||
FUNC(MISSING_TREE_OBJECT, ERROR) \
|
||||
FUNC(MISSING_TYPE, ERROR) \
|
||||
FUNC(MISSING_TYPE_ENTRY, ERROR) \
|
||||
FUNC(MULTIPLE_AUTHORS, ERROR) \
|
||||
|
@ -51,6 +59,11 @@
|
|||
FUNC(TREE_NOT_SORTED, ERROR) \
|
||||
FUNC(UNKNOWN_TYPE, ERROR) \
|
||||
FUNC(ZERO_PADDED_DATE, ERROR) \
|
||||
FUNC(GITMODULES_MISSING, ERROR) \
|
||||
FUNC(GITMODULES_BLOB, ERROR) \
|
||||
FUNC(GITMODULES_PARSE, ERROR) \
|
||||
FUNC(GITMODULES_NAME, ERROR) \
|
||||
FUNC(GITMODULES_SYMLINK, ERROR) \
|
||||
/* warnings */ \
|
||||
FUNC(BAD_FILEMODE, WARN) \
|
||||
FUNC(EMPTY_NAME, WARN) \
|
||||
|
@ -561,10 +574,18 @@ static int fsck_tree(struct tree *item, struct fsck_options *options)
|
|||
has_empty_name |= !*name;
|
||||
has_dot |= !strcmp(name, ".");
|
||||
has_dotdot |= !strcmp(name, "..");
|
||||
has_dotgit |= (!strcmp(name, ".git") ||
|
||||
is_hfs_dotgit(name) ||
|
||||
is_ntfs_dotgit(name));
|
||||
has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
|
||||
has_zero_pad |= *(char *)desc.buffer == '0';
|
||||
|
||||
if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
|
||||
if (!S_ISLNK(mode))
|
||||
oidset_insert(&gitmodules_found, oid);
|
||||
else
|
||||
retval += report(options, &item->object,
|
||||
FSCK_MSG_GITMODULES_SYMLINK,
|
||||
".gitmodules is a symbolic link");
|
||||
}
|
||||
|
||||
if (update_tree_entry_gently(&desc)) {
|
||||
retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
|
||||
break;
|
||||
|
@ -901,6 +922,66 @@ static int fsck_tag(struct tag *tag, const char *data,
|
|||
return fsck_tag_buffer(tag, data, size, options);
|
||||
}
|
||||
|
||||
struct fsck_gitmodules_data {
|
||||
struct object *obj;
|
||||
struct fsck_options *options;
|
||||
int ret;
|
||||
};
|
||||
|
||||
static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
|
||||
{
|
||||
struct fsck_gitmodules_data *data = vdata;
|
||||
const char *subsection, *key;
|
||||
int subsection_len;
|
||||
char *name;
|
||||
|
||||
if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
|
||||
!subsection)
|
||||
return 0;
|
||||
|
||||
name = xmemdupz(subsection, subsection_len);
|
||||
if (check_submodule_name(name) < 0)
|
||||
data->ret |= report(data->options, data->obj,
|
||||
FSCK_MSG_GITMODULES_NAME,
|
||||
"disallowed submodule name: %s",
|
||||
name);
|
||||
free(name);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fsck_blob(struct blob *blob, const char *buf,
|
||||
unsigned long size, struct fsck_options *options)
|
||||
{
|
||||
struct fsck_gitmodules_data data;
|
||||
|
||||
if (!oidset_contains(&gitmodules_found, &blob->object.oid))
|
||||
return 0;
|
||||
oidset_insert(&gitmodules_done, &blob->object.oid);
|
||||
|
||||
if (!buf) {
|
||||
/*
|
||||
* A missing buffer here is a sign that the caller found the
|
||||
* blob too gigantic to load into memory. Let's just consider
|
||||
* that an error.
|
||||
*/
|
||||
return report(options, &blob->object,
|
||||
FSCK_MSG_GITMODULES_PARSE,
|
||||
".gitmodules too large to parse");
|
||||
}
|
||||
|
||||
data.obj = &blob->object;
|
||||
data.options = options;
|
||||
data.ret = 0;
|
||||
if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
|
||||
".gitmodules", buf, size, &data))
|
||||
data.ret |= report(options, &blob->object,
|
||||
FSCK_MSG_GITMODULES_PARSE,
|
||||
"could not parse gitmodules blob");
|
||||
|
||||
return data.ret;
|
||||
}
|
||||
|
||||
int fsck_object(struct object *obj, void *data, unsigned long size,
|
||||
struct fsck_options *options)
|
||||
{
|
||||
|
@ -908,7 +989,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
|
|||
return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
|
||||
|
||||
if (obj->type == OBJ_BLOB)
|
||||
return 0;
|
||||
return fsck_blob((struct blob *)obj, data, size, options);
|
||||
if (obj->type == OBJ_TREE)
|
||||
return fsck_tree((struct tree *) obj, options);
|
||||
if (obj->type == OBJ_COMMIT)
|
||||
|
@ -932,3 +1013,52 @@ int fsck_error_function(struct fsck_options *o,
|
|||
error("object %s: %s", describe_object(o, obj), message);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fsck_finish(struct fsck_options *options)
|
||||
{
|
||||
int ret = 0;
|
||||
struct oidset_iter iter;
|
||||
const struct object_id *oid;
|
||||
|
||||
oidset_iter_init(&gitmodules_found, &iter);
|
||||
while ((oid = oidset_iter_next(&iter))) {
|
||||
struct blob *blob;
|
||||
enum object_type type;
|
||||
unsigned long size;
|
||||
char *buf;
|
||||
|
||||
if (oidset_contains(&gitmodules_done, oid))
|
||||
continue;
|
||||
|
||||
blob = lookup_blob(oid);
|
||||
if (!blob) {
|
||||
ret |= report(options, &blob->object,
|
||||
FSCK_MSG_GITMODULES_BLOB,
|
||||
"non-blob found at .gitmodules");
|
||||
continue;
|
||||
}
|
||||
|
||||
buf = read_sha1_file(oid->hash, &type, &size);
|
||||
if (!buf) {
|
||||
if (is_promisor_object(&blob->object.oid))
|
||||
continue;
|
||||
ret |= report(options, &blob->object,
|
||||
FSCK_MSG_GITMODULES_MISSING,
|
||||
"unable to read .gitmodules blob");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (type == OBJ_BLOB)
|
||||
ret |= fsck_blob(blob, buf, size, options);
|
||||
else
|
||||
ret |= report(options, &blob->object,
|
||||
FSCK_MSG_GITMODULES_BLOB,
|
||||
"non-blob found at .gitmodules");
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
oidset_clear(&gitmodules_found);
|
||||
oidset_clear(&gitmodules_done);
|
||||
return ret;
|
||||
}
|
||||
|
|
7
fsck.h
7
fsck.h
|
@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
|
|||
int fsck_object(struct object *obj, void *data, unsigned long size,
|
||||
struct fsck_options *options);
|
||||
|
||||
/*
|
||||
* Some fsck checks are context-dependent, and may end up queued; run this
|
||||
* after completing all fsck_object() calls in order to resolve any remaining
|
||||
* checks.
|
||||
*/
|
||||
int fsck_finish(struct fsck_options *options);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2209,7 +2209,7 @@ int read_loose_object(const char *path,
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (*type == OBJ_BLOB) {
|
||||
if (*type == OBJ_BLOB && *size > big_file_threshold) {
|
||||
if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
|
||||
goto out;
|
||||
} else {
|
||||
|
|
|
@ -79,6 +79,18 @@ pack_obj () {
|
|||
;;
|
||||
esac
|
||||
|
||||
# If it's not a delta, we can convince pack-objects to generate a pack
|
||||
# with just our entry, and then strip off the header (12 bytes) and
|
||||
# trailer (20 bytes).
|
||||
if test -z "$2"
|
||||
then
|
||||
echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
|
||||
size=$(wc -c <pack_obj.tmp) &&
|
||||
dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
|
||||
rm -f pack_obj.tmp
|
||||
return
|
||||
fi
|
||||
|
||||
echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
|
||||
return 1
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a
|
|||
real-world setup that confirms we catch this in practice.
|
||||
'
|
||||
. ./test-lib.sh
|
||||
. "$TEST_DIRECTORY"/lib-pack.sh
|
||||
|
||||
test_expect_success 'check names' '
|
||||
cat >expect <<-\EOF &&
|
||||
|
@ -73,4 +74,81 @@ test_expect_success 'clone evil superproject' '
|
|||
! grep "RUNNING POST CHECKOUT" output
|
||||
'
|
||||
|
||||
test_expect_success 'fsck detects evil superproject' '
|
||||
test_must_fail git fsck
|
||||
'
|
||||
|
||||
test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
|
||||
rm -rf dst.git &&
|
||||
git init --bare dst.git &&
|
||||
git -C dst.git config transfer.fsckObjects true &&
|
||||
test_must_fail git push dst.git HEAD
|
||||
'
|
||||
|
||||
test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
|
||||
rm -rf dst.git &&
|
||||
git init --bare dst.git &&
|
||||
git -C dst.git config transfer.fsckObjects true &&
|
||||
git -C dst.git config transfer.unpackLimit 1 &&
|
||||
test_must_fail git push dst.git HEAD
|
||||
'
|
||||
|
||||
# Normally our packs contain commits followed by trees followed by blobs. This
|
||||
# reverses the order, which requires backtracking to find the context of a
|
||||
# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
|
||||
test_expect_success 'create oddly ordered pack' '
|
||||
git checkout --orphan odd &&
|
||||
git rm -rf --cached . &&
|
||||
git add .gitmodules &&
|
||||
git commit -m odd &&
|
||||
{
|
||||
pack_header 3 &&
|
||||
pack_obj $(git rev-parse HEAD:.gitmodules) &&
|
||||
pack_obj $(git rev-parse HEAD^{tree}) &&
|
||||
pack_obj $(git rev-parse HEAD)
|
||||
} >odd.pack &&
|
||||
pack_trailer odd.pack
|
||||
'
|
||||
|
||||
test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
|
||||
rm -rf dst.git &&
|
||||
git init --bare dst.git &&
|
||||
test_must_fail git -C dst.git unpack-objects --strict <odd.pack
|
||||
'
|
||||
|
||||
test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
|
||||
rm -rf dst.git &&
|
||||
git init --bare dst.git &&
|
||||
test_must_fail git -C dst.git index-pack --strict --stdin <odd.pack
|
||||
'
|
||||
|
||||
test_expect_success 'fsck detects symlinked .gitmodules file' '
|
||||
git init symlink &&
|
||||
(
|
||||
cd symlink &&
|
||||
|
||||
# Make the tree directly to avoid index restrictions.
|
||||
#
|
||||
# Because symlinks store the target as a blob, choose
|
||||
# a pathname that could be parsed as a .gitmodules file
|
||||
# to trick naive non-symlink-aware checking.
|
||||
tricky="[foo]bar=true" &&
|
||||
content=$(git hash-object -w ../.gitmodules) &&
|
||||
target=$(printf "$tricky" | git hash-object -w --stdin) &&
|
||||
tree=$(
|
||||
{
|
||||
printf "100644 blob $content\t$tricky\n" &&
|
||||
printf "120000 blob $target\t.gitmodules\n"
|
||||
} | git mktree
|
||||
) &&
|
||||
commit=$(git commit-tree $tree) &&
|
||||
|
||||
# Check not only that we fail, but that it is due to the
|
||||
# symlink detector; this grep string comes from the config
|
||||
# variable name and will not be translated.
|
||||
test_must_fail git fsck 2>output &&
|
||||
grep gitmodulesSymlink output
|
||||
)
|
||||
'
|
||||
|
||||
test_done
|
||||
|
|
Загрузка…
Ссылка в новой задаче