2008-02-26 00:46:04 +03:00
|
|
|
#ifndef GIT_FSCK_H
|
|
|
|
#define GIT_FSCK_H
|
|
|
|
|
2023-02-24 03:09:30 +03:00
|
|
|
#include "object.h"
|
2018-09-03 17:49:27 +03:00
|
|
|
#include "oidset.h"
|
|
|
|
|
fsck.h: move FSCK_{FATAL,INFO,ERROR,WARN,IGNORE} into an enum
Move the FSCK_{FATAL,INFO,ERROR,WARN,IGNORE} defines into a new
fsck_msg_type enum.
These defines were originally introduced in:
- ba002f3b28a (builtin-fsck: move common object checking code to
fsck.c, 2008-02-25)
- f50c4407305 (fsck: disallow demoting grave fsck errors to warnings,
2015-06-22)
- efaba7cc77f (fsck: optionally ignore specific fsck issues
completely, 2015-06-22)
- f27d05b1704 (fsck: allow upgrading fsck warnings to errors,
2015-06-22)
The reason these were defined in two different places is because we
use FSCK_{IGNORE,INFO,FATAL} only in fsck.c, but FSCK_{ERROR,WARN} are
used by external callbacks.
Untangling that would take some more work, since we expose the new
"enum fsck_msg_type" to both. Similar to "enum object_type" it's not
worth structuring the API in such a way that only those who need
FSCK_{ERROR,WARN} pass around a different type.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-28 16:15:40 +03:00
|
|
|
/*
 * Severity attached to an fsck message. As the comments inside the enum
 * note, FSCK_IGNORE, FSCK_INFO and FSCK_FATAL are meant for internal use,
 * while FSCK_ERROR and FSCK_WARN are the values fed to error callbacks.
 */
enum fsck_msg_type {
|
2021-03-28 16:15:41 +03:00
|
|
|
/* for internal use only */
|
|
|
|
FSCK_IGNORE,
|
|
|
|
FSCK_INFO,
|
|
|
|
FSCK_FATAL,
|
|
|
|
/* "public", fed to e.g. error_func callbacks */
|
|
|
|
FSCK_ERROR,
|
fsck.h: move FSCK_{FATAL,INFO,ERROR,WARN,IGNORE} into an enum
Move the FSCK_{FATAL,INFO,ERROR,WARN,IGNORE} defines into a new
fsck_msg_type enum.
These defines were originally introduced in:
- ba002f3b28a (builtin-fsck: move common object checking code to
fsck.c, 2008-02-25)
- f50c4407305 (fsck: disallow demoting grave fsck errors to warnings,
2015-06-22)
- efaba7cc77f (fsck: optionally ignore specific fsck issues
completely, 2015-06-22)
- f27d05b1704 (fsck: allow upgrading fsck warnings to errors,
2015-06-22)
The reason these were defined in two different places is because we
use FSCK_{IGNORE,INFO,FATAL} only in fsck.c, but FSCK_{ERROR,WARN} are
used by external callbacks.
Untangling that would take some more work, since we expose the new
"enum fsck_msg_type" to both. Similar to "enum object_type" it's not
worth structuring the API in such a way that only those who need
FSCK_{ERROR,WARN} pass around a different type.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-28 16:15:40 +03:00
|
|
|
FSCK_WARN,
|
|
|
|
};
|
2008-02-26 00:46:08 +03:00
|
|
|
|
2022-10-26 01:42:23 +03:00
|
|
|
/*
|
|
|
|
* Documentation/fsck-msgids.txt documents these; when
|
|
|
|
* modifying this list in any way, make sure to keep the
|
|
|
|
* two in sync.
|
|
|
|
*/
|
|
|
|
|
2021-03-28 16:15:45 +03:00
|
|
|
#define FOREACH_FSCK_MSG_ID(FUNC) \
|
|
|
|
/* fatal errors */ \
|
|
|
|
FUNC(NUL_IN_HEADER, FATAL) \
|
|
|
|
FUNC(UNTERMINATED_HEADER, FATAL) \
|
|
|
|
/* errors */ \
|
|
|
|
FUNC(BAD_DATE, ERROR) \
|
|
|
|
FUNC(BAD_DATE_OVERFLOW, ERROR) \
|
|
|
|
FUNC(BAD_EMAIL, ERROR) \
|
|
|
|
FUNC(BAD_NAME, ERROR) \
|
|
|
|
FUNC(BAD_OBJECT_SHA1, ERROR) \
|
|
|
|
FUNC(BAD_PARENT_SHA1, ERROR) \
|
|
|
|
FUNC(BAD_TIMEZONE, ERROR) \
|
|
|
|
FUNC(BAD_TREE, ERROR) \
|
|
|
|
FUNC(BAD_TREE_SHA1, ERROR) \
|
|
|
|
FUNC(BAD_TYPE, ERROR) \
|
|
|
|
FUNC(DUPLICATE_ENTRIES, ERROR) \
|
|
|
|
FUNC(MISSING_AUTHOR, ERROR) \
|
|
|
|
FUNC(MISSING_COMMITTER, ERROR) \
|
|
|
|
FUNC(MISSING_EMAIL, ERROR) \
|
|
|
|
FUNC(MISSING_NAME_BEFORE_EMAIL, ERROR) \
|
|
|
|
FUNC(MISSING_OBJECT, ERROR) \
|
|
|
|
FUNC(MISSING_SPACE_BEFORE_DATE, ERROR) \
|
|
|
|
FUNC(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
|
|
|
|
FUNC(MISSING_TAG, ERROR) \
|
|
|
|
FUNC(MISSING_TAG_ENTRY, ERROR) \
|
|
|
|
FUNC(MISSING_TREE, ERROR) \
|
|
|
|
FUNC(MISSING_TYPE, ERROR) \
|
|
|
|
FUNC(MISSING_TYPE_ENTRY, ERROR) \
|
|
|
|
FUNC(MULTIPLE_AUTHORS, ERROR) \
|
|
|
|
FUNC(TREE_NOT_SORTED, ERROR) \
|
|
|
|
FUNC(UNKNOWN_TYPE, ERROR) \
|
|
|
|
FUNC(ZERO_PADDED_DATE, ERROR) \
|
|
|
|
FUNC(GITMODULES_MISSING, ERROR) \
|
|
|
|
FUNC(GITMODULES_BLOB, ERROR) \
|
|
|
|
FUNC(GITMODULES_LARGE, ERROR) \
|
|
|
|
FUNC(GITMODULES_NAME, ERROR) \
|
|
|
|
FUNC(GITMODULES_SYMLINK, ERROR) \
|
|
|
|
FUNC(GITMODULES_URL, ERROR) \
|
|
|
|
FUNC(GITMODULES_PATH, ERROR) \
|
|
|
|
FUNC(GITMODULES_UPDATE, ERROR) \
|
2022-12-01 17:46:09 +03:00
|
|
|
FUNC(GITATTRIBUTES_MISSING, ERROR) \
|
|
|
|
FUNC(GITATTRIBUTES_LARGE, ERROR) \
|
|
|
|
FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \
|
|
|
|
FUNC(GITATTRIBUTES_BLOB, ERROR) \
|
2024-04-10 19:01:13 +03:00
|
|
|
FUNC(SYMLINK_TARGET_MISSING, ERROR) \
|
|
|
|
FUNC(SYMLINK_TARGET_BLOB, ERROR) \
|
2021-03-28 16:15:45 +03:00
|
|
|
/* warnings */ \
|
|
|
|
FUNC(EMPTY_NAME, WARN) \
|
|
|
|
FUNC(FULL_PATHNAME, WARN) \
|
|
|
|
FUNC(HAS_DOT, WARN) \
|
|
|
|
FUNC(HAS_DOTDOT, WARN) \
|
|
|
|
FUNC(HAS_DOTGIT, WARN) \
|
|
|
|
FUNC(NULL_SHA1, WARN) \
|
|
|
|
FUNC(ZERO_PADDED_FILEMODE, WARN) \
|
|
|
|
FUNC(NUL_IN_COMMIT, WARN) \
|
2023-08-31 09:20:01 +03:00
|
|
|
FUNC(LARGE_PATHNAME, WARN) \
|
2024-04-10 19:01:13 +03:00
|
|
|
FUNC(SYMLINK_TARGET_LENGTH, WARN) \
|
|
|
|
FUNC(SYMLINK_POINTS_TO_GIT_DIR, WARN) \
|
2021-03-28 16:15:45 +03:00
|
|
|
/* infos (reported as warnings, but ignored by default) */ \
|
fsck: downgrade tree badFilemode to "info"
The previous commit un-broke the "badFileMode" check; before then it was
literally testing nothing. And as far as I can tell, it has been so
since the very initial version of fsck.
The current severity of "badFileMode" is just "warning". But in the
--strict mode used by transfer.fsckObjects, that is elevated to an
error. This will potentially cause hassle for users, because historical
objects with bad modes will suddenly start causing pushes to many server
operators to be rejected.
At the same time, these bogus modes aren't actually a big risk. Because
we canonicalize them everywhere besides fsck, they can't cause too much
mischief in the real world. The worst thing you can do is end up with
two almost-identical trees that have different hashes but are
interpreted the same. That will generally cause things to be inefficient
rather than wrong, and is a bug somebody working on a Git implementation
would want to fix, but probably not worth inconveniencing users by
refusing to push or fetch.
So let's downgrade this to "info" by default, which is our setting for
"mention this when fscking, but don't ever reject, even under strict
mode". If somebody really wants to be paranoid, they can still adjust
the level using config.
Suggested-by: Xavier Morel <xavier.morel@masklinn.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-11 00:04:07 +03:00
|
|
|
FUNC(BAD_FILEMODE, INFO) \
|
2021-03-28 16:15:45 +03:00
|
|
|
FUNC(GITMODULES_PARSE, INFO) \
|
fsck: warn about symlinked dotfiles we'll open with O_NOFOLLOW
In the commits merged in via 204333b015 (Merge branch
'jk/open-dotgitx-with-nofollow', 2021-03-22), we stopped following
symbolic links for .gitattributes, .gitignore, and .mailmap files.
Let's teach fsck to warn that these symlinks are not going to do
anything. Note that this is just a warning, and won't block the objects
via transfer.fsckObjects, since there are reported to be cases of this
in the wild (and even once fixed, they will continue to exist in the
commit history of those projects, but are not particularly dangerous).
Note that we won't add these to the existing gitmodules block in the
fsck code. The logic for gitmodules is a bit more complicated, as we
also check the content of non-symlink instances we find. But for these
new files, there is no content check; we're just looking at the name and
mode of the tree entry (and we can avoid even the complicated name
checks in the common case that the mode doesn't indicate a symlink).
We can reuse the test helper function we defined for .gitmodules, though
(it needs some slight adjustments for the fsck error code, and because
we don't block these symlinks via verify_path()).
Note that I didn't explicitly test the transfer.fsckObjects case here
(nor does the existing .gitmodules test that it blocks a push). The
translation of fsck severities to outcomes is covered in general in
t5504.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 23:43:25 +03:00
|
|
|
FUNC(GITIGNORE_SYMLINK, INFO) \
|
|
|
|
FUNC(GITATTRIBUTES_SYMLINK, INFO) \
|
|
|
|
FUNC(MAILMAP_SYMLINK, INFO) \
|
2021-03-28 16:15:45 +03:00
|
|
|
FUNC(BAD_TAG_NAME, INFO) \
|
|
|
|
FUNC(MISSING_TAGGER_ENTRY, INFO) \
|
|
|
|
/* ignored (elevated when requested) */ \
|
|
|
|
FUNC(EXTRA_HEADER_ENTRY, IGNORE)
|
|
|
|
|
|
|
|
/*
 * Build enum fsck_msg_id from the FOREACH_FSCK_MSG_ID list: MSG_ID turns
 * each (id, msg_type) entry into an FSCK_MSG_<id> enumerator and discards
 * the msg_type argument. FSCK_MSG_MAX is listed last, so its value equals
 * the number of message ids in the list. MSG_ID is only a temporary helper
 * and is undefined again right after the enum.
 */
#define MSG_ID(id, msg_type) FSCK_MSG_##id,
|
|
|
|
enum fsck_msg_id {
|
|
|
|
FOREACH_FSCK_MSG_ID(MSG_ID)
|
|
|
|
FSCK_MSG_MAX
|
|
|
|
};
|
|
|
|
#undef MSG_ID
|
|
|
|
|
2015-06-22 18:25:00 +03:00
|
|
|
struct fsck_options;
|
2018-08-15 20:54:05 +03:00
|
|
|
struct object;
|
2015-06-22 18:25:00 +03:00
|
|
|
|
2021-03-28 16:15:47 +03:00
|
|
|
void fsck_set_msg_type_from_ids(struct fsck_options *options,
|
|
|
|
enum fsck_msg_id msg_id,
|
|
|
|
enum fsck_msg_type msg_type);
|
2015-06-22 18:25:25 +03:00
|
|
|
void fsck_set_msg_type(struct fsck_options *options,
|
2021-03-28 16:15:36 +03:00
|
|
|
const char *msg_id, const char *msg_type);
|
2015-06-22 18:25:25 +03:00
|
|
|
void fsck_set_msg_types(struct fsck_options *options, const char *values);
|
2015-06-22 18:25:31 +03:00
|
|
|
int is_valid_msg_type(const char *msg_id, const char *msg_type);
|
2015-06-22 18:25:25 +03:00
|
|
|
|
2008-02-26 00:46:04 +03:00
|
|
|
/*
|
|
|
|
* callback function for fsck_walk
|
|
|
|
* type is the expected type of the object or OBJ_ANY
|
|
|
|
* the return value is:
|
|
|
|
* 0 everything OK
|
|
|
|
* <0 error signaled and abort
|
|
|
|
* >0 error signaled and do not abort
|
|
|
|
*/
|
2021-03-28 16:15:35 +03:00
|
|
|
typedef int (*fsck_walk_func)(struct object *obj, enum object_type object_type,
|
|
|
|
void *data, struct fsck_options *options);
|
2008-02-26 00:46:04 +03:00
|
|
|
|
2008-02-26 00:46:08 +03:00
|
|
|
/* callback for fsck_object, msg_type is FSCK_ERROR or FSCK_WARN */
|
2016-07-17 13:59:57 +03:00
|
|
|
typedef int (*fsck_error)(struct fsck_options *o,
|
2019-10-18 07:58:40 +03:00
|
|
|
const struct object_id *oid, enum object_type object_type,
|
2021-03-28 16:15:46 +03:00
|
|
|
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
|
|
|
|
const char *message);
|
2008-02-26 00:46:08 +03:00
|
|
|
|
2016-07-17 13:59:57 +03:00
|
|
|
int fsck_error_function(struct fsck_options *o,
|
2019-10-18 07:58:40 +03:00
|
|
|
const struct object_id *oid, enum object_type object_type,
|
2021-03-28 16:15:46 +03:00
|
|
|
enum fsck_msg_type msg_type, enum fsck_msg_id msg_id,
|
|
|
|
const char *message);
|
2021-03-28 16:15:51 +03:00
|
|
|
int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
|
|
|
|
const struct object_id *oid,
|
|
|
|
enum object_type object_type,
|
|
|
|
enum fsck_msg_type msg_type,
|
|
|
|
enum fsck_msg_id msg_id,
|
|
|
|
const char *message);
|
2008-02-26 00:46:09 +03:00
|
|
|
|
2015-06-22 18:25:00 +03:00
|
|
|
/*
 * Options and working state handed to the fsck functions and callbacks
 * declared in this header. The FSCK_OPTIONS_* macros in this file provide
 * initializers for it.
 *
 *   walk          fsck_walk_func callback.
 *   error_func    fsck_error callback; the FSCK_OPTIONS_* initializers set
 *                 it to fsck_error_function or
 *                 fsck_error_cb_print_missing_gitmodules.
 *   strict        one-bit flag, set to 1 by FSCK_OPTIONS_STRICT and
 *                 FSCK_OPTIONS_MISSING_GITMODULES.
 *   msg_type      NOTE(review): presumably the per-message-id severity
 *                 table adjusted through the fsck_set_msg_type() family;
 *                 confirm against fsck.c.
 *   skiplist      set of object ids; NOTE(review): presumably objects whose
 *                 problems are not reported; confirm against fsck.c.
 *   gitmodules_found, gitmodules_done,
 *   gitattributes_found, gitattributes_done,
 *   symlink_targets_found, symlink_targets_done
 *                 sets of object ids; NOTE(review): presumably the objects
 *                 queued for, and already handled by, the deferred checks
 *                 that fsck_finish() resolves; confirm against fsck.c.
 *   object_names  NOTE(review): presumably the oid-to-name map behind the
 *                 object-name subsystem (fsck_enable_object_names() etc.);
 *                 confirm against fsck.c.
 */
struct fsck_options {
|
|
|
|
fsck_walk_func walk;
|
|
|
|
fsck_error error_func;
|
|
|
|
unsigned strict:1;
|
fsck.h: move FSCK_{FATAL,INFO,ERROR,WARN,IGNORE} into an enum
Move the FSCK_{FATAL,INFO,ERROR,WARN,IGNORE} defines into a new
fsck_msg_type enum.
These defines were originally introduced in:
- ba002f3b28a (builtin-fsck: move common object checking code to
fsck.c, 2008-02-25)
- f50c4407305 (fsck: disallow demoting grave fsck errors to warnings,
2015-06-22)
- efaba7cc77f (fsck: optionally ignore specific fsck issues
completely, 2015-06-22)
- f27d05b1704 (fsck: allow upgrading fsck warnings to errors,
2015-06-22)
The reason these were defined in two different places is because we
use FSCK_{IGNORE,INFO,FATAL} only in fsck.c, but FSCK_{ERROR,WARN} are
used by external callbacks.
Untangling that would take some more work, since we expose the new
"enum fsck_msg_type" to both. Similar to "enum object_type" it's not
worth structuring the API in such a way that only those who need
FSCK_{ERROR,WARN} pass around a different type.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-28 16:15:40 +03:00
|
|
|
enum fsck_msg_type *msg_type;
|
2018-09-03 17:49:27 +03:00
|
|
|
struct oidset skiplist;
|
2021-03-28 16:15:48 +03:00
|
|
|
struct oidset gitmodules_found;
|
|
|
|
struct oidset gitmodules_done;
|
2022-12-01 17:46:09 +03:00
|
|
|
struct oidset gitattributes_found;
|
|
|
|
struct oidset gitattributes_done;
|
2024-04-10 19:01:13 +03:00
|
|
|
struct oidset symlink_targets_found;
|
|
|
|
struct oidset symlink_targets_done;
|
2019-10-18 07:57:37 +03:00
|
|
|
kh_oid_map_t *object_names;
|
2015-06-22 18:25:00 +03:00
|
|
|
};
|
|
|
|
|
2021-03-28 16:15:34 +03:00
|
|
|
#define FSCK_OPTIONS_DEFAULT { \
|
|
|
|
.skiplist = OIDSET_INIT, \
|
2021-03-28 16:15:48 +03:00
|
|
|
.gitmodules_found = OIDSET_INIT, \
|
|
|
|
.gitmodules_done = OIDSET_INIT, \
|
2022-12-01 17:46:09 +03:00
|
|
|
.gitattributes_found = OIDSET_INIT, \
|
|
|
|
.gitattributes_done = OIDSET_INIT, \
|
2024-04-10 19:01:13 +03:00
|
|
|
.symlink_targets_found = OIDSET_INIT, \
|
|
|
|
.symlink_targets_done = OIDSET_INIT, \
|
2021-03-28 16:15:34 +03:00
|
|
|
.error_func = fsck_error_function \
|
|
|
|
}
|
|
|
|
#define FSCK_OPTIONS_STRICT { \
|
|
|
|
.strict = 1, \
|
2021-03-28 16:15:48 +03:00
|
|
|
.gitmodules_found = OIDSET_INIT, \
|
|
|
|
.gitmodules_done = OIDSET_INIT, \
|
2022-12-01 17:46:09 +03:00
|
|
|
.gitattributes_found = OIDSET_INIT, \
|
|
|
|
.gitattributes_done = OIDSET_INIT, \
|
2024-04-10 19:01:13 +03:00
|
|
|
.symlink_targets_found = OIDSET_INIT, \
|
|
|
|
.symlink_targets_done = OIDSET_INIT, \
|
2021-03-28 16:15:34 +03:00
|
|
|
.error_func = fsck_error_function, \
|
|
|
|
}
|
2021-03-28 16:15:51 +03:00
|
|
|
#define FSCK_OPTIONS_MISSING_GITMODULES { \
|
|
|
|
.strict = 1, \
|
|
|
|
.gitmodules_found = OIDSET_INIT, \
|
|
|
|
.gitmodules_done = OIDSET_INIT, \
|
2022-12-01 17:46:09 +03:00
|
|
|
.gitattributes_found = OIDSET_INIT, \
|
|
|
|
.gitattributes_done = OIDSET_INIT, \
|
2024-04-10 19:01:13 +03:00
|
|
|
.symlink_targets_found = OIDSET_INIT, \
|
|
|
|
.symlink_targets_done = OIDSET_INIT, \
|
2021-03-28 16:15:51 +03:00
|
|
|
.error_func = fsck_error_cb_print_missing_gitmodules, \
|
|
|
|
}
|
2015-06-22 18:25:00 +03:00
|
|
|
|
2008-02-26 00:46:04 +03:00
|
|
|
/* descend in all linked child objects
|
|
|
|
* the return value is:
|
|
|
|
* -1 error in processing the object
|
|
|
|
* <0 return value of the callback, which led to an abort
|
2009-04-17 22:13:30 +04:00
|
|
|
* >0 return value of the first signaled error >0 (in the case of no other errors)
|
2008-02-26 00:46:04 +03:00
|
|
|
* 0 everything OK
|
|
|
|
*/
|
2015-06-22 18:25:00 +03:00
|
|
|
int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
|
fsck: require an actual buffer for non-blobs
The fsck_object() function takes in a buffer, but also a "struct
object". The rules for using these vary between types:
- for a commit, we'll use the provided buffer; if it's NULL, we'll
fall back to get_commit_buffer(), which loads from either an
in-memory cache or from disk. If the latter fails, we'd die(), which
is non-ideal for fsck.
- for a tag, a NULL buffer will fall back to loading the object from
disk (and failure would lead to an fsck error)
- for a tree, we _never_ look at the provided buffer, and always use
tree->buffer
- for a blob, we usually don't look at the buffer at all, unless it
has been marked as a .gitmodules file. In that case we check the
buffer given to us, or assume a NULL buffer is a very large blob
(and complain about it)
This is much more complex than it needs to be. It turns out that nobody
ever feeds a NULL buffer that isn't a blob:
- git-fsck calls fsck_object() only from fsck_obj(). That in turn is
called by one of:
- fsck_obj_buffer(), which is a callback to verify_pack(), which
unpacks everything except large blobs into a buffer (see
pack-check.c, lines 131-141).
- fsck_loose(), which hits a BUG() on non-blobs with a NULL buffer
(builtin/fsck.c, lines 639-640)
And in either case, we'll have just called parse_object_buffer()
anyway, which would segfault on a NULL buffer for commits or tags
(not for trees, but it would install a NULL tree->buffer which would
later cause a segfault)
- git-index-pack asserts that the buffer is non-NULL unless the object
is a blob (see builtin/index-pack.c, line 832)
- git-unpack-objects always writes a non-NULL buffer into its
obj_buffer hash, which is then fed to fsck_object(). (There is
actually a funny thing here where it does not store blob buffers at
all, nor does it call fsck on them; it does check any needed blobs
via fsck_finish() though).
Let's make the rules simpler, which reduces the amount of code and gives
us more flexibility in refactoring the fsck code. The new rules are:
- only blobs are allowed to pass a NULL buffer
- we always use the provided buffer, never pulling information from
the object struct
We don't have to adjust any callers, because they were already adhering
to these. Note that we do drop a few fsck identifiers for missing tags,
but that was all dead code (because nobody passed a NULL tag buffer).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-18 07:54:12 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Blob objects may pass a NULL data pointer, which indicates they are too large
|
|
|
|
* to fit in memory. All other types must pass a real buffer.
|
|
|
|
*/
|
2014-09-10 17:52:51 +04:00
|
|
|
int fsck_object(struct object *obj, void *data, unsigned long size,
|
2015-06-22 18:25:00 +03:00
|
|
|
struct fsck_options *options);
|
2008-02-26 00:46:04 +03:00
|
|
|
|
2023-01-18 23:43:53 +03:00
|
|
|
/*
 * Same as fsck_object(), but for when the caller doesn't have an object
 * struct: the object is identified by "oid" and "object_type" instead.
 * "data" and "size" are the buffer arguments, as for fsck_object().
 */
int fsck_buffer(const struct object_id *oid, enum object_type object_type,
		void *data, unsigned long size,
		struct fsck_options *options);
|
|
|
|
|
mktag: use fsck instead of custom verify_tag()
Change the validation logic in "mktag" to use fsck's fsck_tag()
instead of its own custom parser. Curiously the logic for both dates
back to the same commit[1]. Let's unify them so we're not maintaining
two sets of functions to verify that a tag is OK.
The behavior of fsck_tag() and the old "mktag" code being removed here
is different in a few aspects.
I think it makes sense to remove some of those checks, namely:
A. fsck only cares that the timezone matches [-+][0-9]{4}. The mktag
code disallowed values larger than 1400.
Yes there's currently no timezone with a greater offset[2], but
since we allow any number of non-official timezones (e.g. +1234)
passing this through seems fine. Git also won't break in the
future if e.g. French Polynesia decides it needs to outdo the Line
Islands when it comes to timezone extravagance.
B. fsck allows missing author names such as "tagger <email>", mktag
wouldn't, but would allow e.g. "tagger [2 spaces] <email>" (but
not "tagger [1 space] <email>"). Now we allow all of these.
C. Like B, but "mktag" disallowed spaces in the <email> part, fsck
allows it.
In some ways fsck_tag() is stricter than "mktag" was, namely:
D. fsck disallows zero-padded dates, but mktag didn't care. So
e.g. the timestamp "0000000000 +0000" produces an error now. A
test in "t1006-cat-file.sh" relied on this, it's been changed to
use "hash-object" (without fsck) instead.
There was one check I deemed worth keeping by porting it over to
fsck_tag():
E. "mktag" did not allow any custom headers, and by extension (as an
empty commit is allowed) also forbade an extra stray trailing
newline after the headers it knew about.
Add a new check in the "ignore" category to fsck and use it. This
somewhat abuses the facility added in efaba7cc77f (fsck:
optionally ignore specific fsck issues completely, 2015-06-22).
This is somewhat of a hack, but probably the least invasive change
we can make here. The fsck command will shuffle these categories
around, e.g. under --strict the "info" becomes a "warn" and "warn"
becomes "error". Existing users of fsck's (and others,
e.g. index-pack) --strict option rely on this.
So we need to put something into a category that'll be ignored by
all existing users of the API. Pretending that
fsck.extraHeaderEntry=error ("ignore" by default) was set serves
to do this for us.
1. ec4465adb38 (Add "tag" objects that can be used to sign other
objects., 2005-04-25)
2. https://en.wikipedia.org/wiki/List_of_UTC_time_offsets
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-05 22:42:46 +03:00
|
|
|
/*
|
|
|
|
* fsck a tag, and pass info about it back to the caller. This is
|
|
|
|
* exposed fsck_object() internals for git-mktag(1).
|
|
|
|
*/
|
|
|
|
int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
|
|
|
|
unsigned long size, struct fsck_options *options,
|
|
|
|
struct object_id *tagged_oid,
|
|
|
|
int *tag_type);
|
|
|
|
|
fsck: detect gitmodules files
In preparation for performing fsck checks on .gitmodules
files, this commit plumbs in the actual detection of the
files. Note that unlike most other fsck checks, this cannot
be a property of a single object: we must know that the
object is found at a ".gitmodules" path at the root tree of
a commit.
Since the fsck code only sees one object at a time, we have
to mark the related objects to fit the puzzle together. When
we see a commit we mark its tree as a root tree, and when
we see a root tree with a .gitmodules file, we mark the
corresponding blob to be checked.
In an ideal world, we'd check the objects in topological
order: commits followed by trees followed by blobs. In that
case we can avoid ever loading an object twice, since all
markings would be complete by the time we get to the marked
objects. And indeed, if we are checking a single packfile,
this is the order in which Git will generally write the
objects. But we can't count on that:
1. git-fsck may show us the objects in arbitrary order
(loose objects are fed in sha1 order, but we may also
have multiple packs, and we process each pack fully in
sequence).
2. The type ordering is just what git-pack-objects happens
to write now. The pack format does not require a
specific order, and it's possible that future versions
of Git (or a custom version trying to fool official
Git's fsck checks!) may order it differently.
3. We may not even be fscking all of the relevant objects
at once. Consider pushing with transfer.fsckObjects,
where one push adds a blob at path "foo", and then a
second push adds the same blob at path ".gitmodules".
The blob is not part of the second push at all, but we
need to mark and check it.
So in the general case, we need to make up to three passes
over the objects: once to make sure we've seen all commits,
then once to cover any trees we might have missed, and then
a final pass to cover any .gitmodules blobs we found in the
second pass.
We can simplify things a bit by loosening the requirement
that we find .gitmodules only at root trees. Technically
a file like "subdir/.gitmodules" is not parsed by Git, but
it's not unreasonable for us to declare that Git is aware of
all ".gitmodules" files and make them eligible for checking.
That lets us drop the root-tree requirement, which
eliminates one pass entirely. And it makes our worst case
much better: instead of potentially queueing every root tree
to be re-examined, the worst case is that we queue each
unique .gitmodules blob for a second look.
This patch just adds the boilerplate to find .gitmodules
files. The actual content checks will come in a subsequent
commit.
Signed-off-by: Jeff King <peff@peff.net>
2018-05-03 00:20:08 +03:00
|
|
|
/*
|
|
|
|
* Some fsck checks are context-dependent, and may end up queued; run this
|
|
|
|
* after completing all fsck_object() calls in order to resolve any remaining
|
|
|
|
* checks.
|
|
|
|
*/
|
|
|
|
int fsck_finish(struct fsck_options *options);
|
|
|
|
|
fsck: unify object-name code
Commit 90cf590f53 (fsck: optionally show more helpful info for broken
links, 2016-07-17) added a system for decorating objects with names. The
code is split across builtin/fsck.c (which gives the initial names) and
fsck.c (which adds to the names as it traverses the object graph). This
leads to some duplication, where both sites have near-identical
describe_object() functions (the difference being that the one in
builtin/fsck.c uses a circular array of buffers to allow multiple calls
in a single printf).
Let's provide a unified object_name API for fsck. That lets us drop the
duplication, as well as making the interface boundaries more clear
(which will let us refactor the implementation more in a future patch).
We'll leave describe_object() in builtin/fsck.c as a thin wrapper around
the new API, as it relies on a static global to make its many callers a
bit shorter.
We'll also convert the bare add_decoration() calls in builtin/fsck.c to
put_object_name(). This fixes two minor bugs:
1. We leak many small strings. add_decoration() has a last-one-wins
approach: it updates the decoration to the new string and returns
the old one. But we ignore the return value, leaking the old
string. This is quite common to trigger, since we look at reflogs:
the tip of any ref will be described both by looking at the actual
ref, as well as the latest reflog entry. So we'd always end up
leaking one of those strings.
2. The last-one-wins approach gives us lousy names. For instance, we
first look at all of the refs, and then all of the reflogs. So
rather than seeing "refs/heads/master", we're likely to overwrite
it with "HEAD@{12345678}". We're generally better off using the
first name we find.
And indeed, the test in t1450 expects this ugly HEAD@{} name. After
this patch, we've switched to using fsck_put_object_name()'s
first-one-wins semantics, and we output the more human-friendly
"refs/tags/julius" (and the test is updated accordingly).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-18 07:56:13 +03:00
|
|
|
/*
|
|
|
|
* Subsystem for storing human-readable names for each object.
|
|
|
|
*
|
|
|
|
* If fsck_enable_object_names() has not been called, all other functions are
|
|
|
|
* noops.
|
|
|
|
*
|
|
|
|
* Use fsck_put_object_name() to seed initial names (e.g. from refnames); the
|
|
|
|
* fsck code will extend that while walking trees, etc.
|
|
|
|
*
|
|
|
|
* Use fsck_get_object_name() to get a single name (or NULL if none). Or the
|
|
|
|
* more convenient fsck_describe_object(), which always produces an output string
|
|
|
|
* with the oid combined with the name (if any). Note that the return value
|
|
|
|
* points to a rotating array of static buffers, and may be invalidated by a
|
|
|
|
* subsequent call.
|
|
|
|
*/
|
|
|
|
void fsck_enable_object_names(struct fsck_options *options);
|
|
|
|
const char *fsck_get_object_name(struct fsck_options *options,
|
2019-10-18 07:57:37 +03:00
|
|
|
const struct object_id *oid);
|
fsck: unify object-name code
Commit 90cf590f53 (fsck: optionally show more helpful info for broken
links, 2016-07-17) added a system for decorating objects with names. The
code is split across builtin/fsck.c (which gives the initial names) and
fsck.c (which adds to the names as it traverses the object graph). This
leads to some duplication, where both sites have near-identical
describe_object() functions (the difference being that the one in
builtin/fsck.c uses a circular array of buffers to allow multiple calls
in a single printf).
Let's provide a unified object_name API for fsck. That lets us drop the
duplication, as well as making the interface boundaries more clear
(which will let us refactor the implementation more in a future patch).
We'll leave describe_object() in builtin/fsck.c as a thin wrapper around
the new API, as it relies on a static global to make its many callers a
bit shorter.
We'll also convert the bare add_decoration() calls in builtin/fsck.c to
put_object_name(). This fixes two minor bugs:
1. We leak many small strings. add_decoration() has a last-one-wins
approach: it updates the decoration to the new string and returns
the old one. But we ignore the return value, leaking the old
string. This is quite common to trigger, since we look at reflogs:
the tip of any ref will be described both by looking at the actual
ref, as well as the latest reflog entry. So we'd always end up
leaking one of those strings.
2. The last-one-wins approach gives us lousy names. For instance, we
first look at all of the refs, and then all of the reflogs. So
rather than seeing "refs/heads/master", we're likely to overwrite
it with "HEAD@{12345678}". We're generally better off using the
first name we find.
And indeed, the test in t1450 expects this ugly HEAD@{} name. After
this patch, we've switched to using fsck_put_object_name()'s
first-one-wins semantics, and we output the more human-friendly
"refs/tags/julius" (and the test is updated accordingly).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-18 07:56:13 +03:00
|
|
|
__attribute__((format (printf,3,4)))
|
2019-10-18 07:57:37 +03:00
|
|
|
void fsck_put_object_name(struct fsck_options *options,
|
|
|
|
const struct object_id *oid,
|
fsck: unify object-name code
Commit 90cf590f53 (fsck: optionally show more helpful info for broken
links, 2016-07-17) added a system for decorating objects with names. The
code is split across builtin/fsck.c (which gives the initial names) and
fsck.c (which adds to the names as it traverses the object graph). This
leads to some duplication, where both sites have near-identical
describe_object() functions (the difference being that the one in
builtin/fsck.c uses a circular array of buffers to allow multiple calls
in a single printf).
Let's provide a unified object_name API for fsck. That lets us drop the
duplication, as well as making the interface boundaries more clear
(which will let us refactor the implementation more in a future patch).
We'll leave describe_object() in builtin/fsck.c as a thin wrapper around
the new API, as it relies on a static global to make its many callers a
bit shorter.
We'll also convert the bare add_decoration() calls in builtin/fsck.c to
put_object_name(). This fixes two minor bugs:
1. We leak many small strings. add_decoration() has a last-one-wins
approach: it updates the decoration to the new string and returns
the old one. But we ignore the return value, leaking the old
string. This is quite common to trigger, since we look at reflogs:
the tip of any ref will be described both by looking at the actual
ref, as well as the latest reflog entry. So we'd always end up
leaking one of those strings.
2. The last-one-wins approach gives us lousy names. For instance, we
first look at all of the refs, and then all of the reflogs. So
rather than seeing "refs/heads/master", we're likely to overwrite
it with "HEAD@{12345678}". We're generally better off using the
first name we find.
And indeed, the test in t1450 expects this ugly HEAD@{} name. After
this patch, we've switched to using fsck_put_object_name()'s
first-one-wins semantics, and we output the more human-friendly
"refs/tags/julius" (and the test is updated accordingly).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-18 07:56:13 +03:00
|
|
|
const char *fmt, ...);
|
|
|
|
/*
 * Return a string describing the object identified by "oid".
 *
 * This belongs to the unified fsck object-name API, so the description
 * presumably incorporates any name recorded for the object in "options"
 * -- TODO confirm against the definition.
 * NOTE(review): the ownership and lifetime of the returned string are
 * not visible from this declaration; verify before holding on to it.
 */
const char *fsck_describe_object(struct fsck_options *options,
|
2019-10-18 07:57:37 +03:00
|
|
|
const struct object_id *oid);
|
fsck: unify object-name code
Commit 90cf590f53 (fsck: optionally show more helpful info for broken
links, 2016-07-17) added a system for decorating objects with names. The
code is split across builtin/fsck.c (which gives the initial names) and
fsck.c (which adds to the names as it traverses the object graph). This
leads to some duplication, where both sites have near-identical
describe_object() functions (the difference being that the one in
builtin/fsck.c uses a circular array of buffers to allow multiple calls
in a single printf).
Let's provide a unified object_name API for fsck. That lets us drop the
duplication, as well as making the interface boundaries more clear
(which will let us refactor the implementation more in a future patch).
We'll leave describe_object() in builtin/fsck.c as a thin wrapper around
the new API, as it relies on a static global to make its many callers a
bit shorter.
We'll also convert the bare add_decoration() calls in builtin/fsck.c to
put_object_name(). This fixes two minor bugs:
1. We leak many small strings. add_decoration() has a last-one-wins
approach: it updates the decoration to the new string and returns
the old one. But we ignore the return value, leaking the old
string. This is quite common to trigger, since we look at reflogs:
the tip of any ref will be described both by looking at the actual
ref, as well as the latest reflog entry. So we'd always end up
leaking one of those strings.
2. The last-one-wins approach gives us lousy names. For instance, we
first look at all of the refs, and then all of the reflogs. So
rather than seeing "refs/heads/master", we're likely to overwrite
it with "HEAD@{12345678}". We're generally better off using the
first name we find.
And indeed, the test in t1450 expects this ugly HEAD@{} name. After
this patch, we've switched to using fsck_put_object_name()'s
first-one-wins semantics, and we output the more human-friendly
"refs/tags/julius" (and the test is updated accordingly).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-18 07:56:13 +03:00
|
|
|
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but most of them are trivial: either adjusting a config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 22:26:22 +03:00
|
|
|
/*
 * Forward declarations of the config types referenced by this header.
 *
 * The git_fsck_config() prototype takes a "const struct config_context *".
 * Without a file-scope declaration of that tag, the struct would be
 * declared inside the prototype's parameter list and be invisible (and
 * incompatible) everywhere else, so this header would only work when the
 * including file had already declared the type.
 */
struct key_value_info;
struct config_context;
|
2021-01-05 22:42:47 +03:00
|
|
|
/*
|
|
|
|
* git_config() callback for use by fsck-y tools that want to support
|
|
|
|
* fsck.<msg>, fsck.skipList, etc.
|
|
|
|
*/
|
config: add ctx arg to config_fn_t
Add a new "const struct config_context *ctx" arg to config_fn_t to hold
additional information about the config iteration operation.
config_context has a "struct key_value_info kvi" member that holds
metadata about the config source being read (e.g. what kind of config
source it is, the filename, etc). In this series, we're only interested
in .kvi, so we could have just used "struct key_value_info" as an arg,
but config_context makes it possible to add/adjust members in the future
without changing the config_fn_t signature. We could also consider other
ways of organizing the args (e.g. moving the config name and value into
config_context or key_value_info), but in my experiments, the
incremental benefit doesn't justify the added complexity (e.g. a
config_fn_t will sometimes invoke another config_fn_t but with a
different config value).
In subsequent commits, the .kvi member will replace the global "struct
config_reader" in config.c, making config iteration a global-free
operation. It requires much more work for the machinery to provide
meaningful values of .kvi, so for now, merely change the signature and
call sites, pass NULL as a placeholder value, and don't rely on the arg
in any meaningful way.
Most of the changes are performed by
contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every
config_fn_t:
- Modifies the signature to accept "const struct config_context *ctx"
- Passes "ctx" to any inner config_fn_t, if needed
- Adds UNUSED attributes to "ctx", if needed
Most config_fn_t instances are easily identified by seeing if they are
called by the various config functions. Most of the remaining ones are
manually named in the .cocci patch. Manual cleanups are still needed,
but most of them are trivial: either adjusting a config_fn_t
that the .cocci patch didn't catch, or adding forward declarations of
"struct config_context" to make the signatures make sense.
The non-trivial changes are in cases where we are invoking a config_fn_t
outside of config machinery, and we now need to decide what value of
"ctx" to pass. These cases are:
- trace2/tr2_cfg.c:tr2_cfg_set_fl()
This is indirectly called by git_config_set() so that the trace2
machinery can notice the new config values and update its settings
using the tr2 config parsing function, i.e. tr2_cfg_cb().
- builtin/checkout.c:checkout_main()
This calls git_xmerge_config() as a shorthand for parsing a CLI arg.
This might be worth refactoring away in the future, since
git_xmerge_config() can call git_default_config(), which can do much
more than just parsing.
Handle them by creating a KVI_INIT macro that initializes "struct
key_value_info" to a reasonable default, and use that to construct the
"ctx" arg.
Signed-off-by: Glen Choo <chooglen@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 22:26:22 +03:00
|
|
|
/*
 * Arguments follow the config callback convention: "var" and "value" are
 * the config name and value being visited, and "ctx" holds additional
 * information about the config iteration operation.
 * NOTE(review): "cb" is the caller-supplied callback data; the type it
 * must point to is not visible here (presumably a struct fsck_options)
 * -- confirm against the definition.
 */
int git_fsck_config(const char *var, const char *value,
|
|
|
|
const struct config_context *ctx, void *cb);
|
2021-01-05 22:42:47 +03:00
|
|
|
|
2008-02-26 00:46:04 +03:00
|
|
|
#endif
|