зеркало из https://github.com/microsoft/git.git
Merge branch 'jk/oi-delta-base'
Teach "cat-file --batch" to show the delta-base object name for a packed object that is represented as a delta.

* jk/oi-delta-base:
  cat-file: provide %(deltabase) batch format
  sha1_object_info_extended: provide delta base sha1s
This commit is contained in:
Коммит
b2132068c6
|
@ -109,6 +109,11 @@ newline. The available atoms are:
|
||||||
The size, in bytes, that the object takes up on disk. See the
|
The size, in bytes, that the object takes up on disk. See the
|
||||||
note about on-disk sizes in the `CAVEATS` section below.
|
note about on-disk sizes in the `CAVEATS` section below.
|
||||||
|
|
||||||
|
`deltabase`::
|
||||||
|
If the object is stored as a delta on-disk, this expands to the
|
||||||
|
40-hex sha1 of the delta base object. Otherwise, expands to the
|
||||||
|
null sha1 (40 zeroes). See `CAVEATS` below.
|
||||||
|
|
||||||
`rest`::
|
`rest`::
|
||||||
If this atom is used in the output string, input lines are split
|
If this atom is used in the output string, input lines are split
|
||||||
at the first whitespace boundary. All characters before that
|
at the first whitespace boundary. All characters before that
|
||||||
|
@ -152,10 +157,11 @@ should be taken in drawing conclusions about which refs or objects are
|
||||||
responsible for disk usage. The size of a packed non-delta object may be
|
responsible for disk usage. The size of a packed non-delta object may be
|
||||||
much larger than the size of objects which delta against it, but the
|
much larger than the size of objects which delta against it, but the
|
||||||
choice of which object is the base and which is the delta is arbitrary
|
choice of which object is the base and which is the delta is arbitrary
|
||||||
and is subject to change during a repack. Note also that multiple copies
|
and is subject to change during a repack.
|
||||||
of an object may be present in the object database; in this case, it is
|
|
||||||
undefined which copy's size will be reported.
|
|
||||||
|
|
||||||
|
Note also that multiple copies of an object may be present in the object
|
||||||
|
database; in this case, it is undefined which copy's size or delta base
|
||||||
|
will be reported.
|
||||||
|
|
||||||
GIT
|
GIT
|
||||||
---
|
---
|
||||||
|
|
|
@ -118,6 +118,7 @@ struct expand_data {
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
unsigned long disk_size;
|
unsigned long disk_size;
|
||||||
const char *rest;
|
const char *rest;
|
||||||
|
unsigned char delta_base_sha1[20];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If mark_query is true, we do not expand anything, but rather
|
* If mark_query is true, we do not expand anything, but rather
|
||||||
|
@ -174,6 +175,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
|
||||||
data->split_on_whitespace = 1;
|
data->split_on_whitespace = 1;
|
||||||
else if (data->rest)
|
else if (data->rest)
|
||||||
strbuf_addstr(sb, data->rest);
|
strbuf_addstr(sb, data->rest);
|
||||||
|
} else if (is_atom("deltabase", atom, len)) {
|
||||||
|
if (data->mark_query)
|
||||||
|
data->info.delta_base_sha1 = data->delta_base_sha1;
|
||||||
|
else
|
||||||
|
strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1));
|
||||||
} else
|
} else
|
||||||
die("unknown format element: %.*s", len, atom);
|
die("unknown format element: %.*s", len, atom);
|
||||||
}
|
}
|
||||||
|
|
1
cache.h
1
cache.h
|
@ -1080,6 +1080,7 @@ struct object_info {
|
||||||
enum object_type *typep;
|
enum object_type *typep;
|
||||||
unsigned long *sizep;
|
unsigned long *sizep;
|
||||||
unsigned long *disk_sizep;
|
unsigned long *disk_sizep;
|
||||||
|
unsigned char *delta_base_sha1;
|
||||||
|
|
||||||
/* Response */
|
/* Response */
|
||||||
enum {
|
enum {
|
||||||
|
|
53
sha1_file.c
53
sha1_file.c
|
@ -1690,6 +1690,38 @@ static off_t get_delta_base(struct packed_git *p,
|
||||||
return base_offset;
|
return base_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Like get_delta_base above, but we return the sha1 instead of the pack
|
||||||
|
* offset. This means it is cheaper for REF deltas (we do not have to do
|
||||||
|
* the final object lookup), but more expensive for OFS deltas (we
|
||||||
|
* have to load the revidx to convert the offset back into a sha1).
*
* "curpos" is handed to use_pack() / get_delta_base() as the position to
* read from; "delta_obj_offset" is forwarded to get_delta_base() unchanged.
*
* Returns a pointer to the sha1 of the base object, or NULL when "type" is
* not OBJ_REF_DELTA / OBJ_OFS_DELTA, when get_delta_base() yields a zero
* offset, or when find_pack_revindex() finds no entry for the base offset.
*
* NOTE(review): for REF deltas the returned pointer comes straight from
* use_pack(); presumably it is only valid while the pack window is held,
* so callers should copy it before calling unuse_pack() -- confirm.
|
||||||
|
*/
|
||||||
|
static const unsigned char *get_delta_base_sha1(struct packed_git *p,
|
||||||
|
struct pack_window **w_curs,
|
||||||
|
off_t curpos,
|
||||||
|
enum object_type type,
|
||||||
|
off_t delta_obj_offset)
|
||||||
|
{
|
||||||
|
if (type == OBJ_REF_DELTA) {
|
||||||
|
/* presumably the base sha1 is stored inline at curpos -- TODO confirm against the pack format */
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
|
||||||
|
return base;
|
||||||
|
} else if (type == OBJ_OFS_DELTA) {
|
||||||
|
struct revindex_entry *revidx;
|
||||||
|
/* curpos is a by-value copy here, so passing its address does not affect the caller */
off_t base_offset = get_delta_base(p, w_curs, &curpos,
|
||||||
|
type, delta_obj_offset);
|
||||||
|
|
||||||
|
/* a zero offset is treated as "base could not be determined" */
if (!base_offset)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* use the reverse index to turn the base's pack offset back into an index position */
revidx = find_pack_revindex(p, base_offset);
|
||||||
|
if (!revidx)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return nth_packed_object_sha1(p, revidx->nr);
|
||||||
|
} else
|
||||||
|
/* not a delta type: there is no base to report */
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
int unpack_object_header(struct packed_git *p,
|
int unpack_object_header(struct packed_git *p,
|
||||||
struct pack_window **w_curs,
|
struct pack_window **w_curs,
|
||||||
off_t *curpos,
|
off_t *curpos,
|
||||||
|
@ -1847,6 +1879,22 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (oi->delta_base_sha1) {
|
||||||
|
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
|
||||||
|
const unsigned char *base;
|
||||||
|
|
||||||
|
base = get_delta_base_sha1(p, &w_curs, curpos,
|
||||||
|
type, obj_offset);
|
||||||
|
if (!base) {
|
||||||
|
type = OBJ_BAD;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
hashcpy(oi->delta_base_sha1, base);
|
||||||
|
} else
|
||||||
|
hashclr(oi->delta_base_sha1);
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
unuse_pack(&w_curs);
|
unuse_pack(&w_curs);
|
||||||
return type;
|
return type;
|
||||||
|
@ -2430,6 +2478,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
|
||||||
git_zstream stream;
|
git_zstream stream;
|
||||||
char hdr[32];
|
char hdr[32];
|
||||||
|
|
||||||
|
if (oi->delta_base_sha1)
|
||||||
|
hashclr(oi->delta_base_sha1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we don't care about type or size, then we don't
|
* If we don't care about type or size, then we don't
|
||||||
* need to look inside the object at all. Note that we
|
* need to look inside the object at all. Note that we
|
||||||
|
@ -2481,6 +2532,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
|
||||||
*(oi->sizep) = co->size;
|
*(oi->sizep) = co->size;
|
||||||
if (oi->disk_sizep)
|
if (oi->disk_sizep)
|
||||||
*(oi->disk_sizep) = 0;
|
*(oi->disk_sizep) = 0;
|
||||||
|
if (oi->delta_base_sha1)
|
||||||
|
hashclr(oi->delta_base_sha1);
|
||||||
oi->whence = OI_CACHED;
|
oi->whence = OI_CACHED;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -262,4 +262,38 @@ test_expect_success "--batch-check with multiple sha1s gives correct format" '
|
||||||
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
|
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
|
||||||
'
|
'
|
||||||
|
|
||||||
|
# Fixture for the %(deltabase) tests: commit two blobs, "foo" and "foo-plus"
# (foo-plus is foo with one extra line, "plus", appended), and list both in
# the file "blobs" as HEAD:<path> names to be fed to cat-file on stdin.
# NOTE(review): test-genrandom presumably emits 10240 pseudo-random bytes
# seeded by "foo" -- confirm against the test helper.
test_expect_success 'setup blobs which are likely to delta' '
|
||||||
|
test-genrandom foo 10240 >foo &&
|
||||||
|
{ cat foo; echo plus; } >foo-plus &&
|
||||||
|
git add foo foo-plus &&
|
||||||
|
git commit -m foo &&
|
||||||
|
cat >blobs <<-\EOF
|
||||||
|
HEAD:foo
|
||||||
|
HEAD:foo-plus
|
||||||
|
EOF
|
||||||
|
'
|
||||||
|
|
||||||
|
# Run before any repack, while (per the test title) both blobs are still
# loose: %(deltabase) is expected to print $_z40 once per input line.
# NOTE(review): $_z40 is presumably the 40-zero null sha1 defined by the
# test library -- confirm.
test_expect_success 'confirm that neither loose blob is a delta' '
|
||||||
|
cat >expect <<-EOF
|
||||||
|
$_z40
|
||||||
|
$_z40
|
||||||
|
EOF
|
||||||
|
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
# To avoid relying too much on the current delta heuristics,
|
||||||
|
# we will check only that one of the two objects is a delta
|
||||||
|
# against the other, but not the order. We can do so by just
|
||||||
|
# asking for the base of both, and checking whether either
|
||||||
|
# sha1 appears in the output.
|
||||||
|
test_expect_success '%(deltabase) reports packed delta bases' '
|
||||||
|
git repack -ad &&
|
||||||
|
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
||||||
|
{
|
||||||
|
grep "$(git rev-parse HEAD:foo)" actual ||
|
||||||
|
grep "$(git rev-parse HEAD:foo-plus)" actual
|
||||||
|
}
|
||||||
|
'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
|
Загрузка…
Ссылка в новой задаче