зеркало из https://github.com/microsoft/git.git
binary diff: further updates.
This updates the user interface and generated diff data format. * "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p. * "apply --allow-binary-replacement" acquired a short synonym "apply --binary". * After the "GIT binary patch\n" header line there is a token to record which binary patch mechanism was used, so that we can extend it later. Currently there are two mechanisms defined: "literal" and "delta". The former records the deflated postimage and the latter records the deflated delta from the preimage to postimage. For purely implementation convenience, I added the deflated length after these "literal/delta" tokens (otherwise the decoding side needs to guess and reallocate the buffer while inflating). Improvement patches are very welcomed. Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
Родитель
051308f6e9
Коммит
0660626caf
130
apply.c
130
apply.c
|
@ -114,6 +114,9 @@ struct patch {
|
|||
char *new_name, *old_name, *def_name;
|
||||
unsigned int old_mode, new_mode;
|
||||
int is_rename, is_copy, is_new, is_delete, is_binary;
|
||||
#define BINARY_DELTA_DEFLATED 1
|
||||
#define BINARY_LITERAL_DEFLATED 2
|
||||
unsigned long deflate_origlen;
|
||||
int lines_added, lines_deleted;
|
||||
int score;
|
||||
struct fragment *fragments;
|
||||
|
@ -969,9 +972,11 @@ static inline int metadata_changes(struct patch *patch)
|
|||
|
||||
static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
|
||||
{
|
||||
/* We have read "GIT binary patch\n"; what follows is a
|
||||
* sequence of 'length-byte' followed by base-85 encoded
|
||||
* delta data.
|
||||
/* We have read "GIT binary patch\n"; what follows is a line
|
||||
* that says the patch method (currently, either "deflated
|
||||
* literal" or "deflated delta") and the length of data before
|
||||
* deflating; a sequence of 'length-byte' followed by base-85
|
||||
* encoded data follows.
|
||||
*
|
||||
* Each 5-byte sequence of base-85 encodes up to 4 bytes,
|
||||
* and we would limit the patch line to 66 characters,
|
||||
|
@ -982,11 +987,27 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
|
|||
*/
|
||||
int llen, used;
|
||||
struct fragment *fragment;
|
||||
char *delta = NULL;
|
||||
char *data = NULL;
|
||||
|
||||
patch->is_binary = 1;
|
||||
patch->fragments = fragment = xcalloc(1, sizeof(*fragment));
|
||||
used = 0;
|
||||
|
||||
/* Grab the type of patch */
|
||||
llen = linelen(buffer, size);
|
||||
used = llen;
|
||||
linenr++;
|
||||
|
||||
if (!strncmp(buffer, "delta ", 6)) {
|
||||
patch->is_binary = BINARY_DELTA_DEFLATED;
|
||||
patch->deflate_origlen = strtoul(buffer + 6, NULL, 10);
|
||||
}
|
||||
else if (!strncmp(buffer, "literal ", 8)) {
|
||||
patch->is_binary = BINARY_LITERAL_DEFLATED;
|
||||
patch->deflate_origlen = strtoul(buffer + 8, NULL, 10);
|
||||
}
|
||||
else
|
||||
return error("unrecognized binary patch at line %d: %.*s",
|
||||
linenr-1, llen-1, buffer);
|
||||
buffer += llen;
|
||||
while (1) {
|
||||
int byte_length, max_byte_length, newsize;
|
||||
llen = linelen(buffer, size);
|
||||
|
@ -1015,8 +1036,8 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
|
|||
byte_length <= max_byte_length - 4)
|
||||
goto corrupt;
|
||||
newsize = fragment->size + byte_length;
|
||||
delta = xrealloc(delta, newsize);
|
||||
if (decode_85(delta + fragment->size,
|
||||
data = xrealloc(data, newsize);
|
||||
if (decode_85(data + fragment->size,
|
||||
buffer + 1,
|
||||
byte_length))
|
||||
goto corrupt;
|
||||
|
@ -1024,7 +1045,7 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
|
|||
buffer += llen;
|
||||
size -= llen;
|
||||
}
|
||||
fragment->patch = delta;
|
||||
fragment->patch = data;
|
||||
return used;
|
||||
corrupt:
|
||||
return error("corrupt binary patch at line %d: %.*s",
|
||||
|
@ -1425,6 +1446,61 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag)
|
|||
return offset;
|
||||
}
|
||||
|
||||
static char *inflate_it(const void *data, unsigned long size,
|
||||
unsigned long inflated_size)
|
||||
{
|
||||
z_stream stream;
|
||||
void *out;
|
||||
int st;
|
||||
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
||||
stream.next_in = (unsigned char *)data;
|
||||
stream.avail_in = size;
|
||||
stream.next_out = out = xmalloc(inflated_size);
|
||||
stream.avail_out = inflated_size;
|
||||
inflateInit(&stream);
|
||||
st = inflate(&stream, Z_FINISH);
|
||||
if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
|
||||
free(out);
|
||||
return NULL;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
|
||||
{
|
||||
unsigned long dst_size;
|
||||
struct fragment *fragment = patch->fragments;
|
||||
void *data;
|
||||
void *result;
|
||||
|
||||
data = inflate_it(fragment->patch, fragment->size,
|
||||
patch->deflate_origlen);
|
||||
if (!data)
|
||||
return error("corrupt patch data");
|
||||
switch (patch->is_binary) {
|
||||
case BINARY_DELTA_DEFLATED:
|
||||
result = patch_delta(desc->buffer, desc->size,
|
||||
data,
|
||||
patch->deflate_origlen,
|
||||
&dst_size);
|
||||
free(desc->buffer);
|
||||
desc->buffer = result;
|
||||
free(data);
|
||||
break;
|
||||
case BINARY_LITERAL_DEFLATED:
|
||||
free(desc->buffer);
|
||||
desc->buffer = data;
|
||||
dst_size = patch->deflate_origlen;
|
||||
break;
|
||||
}
|
||||
if (!desc->buffer)
|
||||
return -1;
|
||||
desc->size = desc->alloc = dst_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int apply_binary(struct buffer_desc *desc, struct patch *patch)
|
||||
{
|
||||
const char *name = patch->old_name ? patch->old_name : patch->new_name;
|
||||
|
@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
|
|||
"'%s' but it is not empty", name);
|
||||
}
|
||||
|
||||
if (desc->buffer) {
|
||||
get_sha1_hex(patch->new_sha1_prefix, sha1);
|
||||
if (!memcmp(sha1, null_sha1, 20)) {
|
||||
free(desc->buffer);
|
||||
desc->alloc = desc->size = 0;
|
||||
}
|
||||
get_sha1_hex(patch->new_sha1_prefix, sha1);
|
||||
if (!memcmp(sha1, null_sha1, 20))
|
||||
desc->buffer = NULL;
|
||||
return 0; /* deletion patch */
|
||||
}
|
||||
|
||||
if (has_sha1_file(sha1)) {
|
||||
/* We already have the postimage */
|
||||
char type[10];
|
||||
unsigned long size;
|
||||
|
||||
free(desc->buffer);
|
||||
desc->buffer = read_sha1_file(sha1, type, &size);
|
||||
if (!desc->buffer)
|
||||
return error("the necessary postimage %s for "
|
||||
|
@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
|
|||
desc->alloc = desc->size = size;
|
||||
}
|
||||
else {
|
||||
char type[10];
|
||||
unsigned long src_size, dst_size;
|
||||
void *src;
|
||||
|
||||
get_sha1_hex(patch->old_sha1_prefix, sha1);
|
||||
src = read_sha1_file(sha1, type, &src_size);
|
||||
if (!src)
|
||||
return error("the necessary preimage %s for "
|
||||
"'%s' cannot be read",
|
||||
patch->old_sha1_prefix, name);
|
||||
|
||||
/* patch->fragment->patch has the delta data and
|
||||
* we should apply it to the preimage.
|
||||
/* We have verified desc matches the preimage;
|
||||
* apply the patch data to it, which is stored
|
||||
* in the patch->fragments->{patch,size}.
|
||||
*/
|
||||
desc->buffer = patch_delta(src, src_size,
|
||||
(void*) patch->fragments->patch,
|
||||
patch->fragments->size,
|
||||
&dst_size);
|
||||
if (!desc->buffer)
|
||||
if (apply_binary_fragment(desc, patch))
|
||||
return error("binary patch does not apply to '%s'",
|
||||
name);
|
||||
desc->size = desc->alloc = dst_size;
|
||||
|
||||
/* verify that the result matches */
|
||||
write_sha1_file_prepare(desc->buffer, desc->size, blob_type,
|
||||
|
@ -2102,7 +2165,8 @@ int main(int argc, char **argv)
|
|||
diffstat = 1;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(arg, "--allow-binary-replacement")) {
|
||||
if (!strcmp(arg, "--allow-binary-replacement") ||
|
||||
!strcmp(arg, "--binary")) {
|
||||
allow_binary_replacement = 1;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
#include "cache.h"
|
||||
|
||||
#undef DEBUG_85
|
||||
|
||||
#ifdef DEBUG_85
|
||||
#define say(a) fprintf(stderr, a)
|
||||
#define say1(a,b) fprintf(stderr, a, b)
|
||||
#define say2(a,b,c) fprintf(stderr, a, b, c)
|
||||
#else
|
||||
#define say(a) do {} while(0)
|
||||
#define say1(a,b) do {} while(0)
|
||||
#define say2(a,b,c) do {} while(0)
|
||||
#endif
|
||||
|
||||
static const char en85[] = {
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
|
||||
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
|
||||
'U', 'V', 'W', 'X', 'Y', 'Z',
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
|
||||
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
|
||||
'u', 'v', 'w', 'x', 'y', 'z',
|
||||
'!', '#', '$', '%', '&', '(', ')', '*', '+', '-',
|
||||
';', '<', '=', '>', '?', '@', '^', '_', '`', '{',
|
||||
'|', '}', '~'
|
||||
};
|
||||
|
||||
static char de85[256];
|
||||
static void prep_base85(void)
|
||||
{
|
||||
int i;
|
||||
if (de85['Z'])
|
||||
return;
|
||||
for (i = 0; i < ARRAY_SIZE(en85); i++) {
|
||||
int ch = en85[i];
|
||||
de85[ch] = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
int decode_85(char *dst, char *buffer, int len)
|
||||
{
|
||||
prep_base85();
|
||||
|
||||
say2("decode 85 <%.*s>", len/4*5, buffer);
|
||||
while (len) {
|
||||
unsigned acc = 0;
|
||||
int cnt;
|
||||
for (cnt = 0; cnt < 5; cnt++, buffer++) {
|
||||
int ch = *((unsigned char *)buffer);
|
||||
int de = de85[ch];
|
||||
if (!de)
|
||||
return error("invalid base85 alphabet %c", ch);
|
||||
de--;
|
||||
if (cnt == 4) {
|
||||
/*
|
||||
* Detect overflow. The largest
|
||||
* 5-letter possible is "|NsC0" to
|
||||
* encode 0xffffffff, and "|NsC" gives
|
||||
* 0x03030303 at this point (i.e.
|
||||
* 0xffffffff = 0x03030303 * 85).
|
||||
*/
|
||||
if (0x03030303 < acc ||
|
||||
(0x03030303 == acc && de))
|
||||
error("invalid base85 sequence %.5s",
|
||||
buffer-3);
|
||||
}
|
||||
acc = acc * 85 + de;
|
||||
say1(" <%08x>", acc);
|
||||
}
|
||||
say1(" %08x", acc);
|
||||
for (cnt = 0; cnt < 4 && len; cnt++, len--) {
|
||||
*dst++ = (acc >> 24) & 0xff;
|
||||
acc = acc << 8;
|
||||
}
|
||||
}
|
||||
say("\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void encode_85(char *buf, unsigned char *data, int bytes)
|
||||
{
|
||||
prep_base85();
|
||||
|
||||
say("encode 85");
|
||||
while (bytes) {
|
||||
unsigned acc = 0;
|
||||
int cnt;
|
||||
for (cnt = 0; cnt < 4 && bytes; cnt++, bytes--) {
|
||||
int ch = *data++;
|
||||
acc |= ch << ((3-cnt)*8);
|
||||
}
|
||||
say1(" %08x", acc);
|
||||
for (cnt = 0; cnt < 5; cnt++) {
|
||||
int val = acc % 85;
|
||||
acc /= 85;
|
||||
buf[4-cnt] = en85[val];
|
||||
}
|
||||
buf += 5;
|
||||
}
|
||||
say("\n");
|
||||
|
||||
*buf = 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_85
|
||||
int main(int ac, char **av)
|
||||
{
|
||||
char buf[1024];
|
||||
|
||||
if (!strcmp(av[1], "-e")) {
|
||||
int len = strlen(av[2]);
|
||||
encode_85(buf, av[2], len);
|
||||
if (len <= 26) len = len + 'A' - 1;
|
||||
else len = len + 'a' - 26 + 1;
|
||||
printf("encoded: %c%s\n", len, buf);
|
||||
return 0;
|
||||
}
|
||||
if (!strcmp(av[1], "-d")) {
|
||||
int len = *av[2];
|
||||
if ('A' <= len && len <= 'Z') len = len - 'A' + 1;
|
||||
else len = len - 'a' + 26 + 1;
|
||||
decode_85(buf, av[2]+1, len);
|
||||
printf("decoded: %.*s\n", len, buf);
|
||||
return 0;
|
||||
}
|
||||
if (!strcmp(av[1], "-t")) {
|
||||
char t[4] = { -1,-1,-1,-1 };
|
||||
encode_85(buf, t, 4);
|
||||
printf("encoded: D%s\n", buf);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#endif
|
1
cache.h
1
cache.h
|
@ -365,5 +365,6 @@ extern void setup_pager(void);
|
|||
|
||||
/* base85 */
|
||||
int decode_85(char *dst, char *line, int linelen);
|
||||
void encode_85(char *buf, unsigned char *data, int bytes);
|
||||
|
||||
#endif /* CACHE_H */
|
||||
|
|
132
diff.c
132
diff.c
|
@ -392,78 +392,78 @@ static void show_stats(struct diffstat_t* data)
|
|||
total_files, adds, dels);
|
||||
}
|
||||
|
||||
static void *encode_delta_size(void *data, unsigned long size)
|
||||
static unsigned char *deflate_it(char *data,
|
||||
unsigned long size,
|
||||
unsigned long *result_size)
|
||||
{
|
||||
unsigned char *cp = data;
|
||||
*cp++ = size;
|
||||
size >>= 7;
|
||||
while (size) {
|
||||
cp[-1] |= 0x80;
|
||||
*cp++ = size;
|
||||
size >>= 7;
|
||||
}
|
||||
return cp;
|
||||
}
|
||||
int bound;
|
||||
unsigned char *deflated;
|
||||
z_stream stream;
|
||||
|
||||
static void *safe_diff_delta(const unsigned char *src, unsigned long src_size,
|
||||
const unsigned char *dst, unsigned long dst_size,
|
||||
unsigned long *delta_size)
|
||||
{
|
||||
unsigned long bufsize;
|
||||
unsigned char *data;
|
||||
unsigned char *cp;
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
deflateInit(&stream, Z_BEST_COMPRESSION);
|
||||
bound = deflateBound(&stream, size);
|
||||
deflated = xmalloc(bound);
|
||||
stream.next_out = deflated;
|
||||
stream.avail_out = bound;
|
||||
|
||||
if (src_size && dst_size)
|
||||
return diff_delta(src, src_size, dst, dst_size, delta_size, 0);
|
||||
|
||||
/* diff-delta does not like to do delta with empty, so
|
||||
* we do that by hand here. Sigh...
|
||||
*/
|
||||
|
||||
if (!src_size)
|
||||
/* literal copy can be done only 127-byte at a time.
|
||||
*/
|
||||
bufsize = dst_size + (dst_size / 127) + 40;
|
||||
else
|
||||
bufsize = 40;
|
||||
data = xmalloc(bufsize);
|
||||
cp = encode_delta_size(data, src_size);
|
||||
cp = encode_delta_size(cp, dst_size);
|
||||
|
||||
if (dst_size) {
|
||||
/* copy out literally */
|
||||
while (dst_size) {
|
||||
int sz = (127 < dst_size) ? 127 : dst_size;
|
||||
*cp++ = sz;
|
||||
dst_size -= sz;
|
||||
while (sz) {
|
||||
*cp++ = *dst++;
|
||||
sz--;
|
||||
}
|
||||
}
|
||||
}
|
||||
*delta_size = (cp - data);
|
||||
return data;
|
||||
stream.next_in = (unsigned char *)data;
|
||||
stream.avail_in = size;
|
||||
while (deflate(&stream, Z_FINISH) == Z_OK)
|
||||
; /* nothing */
|
||||
deflateEnd(&stream);
|
||||
*result_size = stream.total_out;
|
||||
return deflated;
|
||||
}
|
||||
|
||||
static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
|
||||
{
|
||||
void *delta, *cp;
|
||||
void *cp;
|
||||
void *delta;
|
||||
void *deflated;
|
||||
void *data;
|
||||
unsigned long orig_size;
|
||||
unsigned long delta_size;
|
||||
unsigned long deflate_size;
|
||||
unsigned long data_size;
|
||||
|
||||
printf("GIT binary patch\n");
|
||||
delta = safe_diff_delta(one->ptr, one->size,
|
||||
two->ptr, two->size,
|
||||
&delta_size);
|
||||
if (!delta)
|
||||
die("unable to generate binary diff");
|
||||
/* We could do deflated delta, or we could do just deflated two,
|
||||
* whichever is smaller.
|
||||
*/
|
||||
delta = NULL;
|
||||
deflated = deflate_it(two->ptr, two->size, &deflate_size);
|
||||
if (one->size && two->size) {
|
||||
delta = diff_delta(one->ptr, one->size,
|
||||
two->ptr, two->size,
|
||||
&delta_size, deflate_size);
|
||||
if (delta) {
|
||||
void *to_free = delta;
|
||||
orig_size = delta_size;
|
||||
delta = deflate_it(delta, delta_size, &delta_size);
|
||||
free(to_free);
|
||||
}
|
||||
}
|
||||
|
||||
/* emit delta encoded in base85 */
|
||||
cp = delta;
|
||||
while (delta_size) {
|
||||
int bytes = (52 < delta_size) ? 52 : delta_size;
|
||||
if (delta && delta_size < deflate_size) {
|
||||
printf("delta %lu\n", orig_size);
|
||||
free(deflated);
|
||||
data = delta;
|
||||
data_size = delta_size;
|
||||
}
|
||||
else {
|
||||
printf("literal %lu\n", two->size);
|
||||
free(delta);
|
||||
data = deflated;
|
||||
data_size = deflate_size;
|
||||
}
|
||||
|
||||
/* emit data encoded in base85 */
|
||||
cp = data;
|
||||
while (data_size) {
|
||||
int bytes = (52 < data_size) ? 52 : data_size;
|
||||
char line[70];
|
||||
delta_size -= bytes;
|
||||
data_size -= bytes;
|
||||
if (bytes <= 26)
|
||||
line[0] = bytes + 'A' - 1;
|
||||
else
|
||||
|
@ -473,7 +473,7 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two)
|
|||
puts(line);
|
||||
}
|
||||
printf("\n");
|
||||
free(delta);
|
||||
free(data);
|
||||
}
|
||||
|
||||
#define FIRST_FEW_BYTES 8000
|
||||
|
@ -538,7 +538,11 @@ static void builtin_diff(const char *name_a,
|
|||
die("unable to read files to diff");
|
||||
|
||||
if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) {
|
||||
if (o->full_index)
|
||||
/* Quite common confusing case */
|
||||
if (mf1.size == mf2.size &&
|
||||
!memcmp(mf1.ptr, mf2.ptr, mf1.size))
|
||||
goto free_ab_and_return;
|
||||
if (o->binary)
|
||||
emit_binary_diff(&mf1, &mf2);
|
||||
else
|
||||
printf("Binary files %s and %s differ\n",
|
||||
|
@ -1239,6 +1243,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
|
|||
options->rename_limit = strtoul(arg+2, NULL, 10);
|
||||
else if (!strcmp(arg, "--full-index"))
|
||||
options->full_index = 1;
|
||||
else if (!strcmp(arg, "--binary")) {
|
||||
options->output_format = DIFF_FORMAT_PATCH;
|
||||
options->full_index = options->binary = 1;
|
||||
}
|
||||
else if (!strcmp(arg, "--name-only"))
|
||||
options->output_format = DIFF_FORMAT_NAME;
|
||||
else if (!strcmp(arg, "--name-status"))
|
||||
|
|
1
diff.h
1
diff.h
|
@ -28,6 +28,7 @@ struct diff_options {
|
|||
with_raw:1,
|
||||
with_stat:1,
|
||||
tree_in_recursive:1,
|
||||
binary:1,
|
||||
full_index:1,
|
||||
silent_on_remove:1,
|
||||
find_copies_harder:1;
|
||||
|
|
Загрузка…
Ссылка в новой задаче