Merge branch 'jk/cached-textconv'

* jk/cached-textconv:
  diff: avoid useless filespec population
  diff: cache textconv output
  textconv: refactor calls to run_textconv
  introduce notes-cache interface
  make commit_tree a library function
This commit is contained in:
Junio C Hamano 2010-05-08 22:33:08 -07:00
Родитель 3ecaa3b6a5 b337398266
Коммит dd75d07899
14 изменённых файлов: 403 добавлений и 118 удалений

Просмотреть файл

@ -414,6 +414,26 @@ because it quickly conveys the changes you have made), you
should generate it separately and send it as a comment _in
addition to_ the usual binary diff that you might send.
Because text conversion can be slow, especially when performing a
large number of conversions with `git log -p`, git provides a mechanism
to cache the output and use it in future diffs. To enable
caching, set the "cachetextconv" variable in your diff driver's
config. For example:
------------------------
[diff "jpg"]
textconv = exif
cachetextconv = true
------------------------
This will cache the result of running "exif" on each blob
indefinitely. If you change the textconv config variable for a
diff driver, git will automatically invalidate the cache entries
and re-run the textconv filter. If you want to invalidate the
cache manually (e.g., because your version of "exif" was updated
and now produces better output), you can remove the cache
with `git update-ref -d refs/notes/textconv/jpg` (where
"jpg" is the name of the diff driver, as in the example above).
Performing a three-way merge
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Просмотреть файл

@ -486,6 +486,7 @@ LIB_H += log-tree.h
LIB_H += mailmap.h
LIB_H += merge-recursive.h
LIB_H += notes.h
LIB_H += notes-cache.h
LIB_H += object.h
LIB_H += pack.h
LIB_H += pack-refs.h
@ -575,6 +576,7 @@ LIB_OBJS += merge-file.o
LIB_OBJS += merge-recursive.o
LIB_OBJS += name-hash.o
LIB_OBJS += notes.o
LIB_OBJS += notes-cache.o
LIB_OBJS += object.o
LIB_OBJS += pack-check.o
LIB_OBJS += pack-refs.o

Просмотреть файл

@ -16,9 +16,6 @@ extern const char *help_unknown_cmd(const char *cmd);
extern void prune_packed_objects(int);
extern int fmt_merge_msg(int merge_summary, struct strbuf *in,
struct strbuf *out);
extern int commit_tree(const char *msg, unsigned char *tree,
struct commit_list *parents, unsigned char *ret,
const char *author);
extern int commit_notes(struct notes_tree *t, const char *msg);
struct notes_rewrite_cfg {

Просмотреть файл

@ -9,19 +9,6 @@
#include "builtin.h"
#include "utf8.h"
/*
* FIXME! Share the code with "write-tree.c"
*/
static void check_valid(unsigned char *sha1, enum object_type expect)
{
enum object_type type = sha1_object_info(sha1, NULL);
if (type < 0)
die("%s is not a valid object", sha1_to_hex(sha1));
if (type != expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}
static const char commit_tree_usage[] = "git commit-tree <sha1> [-p <sha1>]* < changelog";
static void new_parent(struct commit *parent, struct commit_list **parents_p)
@ -38,61 +25,6 @@ static void new_parent(struct commit *parent, struct commit_list **parents_p)
commit_list_insert(parent, parents_p);
}
static const char commit_utf8_warn[] =
"Warning: commit message does not conform to UTF-8.\n"
"You may want to amend it after fixing the message, or set the config\n"
"variable i18n.commitencoding to the encoding your project uses.\n";
/*
 * Build a commit object for `tree` and write it to the object database.
 *
 * msg     - commit message, appended verbatim after the header block
 * tree    - sha1 that must name a tree object (check_valid() dies otherwise)
 * parents - parent commits; every list node is freed as it is consumed,
 *           so the caller must not touch the list afterwards
 * ret     - handed to write_sha1_file() to receive the new object's sha1
 * author  - author ident string, or NULL to use git_author_info()
 *
 * Returns the result of write_sha1_file().
 */
int commit_tree(const char *msg, unsigned char *tree,
		struct commit_list *parents, unsigned char *ret,
		const char *author)
{
	int result;
	int encoding_is_utf8;
	struct strbuf buffer;

	check_valid(tree, OBJ_TREE);

	/* Not having i18n.commitencoding is the same as having utf-8 */
	encoding_is_utf8 = is_encoding_utf8(git_commit_encoding);

	strbuf_init(&buffer, 8192); /* should avoid reallocs for the headers */
	strbuf_addf(&buffer, "tree %s\n", sha1_to_hex(tree));

	/*
	 * NOTE! This ordering means that the same exact tree merged with a
	 * different order of parents will be a _different_ changeset even
	 * if everything else stays the same.
	 */
	while (parents) {
		struct commit_list *next = parents->next;
		strbuf_addf(&buffer, "parent %s\n",
			    sha1_to_hex(parents->item->object.sha1));
		free(parents);
		parents = next;
	}

	/* Person/date information */
	if (!author)
		author = git_author_info(IDENT_ERROR_ON_NO_NAME);
	strbuf_addf(&buffer, "author %s\n", author);
	strbuf_addf(&buffer, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
	if (!encoding_is_utf8)
		strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding);
	strbuf_addch(&buffer, '\n');

	/* And add the comment */
	strbuf_addstr(&buffer, msg);

	/*
	 * And check the encoding.  The warning is a fixed string, so emit
	 * it with fputs() rather than using it as a printf format.
	 */
	if (encoding_is_utf8 && !is_utf8(buffer.buf))
		fputs(commit_utf8_warn, stderr);

	result = write_sha1_file(buffer.buf, buffer.len, commit_type, ret);
	strbuf_release(&buffer);
	return result;
}
int cmd_commit_tree(int argc, const char **argv, const char *prefix)
{
int i;
@ -117,7 +49,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
if (get_sha1(b, sha1))
die("Not a valid object name %s", b);
check_valid(sha1, OBJ_COMMIT);
assert_sha1_type(sha1, OBJ_COMMIT);
new_parent(lookup_commit(sha1), &parents);
}

Просмотреть файл

@ -718,6 +718,8 @@ extern int has_loose_object_nonlocal(const unsigned char *sha1);
extern int has_pack_index(const unsigned char *sha1);
extern void assert_sha1_type(const unsigned char *sha1, enum object_type expect);
extern const signed char hexval_table[256];
static inline unsigned int hexval(unsigned char c)
{

Просмотреть файл

@ -790,3 +790,58 @@ struct commit_list *reduce_heads(struct commit_list *heads)
free(other);
return result;
}
static const char commit_utf8_warn[] =
"Warning: commit message does not conform to UTF-8.\n"
"You may want to amend it after fixing the message, or set the config\n"
"variable i18n.commitencoding to the encoding your project uses.\n";
/*
 * Build a commit object for `tree` and write it to the object database.
 *
 * msg     - commit message, appended verbatim after the header block
 * tree    - sha1 that must name a tree object (assert_sha1_type() dies
 *           otherwise)
 * parents - parent commits; every list node is freed as it is consumed,
 *           so the caller must not touch the list afterwards
 * ret     - handed to write_sha1_file() to receive the new object's sha1
 * author  - author ident string, or NULL to use git_author_info()
 *
 * Returns the result of write_sha1_file().
 */
int commit_tree(const char *msg, unsigned char *tree,
		struct commit_list *parents, unsigned char *ret,
		const char *author)
{
	int result;
	int encoding_is_utf8;
	struct strbuf buffer;

	assert_sha1_type(tree, OBJ_TREE);

	/* Not having i18n.commitencoding is the same as having utf-8 */
	encoding_is_utf8 = is_encoding_utf8(git_commit_encoding);

	strbuf_init(&buffer, 8192); /* should avoid reallocs for the headers */
	strbuf_addf(&buffer, "tree %s\n", sha1_to_hex(tree));

	/*
	 * NOTE! This ordering means that the same exact tree merged with a
	 * different order of parents will be a _different_ changeset even
	 * if everything else stays the same.
	 */
	while (parents) {
		struct commit_list *next = parents->next;
		strbuf_addf(&buffer, "parent %s\n",
			    sha1_to_hex(parents->item->object.sha1));
		free(parents);
		parents = next;
	}

	/* Person/date information */
	if (!author)
		author = git_author_info(IDENT_ERROR_ON_NO_NAME);
	strbuf_addf(&buffer, "author %s\n", author);
	strbuf_addf(&buffer, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
	if (!encoding_is_utf8)
		strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding);
	strbuf_addch(&buffer, '\n');

	/* And add the comment */
	strbuf_addstr(&buffer, msg);

	/*
	 * And check the encoding.  The warning is a fixed string, so emit
	 * it with fputs() rather than using it as a printf format.
	 */
	if (encoding_is_utf8 && !is_utf8(buffer.buf))
		fputs(commit_utf8_warn, stderr);

	result = write_sha1_file(buffer.buf, buffer.len, commit_type, ret);
	strbuf_release(&buffer);
	return result;
}

Просмотреть файл

@ -163,4 +163,8 @@ static inline int single_parent(struct commit *commit)
struct commit_list *reduce_heads(struct commit_list *heads);
extern int commit_tree(const char *msg, unsigned char *tree,
struct commit_list *parents, unsigned char *ret,
const char *author);
#endif /* COMMIT_H */

119
diff.c
Просмотреть файл

@ -44,7 +44,8 @@ static char diff_colors[][COLOR_MAXLEN] = {
};
static void diff_filespec_load_driver(struct diff_filespec *one);
static char *run_textconv(const char *, struct diff_filespec *, size_t *);
static size_t fill_textconv(struct userdiff_driver *driver,
struct diff_filespec *df, char **outbuf);
static int parse_diff_color_slot(const char *var, int ofs)
{
@ -466,8 +467,8 @@ static void emit_rewrite_diff(const char *name_a,
const char *name_b,
struct diff_filespec *one,
struct diff_filespec *two,
const char *textconv_one,
const char *textconv_two,
struct userdiff_driver *textconv_one,
struct userdiff_driver *textconv_two,
struct diff_options *o)
{
int lc_a, lc_b;
@ -478,7 +479,7 @@ static void emit_rewrite_diff(const char *name_a,
const char *reset = diff_get_color(color_diff, DIFF_RESET);
static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
const char *a_prefix, *b_prefix;
const char *data_one, *data_two;
char *data_one, *data_two;
size_t size_one, size_two;
struct emit_callback ecbdata;
@ -500,26 +501,8 @@ static void emit_rewrite_diff(const char *name_a,
quote_two_c_style(&a_name, a_prefix, name_a, 0);
quote_two_c_style(&b_name, b_prefix, name_b, 0);
diff_populate_filespec(one, 0);
diff_populate_filespec(two, 0);
if (textconv_one) {
data_one = run_textconv(textconv_one, one, &size_one);
if (!data_one)
die("unable to read files to diff");
}
else {
data_one = one->data;
size_one = one->size;
}
if (textconv_two) {
data_two = run_textconv(textconv_two, two, &size_two);
if (!data_two)
die("unable to read files to diff");
}
else {
data_two = two->data;
size_two = two->size;
}
size_one = fill_textconv(textconv_one, one, &data_one);
size_two = fill_textconv(textconv_two, two, &data_two);
memset(&ecbdata, 0, sizeof(ecbdata));
ecbdata.color_diff = color_diff;
@ -1585,14 +1568,26 @@ void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const
options->b_prefix = b;
}
static const char *get_textconv(struct diff_filespec *one)
static struct userdiff_driver *get_textconv(struct diff_filespec *one)
{
if (!DIFF_FILE_VALID(one))
return NULL;
if (!S_ISREG(one->mode))
return NULL;
diff_filespec_load_driver(one);
return one->driver->textconv;
if (!one->driver->textconv)
return NULL;
if (one->driver->textconv_want_cache && !one->driver->textconv_cache) {
struct notes_cache *c = xmalloc(sizeof(*c));
struct strbuf name = STRBUF_INIT;
strbuf_addf(&name, "textconv/%s", one->driver->name);
notes_cache_init(c, name.buf, one->driver->textconv);
one->driver->textconv_cache = c;
}
return one->driver;
}
static void builtin_diff(const char *name_a,
@ -1609,7 +1604,8 @@ static void builtin_diff(const char *name_a,
const char *set = diff_get_color_opt(o, DIFF_METAINFO);
const char *reset = diff_get_color_opt(o, DIFF_RESET);
const char *a_prefix, *b_prefix;
const char *textconv_one = NULL, *textconv_two = NULL;
struct userdiff_driver *textconv_one = NULL;
struct userdiff_driver *textconv_two = NULL;
struct strbuf header = STRBUF_INIT;
if (DIFF_OPT_TST(o, SUBMODULE_LOG) &&
@ -1683,12 +1679,11 @@ static void builtin_diff(const char *name_a,
}
}
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
die("unable to read files to diff");
if (!DIFF_OPT_TST(o, TEXT) &&
( (diff_filespec_is_binary(one) && !textconv_one) ||
(diff_filespec_is_binary(two) && !textconv_two) )) {
( (!textconv_one && diff_filespec_is_binary(one)) ||
(!textconv_two && diff_filespec_is_binary(two)) )) {
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
die("unable to read files to diff");
/* Quite common confusing case */
if (mf1.size == mf2.size &&
!memcmp(mf1.ptr, mf2.ptr, mf1.size))
@ -1715,20 +1710,8 @@ static void builtin_diff(const char *name_a,
strbuf_reset(&header);
}
if (textconv_one) {
size_t size;
mf1.ptr = run_textconv(textconv_one, one, &size);
if (!mf1.ptr)
die("unable to read files to diff");
mf1.size = size;
}
if (textconv_two) {
size_t size;
mf2.ptr = run_textconv(textconv_two, two, &size);
if (!mf2.ptr)
die("unable to read files to diff");
mf2.size = size;
}
mf1.size = fill_textconv(textconv_one, one, &mf1.ptr);
mf2.size = fill_textconv(textconv_two, two, &mf2.ptr);
pe = diff_funcname_pattern(one);
if (!pe)
@ -3912,3 +3895,47 @@ static char *run_textconv(const char *pgm, struct diff_filespec *spec,
return strbuf_detach(&buf, outsize);
}
/*
 * Produce the buffer to diff for `df` and return its size.
 *
 * Without a driver (or without a textconv command) the filespec's own
 * data is used: an invalid filespec yields the empty string, otherwise
 * the filespec is populated and *outbuf points at df->data.
 *
 * With a textconv driver, *outbuf receives either a cached conversion
 * or the fresh output of run_textconv().  Dies if the content cannot be
 * read or converted.
 */
static size_t fill_textconv(struct userdiff_driver *driver,
			    struct diff_filespec *df,
			    char **outbuf)
{
	size_t size;
	int use_cache;

	if (!driver || !driver->textconv) {
		if (!DIFF_FILE_VALID(df)) {
			*outbuf = "";
			return 0;
		}
		if (diff_populate_filespec(df, 0))
			die("unable to read files to diff");
		*outbuf = df->data;
		return df->size;
	}

	/*
	 * Cache entries are keyed by df->sha1.  A filespec whose sha1 is
	 * not marked valid has no meaningful key, so it must neither read
	 * from nor write to the cache; otherwise unrelated contents would
	 * share one entry.
	 * NOTE(review): relies on diff_filespec.sha1_valid from diffcore.h.
	 */
	use_cache = driver->textconv_cache && df->sha1_valid;

	if (use_cache) {
		*outbuf = notes_cache_get(driver->textconv_cache, df->sha1,
					  &size);
		if (*outbuf)
			return size;
	}

	*outbuf = run_textconv(driver->textconv, df, &size);
	if (!*outbuf)
		die("unable to read files to diff");

	if (use_cache) {
		/* ignore errors, as we might be in a readonly repository */
		notes_cache_put(driver->textconv_cache, df->sha1, *outbuf,
				size);
		/*
		 * we could save up changes and flush them all at the end,
		 * but we would need an extra call after all diffing is done.
		 * Since generating a cache entry is the slow path anyway,
		 * this extra overhead probably isn't a big deal.
		 */
		notes_cache_write(driver->textconv_cache);
	}

	return size;
}

94
notes-cache.c Normal file
Просмотреть файл

@ -0,0 +1,94 @@
#include "cache.h"
#include "notes-cache.h"
#include "commit.h"
#include "refs.h"
static int notes_cache_match_validity(const char *ref, const char *validity)
{
unsigned char sha1[20];
struct commit *commit;
struct pretty_print_context pretty_ctx;
struct strbuf msg = STRBUF_INIT;
int ret;
if (read_ref(ref, sha1) < 0)
return 0;
commit = lookup_commit_reference_gently(sha1, 1);
if (!commit)
return 0;
memset(&pretty_ctx, 0, sizeof(pretty_ctx));
format_commit_message(commit, "%s", &msg, &pretty_ctx);
strbuf_trim(&msg);
ret = !strcmp(msg.buf, validity);
strbuf_release(&msg);
return ret;
}
void notes_cache_init(struct notes_cache *c, const char *name,
const char *validity)
{
struct strbuf ref = STRBUF_INIT;
int flags = 0;
memset(c, 0, sizeof(*c));
c->validity = xstrdup(validity);
strbuf_addf(&ref, "refs/notes/%s", name);
if (!notes_cache_match_validity(ref.buf, validity))
flags = NOTES_INIT_EMPTY;
init_notes(&c->tree, ref.buf, combine_notes_overwrite, flags);
strbuf_release(&ref);
}
/*
 * Persist pending cache changes: write the notes tree, wrap it in a
 * parentless commit whose message is c->validity, and point the cache
 * ref at that commit.
 *
 * Returns 0 on success or when nothing is dirty, -1 when the cache is
 * unusable or any step fails.
 */
int notes_cache_write(struct notes_cache *c)
{
	unsigned char tree_sha1[20];
	unsigned char commit_sha1[20];
	int usable = c && c->tree.initialized && c->tree.ref && *c->tree.ref;

	if (!usable)
		return -1;
	if (!c->tree.dirty)
		return 0;

	/* Short-circuit keeps the steps in order and stops at the first failure. */
	if (write_notes_tree(&c->tree, tree_sha1) ||
	    commit_tree(c->validity, tree_sha1, NULL, commit_sha1, NULL) < 0 ||
	    update_ref("update notes cache", c->tree.ref, commit_sha1, NULL,
		       0, QUIET_ON_ERR) < 0)
		return -1;

	return 0;
}
/*
 * Look up the cached value stored under `key_sha1`.
 *
 * Returns the buffer obtained from read_sha1_file() and stores its
 * length in *outsize.  Returns NULL when there is no note for the key
 * or the noted object cannot be read; *outsize is left untouched then.
 */
char *notes_cache_get(struct notes_cache *c, unsigned char key_sha1[20],
		      size_t *outsize)
{
	const unsigned char *value_sha1;
	enum object_type type;
	char *value;
	unsigned long size;

	value_sha1 = get_note(&c->tree, key_sha1);
	if (!value_sha1)
		return NULL;

	value = read_sha1_file(value_sha1, &type, &size);
	if (!value)
		return NULL; /* size may not have been set; do not report it */

	*outsize = size;
	return value;
}
/*
 * Store `size` bytes of `data` as a blob and record it as the note for
 * `key_sha1` in the cache's in-memory notes tree.
 *
 * Returns 0 on success, -1 if the blob could not be written.
 */
int notes_cache_put(struct notes_cache *c, unsigned char key_sha1[20],
		    const char *data, size_t size)
{
	unsigned char blob_sha1[20];
	int rc = write_sha1_file(data, size, "blob", blob_sha1);

	if (rc < 0)
		return -1;

	add_note(&c->tree, key_sha1, blob_sha1, NULL);
	return 0;
}

20
notes-cache.h Normal file
Просмотреть файл

@ -0,0 +1,20 @@
#ifndef NOTES_CACHE_H
#define NOTES_CACHE_H
#include "notes.h"
/*
 * A key/value cache stored in a notes tree.
 *
 * tree     - the notes tree holding the cached entries
 * validity - string identifying what the cached values depend on; it is
 *            used as the message of the commit written for the cache
 */
struct notes_cache {
struct notes_tree tree;
char *validity;
};
/*
 * Initialise `c` on the ref "refs/notes/<name>".  Existing entries are
 * only loaded when the ref's commit message matches `validity`.
 */
void notes_cache_init(struct notes_cache *c, const char *name,
const char *validity);
/* Write out pending changes; returns 0 on success (or nothing to do), -1 on error. */
int notes_cache_write(struct notes_cache *c);
/*
 * Return the cached value for `sha1` (length in *outsize), or NULL if
 * there is no entry.
 */
char *notes_cache_get(struct notes_cache *c, unsigned char sha1[20], size_t
*outsize);
/* Store `data` as the cached value for `sha1`; returns 0 on success, -1 on error. */
int notes_cache_put(struct notes_cache *c, unsigned char sha1[20],
const char *data, size_t size);
#endif /* NOTES_CACHE_H */

Просмотреть файл

@ -2516,3 +2516,13 @@ int read_pack_header(int fd, struct pack_header *header)
return PH_ERROR_PROTOCOL;
return 0;
}
void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
{
enum object_type type = sha1_object_info(sha1, NULL);
if (type < 0)
die("%s is not a valid object", sha1_to_hex(sha1));
if (type != expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}

109
t/t4042-diff-textconv-caching.sh Executable file
Просмотреть файл

@ -0,0 +1,109 @@
#!/bin/sh
test_description='test textconv caching'
. ./test-lib.sh
# Textconv helper: prefixes every input line with "converted: ".  The
# output is also left behind in helper.out, so a test can tell whether
# the helper was actually executed.
cat >helper <<'EOF'
#!/bin/sh
sed 's/^/converted: /' "$@" >helper.out
cat helper.out
EOF
chmod +x helper
# Create two commits touching foo.bin and bar.bin, then route *.bin
# through the "magic" diff driver with the helper as textconv and
# caching enabled.
test_expect_success 'setup' '
echo foo content 1 >foo.bin &&
echo bar content 1 >bar.bin &&
git add . &&
git commit -m one &&
echo foo content 2 >foo.bin &&
echo bar content 2 >bar.bin &&
git commit -a -m two &&
echo "*.bin diff=magic" >.gitattributes &&
git config diff.magic.textconv ./helper &&
git config diff.magic.cachetextconv true
'
cat >expect <<EOF
diff --git a/bar.bin b/bar.bin
index fcf9166..28283d5 100644
--- a/bar.bin
+++ b/bar.bin
@@ -1 +1 @@
-converted: bar content 1
+converted: bar content 2
diff --git a/foo.bin b/foo.bin
index d5b9fe3..1345db2 100644
--- a/foo.bin
+++ b/foo.bin
@@ -1 +1 @@
-converted: foo content 1
+converted: foo content 2
EOF
# First run with caching enabled: the diff must show the converted text.
test_expect_success 'first textconv works' '
git diff HEAD^ HEAD >actual &&
test_cmp expect actual
'
# Running the identical diff again must give byte-identical output.
test_expect_success 'cached textconv produces same output' '
git diff HEAD^ HEAD >actual &&
test_cmp expect actual
'
# helper.out is removed first; if it is still absent after the diff,
# the helper was not run and the output came from the cache.
test_expect_success 'cached textconv does not run helper' '
rm -f helper.out &&
git diff HEAD^ HEAD >actual &&
test_cmp expect actual &&
! test -r helper.out
'
cat >expect <<EOF
diff --git a/bar.bin b/bar.bin
index fcf9166..28283d5 100644
--- a/bar.bin
+++ b/bar.bin
@@ -1,2 +1,2 @@
converted: other
-converted: bar content 1
+converted: bar content 2
diff --git a/foo.bin b/foo.bin
index d5b9fe3..1345db2 100644
--- a/foo.bin
+++ b/foo.bin
@@ -1,2 +1,2 @@
converted: other
-converted: foo content 1
+converted: foo content 2
EOF
# With the command changed to "./helper other", the helper also converts
# the file "other", so a "converted: other" line must now appear; seeing
# it shows the entries cached for the old command were not reused.
test_expect_success 'changing textconv invalidates cache' '
echo other >other &&
git config diff.magic.textconv "./helper other" &&
git diff HEAD^ HEAD >actual &&
test_cmp expect actual
'
cat >expect <<EOF
diff --git a/bar.bin b/bar.bin
index fcf9166..28283d5 100644
--- a/bar.bin
+++ b/bar.bin
@@ -1,2 +1,2 @@
converted: other
-converted: bar content 1
+converted: bar content 2
diff --git a/foo.bin b/foo.bin
index d5b9fe3..1345db2 100644
--- a/foo.bin
+++ b/foo.bin
@@ -1 +1 @@
-converted: foo content 1
+converted: foo content 2
EOF
# foo.bin is moved to a second driver ("moremagic") that runs plain
# ./helper, while bar.bin stays on "magic" ("./helper other").  Each
# file must show the output of its own driver.
test_expect_success 'switching diff driver produces correct results' '
git config diff.moremagic.textconv ./helper &&
echo foo.bin diff=moremagic >>.gitattributes &&
git diff HEAD^ HEAD >actual &&
test_cmp expect actual
'
test_done

Просмотреть файл

@ -1,3 +1,4 @@
#include "cache.h"
#include "userdiff.h"
#include "cache.h"
#include "attr.h"
@ -167,6 +168,12 @@ static int parse_tristate(int *b, const char *k, const char *v)
return 1;
}
/*
 * Store the boolean interpretation of config value `v` (for key `k`)
 * in *b.  Always returns 1.
 */
static int parse_bool(int *b, const char *k, const char *v)
{
	int value = git_config_bool(k, v);

	*b = value;
	return 1;
}
int userdiff_config(const char *k, const char *v)
{
struct userdiff_driver *drv;
@ -181,6 +188,8 @@ int userdiff_config(const char *k, const char *v)
return parse_string(&drv->external, k, v);
if ((drv = parse_driver(k, v, "textconv")))
return parse_string(&drv->textconv, k, v);
if ((drv = parse_driver(k, v, "cachetextconv")))
return parse_bool(&drv->textconv_want_cache, k, v);
if ((drv = parse_driver(k, v, "wordregex")))
return parse_string(&drv->word_regex, k, v);

Просмотреть файл

@ -1,6 +1,8 @@
#ifndef USERDIFF_H
#define USERDIFF_H
#include "notes-cache.h"
struct userdiff_funcname {
const char *pattern;
int cflags;
@ -13,6 +15,8 @@ struct userdiff_driver {
struct userdiff_funcname funcname;
const char *word_regex;
const char *textconv;
struct notes_cache *textconv_cache;
int textconv_want_cache;
};
int userdiff_config(const char *k, const char *v);