From 3fed15f568c24ec00ef78fddc6cbb881fbbb0277 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 21 Apr 2007 19:09:02 -0700 Subject: [PATCH 1/2] Add 'ident' conversion. The 'ident' attribute set to path squashes "$ident:$" to "$ident$" upon checkin, and expands it to "$ident: $" upon checkout. As we have two conversions that affect checkin/checkout paths, clarify how they interact with each other. Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 31 ++++- convert.c | 194 ++++++++++++++++++++++++++++++-- t/t0021-conversion.sh | 39 +++++++ 3 files changed, 252 insertions(+), 12 deletions(-) create mode 100755 t/t0021-conversion.sh diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 857d55a409..b6f90f6f37 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -78,12 +78,17 @@ are attributes-aware. Checking-out and checking-in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The attribute `crlf` affects how the contents stored in the +These attributes affect how the contents stored in the repository are copied to the working tree files when commands -such as `git checkout` and `git merge` run. It also affects how +such as `git checkout` and `git merge` run. They also affect how git stores the contents you prepare in the working tree in the repository upon `git add` and `git commit`. +`crlf` +^^^^^^ + +This attribute controls the line-ending convention. + Set:: Setting the `crlf` attribute on a path is meant to mark @@ -129,6 +134,28 @@ converted to LF upon checkin, but there is no conversion done upon checkout. +`ident` +^^^^^^^ + +When the attribute `ident` is set to a path, git replaces +`$ident$` in the blob object with `$ident:`, followed by +40-character hexadecimal blob object name, followed by a dollar +sign `$` upon checkout. Any byte sequence that begins with +`$ident:` and ends with `$` in the worktree file is replaced +with `$ident$` upon check-in. 
+ + +Interaction between checkin/checkout attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the check-in codepath, the worktree file is first converted +with `ident` (if specified), and then with `crlf` (again, if +specified and applicable). + +In the check-out codepath, the blob content is first converted +with `crlf`, and then `ident`. + + Generating diff text ~~~~~~~~~~~~~~~~~~~~ diff --git a/convert.c b/convert.c index ad106ef35f..5fdaee7571 100644 --- a/convert.c +++ b/convert.c @@ -1,5 +1,6 @@ #include "cache.h" #include "attr.h" +#include "run-command.h" /* * convert.c - convert a file when checking it out and checking it in. @@ -203,10 +204,152 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long * static void setup_convert_check(struct git_attr_check *check) { static struct git_attr *attr_crlf; + static struct git_attr *attr_ident; - if (!attr_crlf) + if (!attr_crlf) { attr_crlf = git_attr("crlf", 4); - check->attr = attr_crlf; + attr_ident = git_attr("ident", 5); + } + check[0].attr = attr_crlf; + check[1].attr = attr_ident; +} + +static int count_ident(const char *cp, unsigned long size) +{ + /* + * "$ident: 0000000000000000000000000000000000000000 $" <=> "$ident$" + */ + int cnt = 0; + char ch; + + while (size) { + ch = *cp++; + size--; + if (ch != '$') + continue; + if (size < 6) + break; + if (memcmp("ident", cp, 5)) + continue; + ch = cp[5]; + cp += 6; + size -= 6; + if (ch == '$') + cnt++; /* $ident$ */ + if (ch != ':') + continue; + + /* + * "$ident: ... "; scan up to the closing dollar sign and discard. 
+ */ + while (size) { + ch = *cp++; + size--; + if (ch == '$') { + cnt++; + break; + } + } + } + return cnt; +} + +static char *ident_to_git(const char *path, const char *src, unsigned long *sizep, int ident) +{ + int cnt; + unsigned long size; + char *dst, *buf; + + if (!ident) + return NULL; + size = *sizep; + cnt = count_ident(src, size); + if (!cnt) + return NULL; + buf = xmalloc(size); + + for (dst = buf; size; size--) { + char ch = *src++; + *dst++ = ch; + if ((ch == '$') && (6 <= size) && + !memcmp("ident:", src, 6)) { + unsigned long rem = size - 6; + const char *cp = src + 6; + do { + ch = *cp++; + if (ch == '$') + break; + rem--; + } while (rem); + if (!rem) + continue; + memcpy(dst, "ident$", 6); + dst += 6; + size -= (cp - src); + src = cp; + } + } + + *sizep = dst - buf; + return buf; +} + +static char *ident_to_worktree(const char *path, const char *src, unsigned long *sizep, int ident) +{ + int cnt; + unsigned long size; + char *dst, *buf; + unsigned char sha1[20]; + + if (!ident) + return NULL; + + size = *sizep; + cnt = count_ident(src, size); + if (!cnt) + return NULL; + + hash_sha1_file(src, size, "blob", sha1); + buf = xmalloc(size + cnt * 43); + + for (dst = buf; size; size--) { + const char *cp; + char ch = *src++; + *dst++ = ch; + if ((ch != '$') || (size < 6) || memcmp("ident", src, 5)) + continue; + + if (src[5] == ':') { + /* discard up to but not including the closing $ */ + unsigned long rem = size - 6; + cp = src + 6; + do { + ch = *cp++; + if (ch == '$') + break; + rem--; + } while (rem); + if (!rem) + continue; + size -= (cp - src); + } else if (src[5] == '$') + cp = src + 5; + else + continue; + + memcpy(dst, "ident: ", 7); + dst += 7; + memcpy(dst, sha1_to_hex(sha1), 40); + dst += 40; + *dst++ = ' '; + size -= (cp - src); + src = cp; + *dst++ = *src++; + size--; + } + + *sizep = dst - buf; + return buf; } static int git_path_check_crlf(const char *path, struct git_attr_check *check) @@ -224,26 +367,57 @@ static int 
git_path_check_crlf(const char *path, struct git_attr_check *check) return CRLF_GUESS; } +static int git_path_check_ident(const char *path, struct git_attr_check *check) +{ + const char *value = check->value; + + return !!ATTR_TRUE(value); +} + char *convert_to_git(const char *path, const char *src, unsigned long *sizep) { - struct git_attr_check check[1]; + struct git_attr_check check[2]; int crlf = CRLF_GUESS; + int ident = 0; + char *buf, *buf2; setup_convert_check(check); - if (!git_checkattr(path, 1, check)) { - crlf = git_path_check_crlf(path, check); + if (!git_checkattr(path, ARRAY_SIZE(check), check)) { + crlf = git_path_check_crlf(path, check + 0); + ident = git_path_check_ident(path, check + 1); } - return crlf_to_git(path, src, sizep, crlf); + + buf = crlf_to_git(path, src, sizep, crlf); + + buf2 = ident_to_git(path, buf ? buf : src, sizep, ident); + if (buf2) { + free(buf); + buf = buf2; + } + + return buf; } char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep) { - struct git_attr_check check[1]; + struct git_attr_check check[2]; int crlf = CRLF_GUESS; + int ident = 0; + char *buf, *buf2; setup_convert_check(check); - if (!git_checkattr(path, 1, check)) { - crlf = git_path_check_crlf(path, check); + if (!git_checkattr(path, ARRAY_SIZE(check), check)) { + crlf = git_path_check_crlf(path, check + 0); + ident = git_path_check_ident(path, check + 1); } - return crlf_to_worktree(path, src, sizep, crlf); + + buf = ident_to_worktree(path, src, sizep, ident); + + buf2 = crlf_to_worktree(path, buf ? buf : src, sizep, crlf); + if (buf2) { + free(buf); + buf = buf2; + } + + return buf; } diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh new file mode 100755 index 0000000000..ad952c9ce2 --- /dev/null +++ b/t/t0021-conversion.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='blob conversion via gitattributes' + +. 
./test-lib.sh + +test_expect_success setup ' + { + echo "*.i ident" + } >.gitattributes && + + { + echo a b c d e f g h i j k l m + echo n o p q r s t u v w x y z + echo '\''$ident$'\'' + } >test && + cat test >test.t && + cat test >test.o && + cat test >test.i && + git add test test.t test.i && + rm -f test test.t test.i && + git checkout -- test test.t test.i +' + +script='s/^\$ident: \([0-9a-f]*\) \$/\1/p' + +test_expect_success check ' + + cmp test.o test && + cmp test.o test.t && + + # ident should be stripped in the repository + git diff --raw --exit-code :test :test.i && + id=$(git rev-parse --verify :test) && + embedded=$(sed -ne "$script" test.i) && + test "z$id" = "z$embedded" +' + +test_done From aa4ed402c9721170fde2e9e43c3825562070e65e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 21 Apr 2007 03:14:13 -0700 Subject: [PATCH 2/2] Add 'filter' attribute and external filter driver definition. The interface is similar to the custom low-level merge drivers. First you configure your filter driver by defining 'filter.<name>.*' variables in the configuration. filter.<name>.clean filter command to run upon checkin filter.<name>.smudge filter command to run upon checkout Then you assign filter attribute to each path, whose name matches the custom filter driver's name. Example: (in .gitattributes) *.c filter=indent (in config) [filter "indent"] clean = indent smudge = cat Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 39 ++++++ convert.c | 237 +++++++++++++++++++++++++++++++- t/t0021-conversion.sh | 9 ++ 3 files changed, 282 insertions(+), 3 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index b6f90f6f37..87723105d1 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -156,6 +156,45 @@ In the check-out codepath, the blob content is first converted with `crlf`, and then `ident`. +`filter` +^^^^^^^^ + +A `filter` attribute can be set to a string value.
This names the +filter driver specified in the configuration. + +A filter driver consists of a `clean` command and a `smudge` +command, either of which can be left unspecified. Upon +checkout, when the `smudge` command is specified, the command is fed +the blob object from its standard input, and its standard output +is used to update the worktree file. Similarly, the `clean` command +is used to convert the contents of the worktree file upon checkin. + +A missing filter driver definition in the config is not an error +but makes the filter a no-op passthru. + +The content filtering is done to massage the content into a +shape that is more convenient for the platform, filesystem, and +the user to use. The keyword here is "more convenient" and not +"turning something unusable into usable". In other words, it is +"hanging yourself because we gave you a long rope" if your +project uses the filtering mechanism in such a way that it makes +your project unusable unless the checkout is done with a +specific filter in effect. + + +Interaction between checkin/checkout attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the check-in codepath, the worktree file is first converted +with the `filter` driver (if specified and the corresponding driver is +defined), then the result is processed with `ident` (if +specified), and then finally with `crlf` (again, if specified +and applicable). + +In the check-out codepath, the blob content is first converted +with `crlf`, and then `ident` and fed to `filter`. + + Generating diff text ~~~~~~~~~~~~~~~~~~~~ diff --git a/convert.c b/convert.c index 5fdaee7571..9ee31b0ee0 100644 --- a/convert.c +++ b/convert.c @@ -201,17 +201,212 @@ static char *crlf_to_worktree(const char *path, const char *src, unsigned long * return buffer; } +static int filter_buffer(const char *path, const char *src, + unsigned long size, const char *cmd) +{ + /* + * Spawn cmd and feed the buffer contents through its stdin.
+ */ + struct child_process child_process; + int pipe_feed[2]; + int write_err, status; + + memset(&child_process, 0, sizeof(child_process)); + + if (pipe(pipe_feed) < 0) { + error("cannot create pipe to run external filter %s", cmd); + return 1; + } + + child_process.pid = fork(); + if (child_process.pid < 0) { + error("cannot fork to run external filter %s", cmd); + close(pipe_feed[0]); + close(pipe_feed[1]); + return 1; + } + if (!child_process.pid) { + dup2(pipe_feed[0], 0); + close(pipe_feed[0]); + close(pipe_feed[1]); + execlp("sh", "sh", "-c", cmd, NULL); + return 1; + } + close(pipe_feed[0]); + + write_err = (write_in_full(pipe_feed[1], src, size) < 0); + if (close(pipe_feed[1])) + write_err = 1; + if (write_err) + error("cannot feed the input to external filter %s", cmd); + + status = finish_command(&child_process); + if (status) + error("external filter %s failed %d", cmd, -status); + return (write_err || status); +} + +static char *apply_filter(const char *path, const char *src, + unsigned long *sizep, const char *cmd) +{ + /* + * Create a pipeline to have the command filter the buffer's + * contents. 
+ * + * (child --> cmd) --> us + */ + const int SLOP = 4096; + int pipe_feed[2]; + int status; + char *dst; + unsigned long dstsize, dstalloc; + struct child_process child_process; + + if (!cmd) + return NULL; + + memset(&child_process, 0, sizeof(child_process)); + + if (pipe(pipe_feed) < 0) { + error("cannot create pipe to run external filter %s", cmd); + return NULL; + } + + fflush(NULL); + child_process.pid = fork(); + if (child_process.pid < 0) { + error("cannot fork to run external filter %s", cmd); + close(pipe_feed[0]); + close(pipe_feed[1]); + return NULL; + } + if (!child_process.pid) { + dup2(pipe_feed[1], 1); + close(pipe_feed[0]); + close(pipe_feed[1]); + exit(filter_buffer(path, src, *sizep, cmd)); + } + close(pipe_feed[1]); + + dstalloc = *sizep; + dst = xmalloc(dstalloc); + dstsize = 0; + + while (1) { + ssize_t numread = xread(pipe_feed[0], dst + dstsize, + dstalloc - dstsize); + + if (numread <= 0) { + if (!numread) + break; + error("read from external filter %s failed", cmd); + free(dst); + dst = NULL; + break; + } + dstsize += numread; + if (dstalloc <= dstsize + SLOP) { + dstalloc = dstsize + SLOP; + dst = xrealloc(dst, dstalloc); + } + } + if (close(pipe_feed[0])) { + error("read from external filter %s failed", cmd); + free(dst); + dst = NULL; + } + + status = finish_command(&child_process); + if (status) { + error("external filter %s failed %d", cmd, -status); + free(dst); + dst = NULL; + } + + if (dst) + *sizep = dstsize; + return dst; +} + +static struct convert_driver { + const char *name; + struct convert_driver *next; + char *smudge; + char *clean; +} *user_convert, **user_convert_tail; + +static int read_convert_config(const char *var, const char *value) +{ + const char *ep, *name; + int namelen; + struct convert_driver *drv; + + /* + * External conversion drivers are configured using + * "filter..variable". 
+ */ + if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6) + return 0; + name = var + 7; + namelen = ep - name; + for (drv = user_convert; drv; drv = drv->next) + if (!strncmp(drv->name, name, namelen) && !drv->name[namelen]) + break; + if (!drv) { + char *namebuf; + drv = xcalloc(1, sizeof(struct convert_driver)); + namebuf = xmalloc(namelen + 1); + memcpy(namebuf, name, namelen); + namebuf[namelen] = 0; + drv->name = namebuf; + drv->next = NULL; + *user_convert_tail = drv; + user_convert_tail = &(drv->next); + } + + ep++; + + /* + * filter..smudge and filter..clean specifies + * the command line: + * + * command-line + * + * The command-line will not be interpolated in any way. + */ + + if (!strcmp("smudge", ep)) { + if (!value) + return error("%s: lacks value", var); + drv->smudge = strdup(value); + return 0; + } + + if (!strcmp("clean", ep)) { + if (!value) + return error("%s: lacks value", var); + drv->clean = strdup(value); + return 0; + } + return 0; +} + static void setup_convert_check(struct git_attr_check *check) { static struct git_attr *attr_crlf; static struct git_attr *attr_ident; + static struct git_attr *attr_filter; if (!attr_crlf) { attr_crlf = git_attr("crlf", 4); attr_ident = git_attr("ident", 5); + attr_filter = git_attr("filter", 6); + user_convert_tail = &user_convert; + git_config(read_convert_config); } check[0].attr = attr_crlf; check[1].attr = attr_ident; + check[2].attr = attr_filter; } static int count_ident(const char *cp, unsigned long size) @@ -367,6 +562,20 @@ static int git_path_check_crlf(const char *path, struct git_attr_check *check) return CRLF_GUESS; } +static struct convert_driver *git_path_check_convert(const char *path, + struct git_attr_check *check) +{ + const char *value = check->value; + struct convert_driver *drv; + + if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value)) + return NULL; + for (drv = user_convert; drv; drv = drv->next) + if (!strcmp(value, drv->name)) + return drv; + return 
NULL; +} + static int git_path_check_ident(const char *path, struct git_attr_check *check) { const char *value = check->value; @@ -376,18 +585,29 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) char *convert_to_git(const char *path, const char *src, unsigned long *sizep) { - struct git_attr_check check[2]; + struct git_attr_check check[3]; int crlf = CRLF_GUESS; int ident = 0; + char *filter = NULL; char *buf, *buf2; setup_convert_check(check); if (!git_checkattr(path, ARRAY_SIZE(check), check)) { + struct convert_driver *drv; crlf = git_path_check_crlf(path, check + 0); ident = git_path_check_ident(path, check + 1); + drv = git_path_check_convert(path, check + 2); + if (drv && drv->clean) + filter = drv->clean; } - buf = crlf_to_git(path, src, sizep, crlf); + buf = apply_filter(path, src, sizep, filter); + + buf2 = crlf_to_git(path, buf ? buf : src, sizep, crlf); + if (buf2) { + free(buf); + buf = buf2; + } buf2 = ident_to_git(path, buf ? buf : src, sizep, ident); if (buf2) { @@ -400,15 +620,20 @@ char *convert_to_git(const char *path, const char *src, unsigned long *sizep) char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep) { - struct git_attr_check check[2]; + struct git_attr_check check[3]; int crlf = CRLF_GUESS; int ident = 0; + char *filter = NULL; char *buf, *buf2; setup_convert_check(check); if (!git_checkattr(path, ARRAY_SIZE(check), check)) { + struct convert_driver *drv; crlf = git_path_check_crlf(path, check + 0); ident = git_path_check_ident(path, check + 1); + drv = git_path_check_convert(path, check + 2); + if (drv && drv->smudge) + filter = drv->smudge; } buf = ident_to_worktree(path, src, sizep, ident); @@ -419,5 +644,11 @@ char *convert_to_working_tree(const char *path, const char *src, unsigned long * buf = buf2; } + buf2 = apply_filter(path, buf ? 
buf : src, sizep, filter); + if (buf2) { + free(buf); + buf = buf2; + } + return buf; } diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index ad952c9ce2..bab9ecc34e 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -4,8 +4,17 @@ test_description='blob conversion via gitattributes' . ./test-lib.sh +cat <<\EOF >rot13.sh +tr '[a-zA-Z]' '[n-za-mN-ZA-M]' +EOF +chmod +x rot13.sh + test_expect_success setup ' + git config filter.rot13.smudge ./rot13.sh && + git config filter.rot13.clean ./rot13.sh && + { + echo "*.t filter=rot13" echo "*.i ident" } >.gitattributes &&