зеркало из https://github.com/microsoft/git.git
Merge branch 'jk/fast-export-anonymize'
Sometimes users want to report a bug they experience on their repository, but they are not at liberty to share the contents of the repository. "fast-export" was taught an "--anonymize" option to replace blob contents, names of people and paths and log messages with bland and simple strings to help them. * jk/fast-export-anonymize: docs/fast-export: explain --anonymize more completely teach fast-export an --anonymize option
This commit is contained in:
Коммит
b1de6b21f3
|
@ -105,6 +105,11 @@ marks the same across runs.
|
||||||
in the commit (as opposed to just listing the files which are
|
in the commit (as opposed to just listing the files which are
|
||||||
different from the commit's first parent).
|
different from the commit's first parent).
|
||||||
|
|
||||||
|
--anonymize::
|
||||||
|
Anonymize the contents of the repository while still retaining
|
||||||
|
the shape of the history and stored tree. See the section on
|
||||||
|
`ANONYMIZING` below.
|
||||||
|
|
||||||
--refspec::
|
--refspec::
|
||||||
Apply the specified refspec to each ref exported. Multiple of them can
|
Apply the specified refspec to each ref exported. Multiple of them can
|
||||||
be specified.
|
be specified.
|
||||||
|
@ -141,6 +146,62 @@ referenced by that revision range contains the string
|
||||||
'refs/heads/master'.
|
'refs/heads/master'.
|
||||||
|
|
||||||
|
|
||||||
|
ANONYMIZING
|
||||||
|
-----------
|
||||||
|
|
||||||
|
If the `--anonymize` option is given, git will attempt to remove all
|
||||||
|
identifying information from the repository while still retaining enough
|
||||||
|
of the original tree and history patterns to reproduce some bugs. The
|
||||||
|
goal is that a git bug which is found on a private repository will
|
||||||
|
persist in the anonymized repository, and the latter can be shared with
|
||||||
|
git developers to help solve the bug.
|
||||||
|
|
||||||
|
With this option, git will replace all refnames, paths, blob contents,
|
||||||
|
commit and tag messages, names, and email addresses in the output with
|
||||||
|
anonymized data. Two instances of the same string will be replaced
|
||||||
|
equivalently (e.g., two commits with the same author will have the same
|
||||||
|
anonymized author in the output, but bear no resemblance to the original
|
||||||
|
author string). The relationship between commits, branches, and tags is
|
||||||
|
retained, as well as the commit timestamps (but the commit messages and
|
||||||
|
refnames bear no resemblance to the originals). The relative makeup of
|
||||||
|
the tree is retained (e.g., if you have a root tree with 10 files and 3
|
||||||
|
trees, so will the output), but their names and the contents of the
|
||||||
|
files will be replaced.
|
||||||
|
|
||||||
|
If you think you have found a git bug, you can start by exporting an
|
||||||
|
anonymized stream of the whole repository:
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
$ git fast-export --anonymize --all >anon-stream
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
Then confirm that the bug persists in a repository created from that
|
||||||
|
stream (many bugs will not, as they really do depend on the exact
|
||||||
|
repository contents):
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
$ git init anon-repo
|
||||||
|
$ cd anon-repo
|
||||||
|
$ git fast-import <../anon-stream
|
||||||
|
$ ... test your bug ...
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
If the anonymized repository shows the bug, it may be worth sharing
|
||||||
|
`anon-stream` along with a regular bug report. Note that the anonymized
|
||||||
|
stream compresses very well, so gzipping it is encouraged. If you want
|
||||||
|
to examine the stream to see that it does not contain any private data,
|
||||||
|
you can peruse it directly before sending. You may also want to try:
|
||||||
|
|
||||||
|
---------------------------------------------------
|
||||||
|
$ perl -pe 's/\d+/X/g' <anon-stream | sort -u | less
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
which shows all of the unique lines (with numbers converted to "X", to
|
||||||
|
collapse "User 0", "User 1", etc., into "User X"). This produces a much
|
||||||
|
smaller output, and it is usually easy to quickly confirm that there is
|
||||||
|
no private data in the stream.
|
||||||
|
|
||||||
|
|
||||||
Limitations
|
Limitations
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "parse-options.h"
|
#include "parse-options.h"
|
||||||
#include "quote.h"
|
#include "quote.h"
|
||||||
#include "remote.h"
|
#include "remote.h"
|
||||||
|
#include "blob.h"
|
||||||
|
|
||||||
static const char *fast_export_usage[] = {
|
static const char *fast_export_usage[] = {
|
||||||
N_("git fast-export [rev-list-opts]"),
|
N_("git fast-export [rev-list-opts]"),
|
||||||
|
@ -34,6 +35,7 @@ static int full_tree;
|
||||||
static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
|
static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
|
||||||
static struct refspec *refspecs;
|
static struct refspec *refspecs;
|
||||||
static int refspecs_nr;
|
static int refspecs_nr;
|
||||||
|
static int anonymize;
|
||||||
|
|
||||||
static int parse_opt_signed_tag_mode(const struct option *opt,
|
static int parse_opt_signed_tag_mode(const struct option *opt,
|
||||||
const char *arg, int unset)
|
const char *arg, int unset)
|
||||||
|
@ -81,6 +83,76 @@ static int has_unshown_parent(struct commit *commit)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct anonymized_entry {
|
||||||
|
struct hashmap_entry hash;
|
||||||
|
const char *orig;
|
||||||
|
size_t orig_len;
|
||||||
|
const char *anon;
|
||||||
|
size_t anon_len;
|
||||||
|
};
|
||||||
|
|
||||||
|
static int anonymized_entry_cmp(const void *va, const void *vb,
|
||||||
|
const void *data)
|
||||||
|
{
|
||||||
|
const struct anonymized_entry *a = va, *b = vb;
|
||||||
|
return a->orig_len != b->orig_len ||
|
||||||
|
memcmp(a->orig, b->orig, a->orig_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Basically keep a cache of X->Y so that we can repeatedly replace
|
||||||
|
* the same anonymized string with another. The actual generation
|
||||||
|
* is farmed out to the generate function.
|
||||||
|
*/
|
||||||
|
static const void *anonymize_mem(struct hashmap *map,
|
||||||
|
void *(*generate)(const void *, size_t *),
|
||||||
|
const void *orig, size_t *len)
|
||||||
|
{
|
||||||
|
struct anonymized_entry key, *ret;
|
||||||
|
|
||||||
|
if (!map->cmpfn)
|
||||||
|
hashmap_init(map, anonymized_entry_cmp, 0);
|
||||||
|
|
||||||
|
hashmap_entry_init(&key, memhash(orig, *len));
|
||||||
|
key.orig = orig;
|
||||||
|
key.orig_len = *len;
|
||||||
|
ret = hashmap_get(map, &key, NULL);
|
||||||
|
|
||||||
|
if (!ret) {
|
||||||
|
ret = xmalloc(sizeof(*ret));
|
||||||
|
hashmap_entry_init(&ret->hash, key.hash.hash);
|
||||||
|
ret->orig = xstrdup(orig);
|
||||||
|
ret->orig_len = *len;
|
||||||
|
ret->anon = generate(orig, len);
|
||||||
|
ret->anon_len = *len;
|
||||||
|
hashmap_put(map, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
*len = ret->anon_len;
|
||||||
|
return ret->anon;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Append an anonymized version of "path" to "out".
 *
 * Each '/'-separated component is anonymized on its own (through
 * anonymize_mem with the given map and generate function), so that
 * paths a/b and a/c still share a common root and repeated lookups of
 * "a" yield the same value. The '/' separators are copied through
 * unchanged.
 */
static void anonymize_path(struct strbuf *out, const char *path,
			   struct hashmap *map,
			   void *(*generate)(const void *, size_t *))
{
	const char *cursor = path;

	while (*cursor) {
		/* points at the next '/' or at the terminating NUL */
		const char *stop = strchrnul(cursor, '/');
		size_t n = stop - cursor;
		const char *replacement;

		replacement = anonymize_mem(map, generate, cursor, &n);
		strbuf_add(out, replacement, n);

		cursor = stop;
		if (*cursor == '/') {
			strbuf_addch(out, '/');
			cursor++;
		}
	}
}
|
||||||
|
|
||||||
/* Since intptr_t is C99, we do not use it here */
|
/* Since intptr_t is C99, we do not use it here */
|
||||||
static inline uint32_t *mark_to_ptr(uint32_t mark)
|
static inline uint32_t *mark_to_ptr(uint32_t mark)
|
||||||
{
|
{
|
||||||
|
@ -119,6 +191,26 @@ static void show_progress(void)
|
||||||
printf("progress %d objects\n", counter);
|
printf("progress %d objects\n", counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ideally we would want some transformation of the blob data here
|
||||||
|
* that is unreversible, but would still be the same size and have
|
||||||
|
* the same data relationship to other blobs (so that we get the same
|
||||||
|
* delta and packing behavior as the original). But the first and last
|
||||||
|
* requirements there are probably mutually exclusive, so let's take
|
||||||
|
* the easy way out for now, and just generate arbitrary content.
|
||||||
|
*
|
||||||
|
* There's no need to cache this result with anonymize_mem, since
|
||||||
|
* we already handle blob content caching with marks.
|
||||||
|
*/
|
||||||
|
static char *anonymize_blob(unsigned long *size)
|
||||||
|
{
|
||||||
|
static int counter;
|
||||||
|
struct strbuf out = STRBUF_INIT;
|
||||||
|
strbuf_addf(&out, "anonymous blob %d", counter++);
|
||||||
|
*size = out.len;
|
||||||
|
return strbuf_detach(&out, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
static void export_blob(const unsigned char *sha1)
|
static void export_blob(const unsigned char *sha1)
|
||||||
{
|
{
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
|
@ -137,12 +229,19 @@ static void export_blob(const unsigned char *sha1)
|
||||||
if (object && object->flags & SHOWN)
|
if (object && object->flags & SHOWN)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
buf = read_sha1_file(sha1, &type, &size);
|
if (anonymize) {
|
||||||
if (!buf)
|
buf = anonymize_blob(&size);
|
||||||
die ("Could not read blob %s", sha1_to_hex(sha1));
|
object = (struct object *)lookup_blob(sha1);
|
||||||
if (check_sha1_signature(sha1, buf, size, typename(type)) < 0)
|
eaten = 0;
|
||||||
die("sha1 mismatch in blob %s", sha1_to_hex(sha1));
|
} else {
|
||||||
object = parse_object_buffer(sha1, type, size, buf, &eaten);
|
buf = read_sha1_file(sha1, &type, &size);
|
||||||
|
if (!buf)
|
||||||
|
die ("Could not read blob %s", sha1_to_hex(sha1));
|
||||||
|
if (check_sha1_signature(sha1, buf, size, typename(type)) < 0)
|
||||||
|
die("sha1 mismatch in blob %s", sha1_to_hex(sha1));
|
||||||
|
object = parse_object_buffer(sha1, type, size, buf, &eaten);
|
||||||
|
}
|
||||||
|
|
||||||
if (!object)
|
if (!object)
|
||||||
die("Could not read blob %s", sha1_to_hex(sha1));
|
die("Could not read blob %s", sha1_to_hex(sha1));
|
||||||
|
|
||||||
|
@ -190,7 +289,7 @@ static int depth_first(const void *a_, const void *b_)
|
||||||
return (a->status == 'R') - (b->status == 'R');
|
return (a->status == 'R') - (b->status == 'R');
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_path(const char *path)
|
static void print_path_1(const char *path)
|
||||||
{
|
{
|
||||||
int need_quote = quote_c_style(path, NULL, NULL, 0);
|
int need_quote = quote_c_style(path, NULL, NULL, 0);
|
||||||
if (need_quote)
|
if (need_quote)
|
||||||
|
@ -201,6 +300,43 @@ static void print_path(const char *path)
|
||||||
printf("%s", path);
|
printf("%s", path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *anonymize_path_component(const void *path, size_t *len)
|
||||||
|
{
|
||||||
|
static int counter;
|
||||||
|
struct strbuf out = STRBUF_INIT;
|
||||||
|
strbuf_addf(&out, "path%d", counter++);
|
||||||
|
return strbuf_detach(&out, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void print_path(const char *path)
|
||||||
|
{
|
||||||
|
if (!anonymize)
|
||||||
|
print_path_1(path);
|
||||||
|
else {
|
||||||
|
static struct hashmap paths;
|
||||||
|
static struct strbuf anon = STRBUF_INIT;
|
||||||
|
|
||||||
|
anonymize_path(&anon, path, &paths, anonymize_path_component);
|
||||||
|
print_path_1(anon.buf);
|
||||||
|
strbuf_reset(&anon);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Generator callback for anonymize_mem: return a newly allocated,
 * zero-filled 20-byte buffer whose last four bytes hold a counter in
 * big-endian order. The original sha1 is not inspected and "*len" is
 * left as the caller set it.
 */
static void *generate_fake_sha1(const void *old, size_t *len)
{
	static uint32_t counter = 1; /* avoid null sha1 */
	unsigned char *fake = xcalloc(20, 1);
	uint32_t id = counter++;

	put_be32(&fake[16], id);
	return fake;
}
|
||||||
|
|
||||||
|
static const unsigned char *anonymize_sha1(const unsigned char *sha1)
|
||||||
|
{
|
||||||
|
static struct hashmap sha1s;
|
||||||
|
size_t len = 20;
|
||||||
|
return anonymize_mem(&sha1s, generate_fake_sha1, sha1, &len);
|
||||||
|
}
|
||||||
|
|
||||||
static void show_filemodify(struct diff_queue_struct *q,
|
static void show_filemodify(struct diff_queue_struct *q,
|
||||||
struct diff_options *options, void *data)
|
struct diff_options *options, void *data)
|
||||||
{
|
{
|
||||||
|
@ -245,7 +381,9 @@ static void show_filemodify(struct diff_queue_struct *q,
|
||||||
*/
|
*/
|
||||||
if (no_data || S_ISGITLINK(spec->mode))
|
if (no_data || S_ISGITLINK(spec->mode))
|
||||||
printf("M %06o %s ", spec->mode,
|
printf("M %06o %s ", spec->mode,
|
||||||
sha1_to_hex(spec->sha1));
|
sha1_to_hex(anonymize ?
|
||||||
|
anonymize_sha1(spec->sha1) :
|
||||||
|
spec->sha1));
|
||||||
else {
|
else {
|
||||||
struct object *object = lookup_object(spec->sha1);
|
struct object *object = lookup_object(spec->sha1);
|
||||||
printf("M %06o :%d ", spec->mode,
|
printf("M %06o :%d ", spec->mode,
|
||||||
|
@ -279,6 +417,114 @@ static const char *find_encoding(const char *begin, const char *end)
|
||||||
return bol;
|
return bol;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *anonymize_ref_component(const void *old, size_t *len)
|
||||||
|
{
|
||||||
|
static int counter;
|
||||||
|
struct strbuf out = STRBUF_INIT;
|
||||||
|
strbuf_addf(&out, "ref%d", counter++);
|
||||||
|
return strbuf_detach(&out, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *anonymize_refname(const char *refname)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If any of these prefixes is found, we will leave it intact
|
||||||
|
* so that tags remain tags and so forth.
|
||||||
|
*/
|
||||||
|
static const char *prefixes[] = {
|
||||||
|
"refs/heads/",
|
||||||
|
"refs/tags/",
|
||||||
|
"refs/remotes/",
|
||||||
|
"refs/"
|
||||||
|
};
|
||||||
|
static struct hashmap refs;
|
||||||
|
static struct strbuf anon = STRBUF_INIT;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We also leave "master" as a special case, since it does not reveal
|
||||||
|
* anything interesting.
|
||||||
|
*/
|
||||||
|
if (!strcmp(refname, "refs/heads/master"))
|
||||||
|
return refname;
|
||||||
|
|
||||||
|
strbuf_reset(&anon);
|
||||||
|
for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
|
||||||
|
if (skip_prefix(refname, prefixes[i], &refname)) {
|
||||||
|
strbuf_addstr(&anon, prefixes[i]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
anonymize_path(&anon, refname, &refs, anonymize_ref_component);
|
||||||
|
return anon.buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * We do not even bother to cache commit messages, as they are unlikely
 * to be repeated verbatim, and it is not that interesting when they are.
 *
 * Returns a newly allocated "subject <N>" message with a fixed body,
 * where <N> counts up from 0 across calls. The old message is not
 * inspected.
 */
static char *anonymize_commit_message(const char *old)
{
	static int counter;
	int id = counter++;

	return xstrfmt("subject %d\n\nbody\n", id);
}
|
||||||
|
|
||||||
|
static struct hashmap idents;
|
||||||
|
static void *anonymize_ident(const void *old, size_t *len)
|
||||||
|
{
|
||||||
|
static int counter;
|
||||||
|
struct strbuf out = STRBUF_INIT;
|
||||||
|
strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
|
||||||
|
counter++;
|
||||||
|
return strbuf_detach(&out, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Our strategy here is to anonymize the names and email addresses,
|
||||||
|
* but keep timestamps intact, as they influence things like traversal
|
||||||
|
* order (and by themselves should not be too revealing).
|
||||||
|
*/
|
||||||
|
static void anonymize_ident_line(const char **beg, const char **end)
|
||||||
|
{
|
||||||
|
static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
|
||||||
|
static unsigned which_buffer;
|
||||||
|
|
||||||
|
struct strbuf *out;
|
||||||
|
struct ident_split split;
|
||||||
|
const char *end_of_header;
|
||||||
|
|
||||||
|
out = &buffers[which_buffer++];
|
||||||
|
which_buffer %= ARRAY_SIZE(buffers);
|
||||||
|
strbuf_reset(out);
|
||||||
|
|
||||||
|
/* skip "committer", "author", "tagger", etc */
|
||||||
|
end_of_header = strchr(*beg, ' ');
|
||||||
|
if (!end_of_header)
|
||||||
|
die("BUG: malformed line fed to anonymize_ident_line: %.*s",
|
||||||
|
(int)(*end - *beg), *beg);
|
||||||
|
end_of_header++;
|
||||||
|
strbuf_add(out, *beg, end_of_header - *beg);
|
||||||
|
|
||||||
|
if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
|
||||||
|
split.date_begin) {
|
||||||
|
const char *ident;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
len = split.mail_end - split.name_begin;
|
||||||
|
ident = anonymize_mem(&idents, anonymize_ident,
|
||||||
|
split.name_begin, &len);
|
||||||
|
strbuf_add(out, ident, len);
|
||||||
|
strbuf_addch(out, ' ');
|
||||||
|
strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
|
||||||
|
} else {
|
||||||
|
strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
|
||||||
|
}
|
||||||
|
|
||||||
|
*beg = out->buf;
|
||||||
|
*end = out->buf + out->len;
|
||||||
|
}
|
||||||
|
|
||||||
static void handle_commit(struct commit *commit, struct rev_info *rev)
|
static void handle_commit(struct commit *commit, struct rev_info *rev)
|
||||||
{
|
{
|
||||||
int saved_output_format = rev->diffopt.output_format;
|
int saved_output_format = rev->diffopt.output_format;
|
||||||
|
@ -287,6 +533,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev)
|
||||||
const char *encoding, *message;
|
const char *encoding, *message;
|
||||||
char *reencoded = NULL;
|
char *reencoded = NULL;
|
||||||
struct commit_list *p;
|
struct commit_list *p;
|
||||||
|
const char *refname;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
|
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
|
||||||
|
@ -326,13 +573,22 @@ static void handle_commit(struct commit *commit, struct rev_info *rev)
|
||||||
if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
|
if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
|
||||||
export_blob(diff_queued_diff.queue[i]->two->sha1);
|
export_blob(diff_queued_diff.queue[i]->two->sha1);
|
||||||
|
|
||||||
|
refname = commit->util;
|
||||||
|
if (anonymize) {
|
||||||
|
refname = anonymize_refname(refname);
|
||||||
|
anonymize_ident_line(&committer, &committer_end);
|
||||||
|
anonymize_ident_line(&author, &author_end);
|
||||||
|
}
|
||||||
|
|
||||||
mark_next_object(&commit->object);
|
mark_next_object(&commit->object);
|
||||||
if (!is_encoding_utf8(encoding))
|
if (anonymize)
|
||||||
|
reencoded = anonymize_commit_message(message);
|
||||||
|
else if (!is_encoding_utf8(encoding))
|
||||||
reencoded = reencode_string(message, "UTF-8", encoding);
|
reencoded = reencode_string(message, "UTF-8", encoding);
|
||||||
if (!commit->parents)
|
if (!commit->parents)
|
||||||
printf("reset %s\n", (const char*)commit->util);
|
printf("reset %s\n", refname);
|
||||||
printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
|
printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
|
||||||
(const char *)commit->util, last_idnum,
|
refname, last_idnum,
|
||||||
(int)(author_end - author), author,
|
(int)(author_end - author), author,
|
||||||
(int)(committer_end - committer), committer,
|
(int)(committer_end - committer), committer,
|
||||||
(unsigned)(reencoded
|
(unsigned)(reencoded
|
||||||
|
@ -363,6 +619,14 @@ static void handle_commit(struct commit *commit, struct rev_info *rev)
|
||||||
show_progress();
|
show_progress();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *anonymize_tag(const void *old, size_t *len)
|
||||||
|
{
|
||||||
|
static int counter;
|
||||||
|
struct strbuf out = STRBUF_INIT;
|
||||||
|
strbuf_addf(&out, "tag message %d", counter++);
|
||||||
|
return strbuf_detach(&out, len);
|
||||||
|
}
|
||||||
|
|
||||||
static void handle_tail(struct object_array *commits, struct rev_info *revs)
|
static void handle_tail(struct object_array *commits, struct rev_info *revs)
|
||||||
{
|
{
|
||||||
struct commit *commit;
|
struct commit *commit;
|
||||||
|
@ -419,6 +683,17 @@ static void handle_tag(const char *name, struct tag *tag)
|
||||||
} else {
|
} else {
|
||||||
tagger++;
|
tagger++;
|
||||||
tagger_end = strchrnul(tagger, '\n');
|
tagger_end = strchrnul(tagger, '\n');
|
||||||
|
if (anonymize)
|
||||||
|
anonymize_ident_line(&tagger, &tagger_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (anonymize) {
|
||||||
|
name = anonymize_refname(name);
|
||||||
|
if (message) {
|
||||||
|
static struct hashmap tags;
|
||||||
|
message = anonymize_mem(&tags, anonymize_tag,
|
||||||
|
message, &message_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* handle signed tags */
|
/* handle signed tags */
|
||||||
|
@ -584,6 +859,8 @@ static void handle_tags_and_duplicates(void)
|
||||||
handle_tag(name, (struct tag *)object);
|
handle_tag(name, (struct tag *)object);
|
||||||
break;
|
break;
|
||||||
case OBJ_COMMIT:
|
case OBJ_COMMIT:
|
||||||
|
if (anonymize)
|
||||||
|
name = anonymize_refname(name);
|
||||||
/* create refs pointing to already seen commits */
|
/* create refs pointing to already seen commits */
|
||||||
commit = (struct commit *)object;
|
commit = (struct commit *)object;
|
||||||
printf("reset %s\nfrom :%d\n\n", name,
|
printf("reset %s\nfrom :%d\n\n", name,
|
||||||
|
@ -719,6 +996,7 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
|
||||||
OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
|
OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
|
||||||
OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
|
OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
|
||||||
N_("Apply refspec to exported refs")),
|
N_("Apply refspec to exported refs")),
|
||||||
|
OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
|
||||||
OPT_END()
|
OPT_END()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
#!/bin/sh

test_description='basic tests for fast-export --anonymize'
. ./test-lib.sh

# Two branches forking from "base": master gains "foo", while "other"
# gains two files under subdir/ and an annotated tag at its tip.
test_expect_success 'setup simple repo' '
	test_commit base &&
	test_commit foo &&
	git checkout -b other HEAD^ &&
	mkdir subdir &&
	test_commit subdir/bar &&
	test_commit subdir/xyzzy &&
	git tag -m "annotated tag" mytag
'

test_expect_success 'export anonymized stream' '
	git fast-export --anonymize --all >stream
'

# this also covers commit messages
test_expect_success 'stream omits path names' '
	! grep base stream &&
	! grep foo stream &&
	! grep subdir stream &&
	! grep bar stream &&
	! grep xyzzy stream
'

test_expect_success 'stream allows master as refname' '
	grep master stream
'

test_expect_success 'stream omits other refnames' '
	! grep other stream &&
	! grep mytag stream
'

test_expect_success 'stream omits identities' '
	! grep "$GIT_COMMITTER_NAME" stream &&
	! grep "$GIT_COMMITTER_EMAIL" stream &&
	! grep "$GIT_AUTHOR_NAME" stream &&
	! grep "$GIT_AUTHOR_EMAIL" stream
'

test_expect_success 'stream omits tag message' '
	! grep "annotated tag" stream
'

# NOTE: we chdir to the new, anonymized repository
# after this. All further tests should assume this.
test_expect_success 'import stream to new repository' '
	git init new &&
	cd new &&
	git fast-import <../stream
'

# $other_branch is set here and used by the tests that follow.
test_expect_success 'result has two branches' '
	git for-each-ref --format="%(refname)" refs/heads >branches &&
	test_line_count = 2 branches &&
	other_branch=$(grep -v refs/heads/master branches)
'

# Compare the original repository (one directory up) with the imported one.
test_expect_success 'repo has original shape and timestamps' '
	shape () {
		git log --format="%m %ct" --left-right --boundary "$@"
	} &&
	(cd .. && shape master...other) >expect &&
	shape "master...$other_branch" >actual &&
	test_cmp expect actual
'

test_expect_success 'root tree has original shape' '
	# the output entries are not necessarily in the same
	# order, but we know at least that we will have one tree
	# and one blob, so just check the sorted order
	cat >expect <<-\EOF &&
	blob
	tree
	EOF
	git ls-tree "$other_branch" >root &&
	cut -d" " -f2 <root | sort >actual &&
	test_cmp expect actual
'

# Uses the "root" listing written by the previous test.
test_expect_success 'paths in subdir ended up in one tree' '
	cat >expect <<-\EOF &&
	blob
	blob
	EOF
	tree=$(grep tree root | cut -f2) &&
	git ls-tree "$other_branch:$tree" >tree &&
	cut -d" " -f2 <tree >actual &&
	test_cmp expect actual
'

test_expect_success 'tag points to branch tip' '
	git rev-parse "$other_branch" >expect &&
	git for-each-ref --format="%(*objectname)" | grep . >actual &&
	test_cmp expect actual
'

# All authors collapse to one anonymized ident, and so do all committers,
# but the two idents differ from each other.
test_expect_success 'idents are shared' '
	git log --all --format="%an <%ae>" >authors &&
	sort -u authors >unique &&
	test_line_count = 1 unique &&
	git log --all --format="%cn <%ce>" >committers &&
	sort -u committers >unique &&
	test_line_count = 1 unique &&
	! test_cmp authors committers
'

test_done
|
Загрузка…
Ссылка в новой задаче