From 4c58a7111d9fb4f62ca041fc73e8aec0a2f9c800 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 31 Jan 2009 17:31:12 -0800 Subject: [PATCH 1/5] git-svn: allow disabling expensive broken symlink checks Since dbc6c74d0858d77e61e092a48d467e725211f8e9, git-svn has had an expensive check for broken symlinks that exist in some repositories. This leads to a heavy performance hit on repositories with many empty blobs that are not supposed to be symlinks. The workaround is enabled by default; and may be disabled via: git config svn.brokenSymlinkWorkaround false Reported by Markus Heidelberg. Signed-off-by: Eric Wong --- Documentation/git-svn.txt | 8 ++++++++ git-svn.perl | 20 ++++++++++++++++++++ t/t9131-git-svn-empty-symlink.sh | 10 ++++++++++ 3 files changed, 38 insertions(+) diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index 7b654f7928..3d456545d7 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -499,6 +499,14 @@ svn-remote..rewriteRoot:: the repository with a public http:// or svn:// URL in the metadata so users of it will see the public URL. +svn.brokenSymlinkWorkaround:: +This disables potentially expensive checks to workaround broken symlinks +checked into SVN by broken clients. Set this option to "false" if you +track a SVN repository with many empty blobs that are not symlinks. +This option may be changed while "git-svn" is running and take effect on +the next revision fetched. If unset, git-svn assumes this option to be +"true". + -- Since the noMetadata, rewriteRoot, useSvnsyncProps and useSvmProps diff --git a/git-svn.perl b/git-svn.perl index 79888a05c4..bebcbdea20 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -3271,10 +3271,18 @@ sub new { # do_{switch,update} sub _mark_empty_symlinks { my ($git_svn) = @_; + my $bool = Git::config_bool('svn.brokenSymlinkWorkaround'); + return {} if (defined($bool) && ! 
$bool); + my %ret; my ($rev, $cmt) = $git_svn->last_rev_commit; return {} unless ($rev && $cmt); + # allow the warning to be printed for each revision we fetch to + # ensure the user sees it. The user can also disable the workaround + # on the repository even while git svn is running and the next + # revision fetched will skip this expensive function. + my $printed_warning; chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`); my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt); local $/ = "\0"; @@ -3283,6 +3291,18 @@ sub _mark_empty_symlinks { while (<$ls>) { chomp; s/\A100644 blob $empty_blob\t//o or next; + unless ($printed_warning) { + print STDERR "Scanning for empty symlinks, ", + "this may take a while if you have ", + "many empty files\n", + "You may disable this with `", + "git config svn.brokenSymlinkWorkaround ", + "false'.\n", + "This may be done in a different ", + "terminal without restarting ", + "git svn\n"; + $printed_warning = 1; + } my $path = $_; my (undef, $props) = $git_svn->ra->get_file($pfx.$path, $rev, undef); diff --git a/t/t9131-git-svn-empty-symlink.sh b/t/t9131-git-svn-empty-symlink.sh index 704a4f8574..20529a878c 100755 --- a/t/t9131-git-svn-empty-symlink.sh +++ b/t/t9131-git-svn-empty-symlink.sh @@ -87,4 +87,14 @@ test_expect_success '"bar" is an empty file' 'test -f x/bar && ! test -s x/bar' test_expect_success 'get "bar" => symlink fix from svn' \ '(cd x && git svn rebase)' test_expect_success '"bar" becomes a symlink' 'test -L x/bar' + + +test_expect_success 'clone using git svn' 'git svn clone -r1 "$svnrepo" y' +test_expect_success 'disable broken symlink workaround' \ + '(cd y && git config svn.brokenSymlinkWorkaround false)' +test_expect_success '"bar" is an empty file' 'test -f y/bar && ! test -s y/bar' +test_expect_success 'get "bar" => symlink fix from svn' \ + '(cd y && git svn rebase)' +test_expect_success '"bar" does not become a symlink' '! 
test -L y/bar' + test_done From 8e3f9b17a5186c8122eebf5cefdf6062a21f07d9 Mon Sep 17 00:00:00 2001 From: Sam Vilain Date: Tue, 26 Jun 2007 19:23:59 +1200 Subject: [PATCH 2/5] git-svn: abstract out a block into new method other_gs() We will be adding more places that need to find git revisions corresponding to new parents, so abstract out this section into a new method. Signed-off-by: Yuval Kogman Signed-off-by: Sam Vilain Acked-by: Eric Wong [ew: minor formatting changes] --- git-svn.perl | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/git-svn.perl b/git-svn.perl index bebcbdea20..b0ae05bc40 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -2389,22 +2389,8 @@ sub find_parent_branch { print STDERR "Found possible branch point: ", "$new_url => ", $self->full_url, ", $r\n"; $branch_from =~ s#^/##; - my $gs = Git::SVN->find_by_url($new_url, $repos_root, $branch_from); - unless ($gs) { - my $ref_id = $self->{ref_id}; - $ref_id =~ s/\@\d+$//; - $ref_id .= "\@$r"; - # just grow a tail if we're not unique enough :x - $ref_id .= '-' while find_ref($ref_id); - print STDERR "Initializing parent: $ref_id\n"; - my ($u, $p, $repo_id) = ($new_url, '', $ref_id); - if ($u =~ s#^\Q$url\E(/|$)##) { - $p = $u; - $u = $url; - $repo_id = $self->{repo_id}; - } - $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); - } + my $gs = $self->other_gs($new_url, $url, $repos_root, + $branch_from, $r, $self->{ref_id}); my ($r0, $parent) = $gs->find_rev_before($r, 1); { my ($base, $head); @@ -2586,6 +2572,28 @@ sub parse_svn_date { return $parsed_date; } +sub other_gs { + my ($self, $new_url, $url, $repos_root, + $branch_from, $r, $old_ref_id) = @_; + my $gs = Git::SVN->find_by_url($new_url, $repos_root, $branch_from); + unless ($gs) { + my $ref_id = $old_ref_id; + $ref_id =~ s/\@\d+$//; + $ref_id .= "\@$r"; + # just grow a tail if we're not unique enough :x + $ref_id .= '-' while find_ref($ref_id); + print STDERR "Initializing parent: 
$ref_id\n"; + my ($u, $p, $repo_id) = ($new_url, '', $ref_id); + if ($u =~ s#^\Q$url\E(/|$)##) { + $p = $u; + $u = $url; + $repo_id = $self->{repo_id}; + } + $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); + } + $gs +} + sub check_author { my ($author) = @_; if (!defined $author || length $author == 0) { From 99366565f1a78f9089c0e505ddaee47b31928f1d Mon Sep 17 00:00:00 2001 From: Deskin Miller Date: Sun, 8 Feb 2009 19:33:18 -0500 Subject: [PATCH 3/5] git-svn: Print revision while searching for earliest use of path When initializing a git-svn repository from a Subversion repository, it is common to be interested in a path which did not exist in the initial commit to Subversion. In a large repository, the initial fetch may take some time looking for the earliest existence of the path while the user receives no additional feedback. Print the highest revision number scanned thus far to let the user know something is still happening. Signed-off-by: Deskin Miller --- git-svn.perl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/git-svn.perl b/git-svn.perl index b0ae05bc40..9baf8221c3 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -4376,6 +4376,9 @@ sub gs_fetch_loop_common { } $self->get_log([$longest_path], $min, $max, 0, 1, 1, sub { $revs{$_[1]} = _cb(@_) }); + if ($err) { + print "Checked through r$max\r"; + } if ($err && $max >= $head) { print STDERR "Path '$longest_path' ", "was probably deleted:\n", From 8841b37f2f5ce74359e150254dd138602eb418a7 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 11 Feb 2009 01:56:58 -0800 Subject: [PATCH 4/5] git-svn: fix broken symlink workaround when switching branches Thanks to Anton Gyllenberg for the bug report (and testcase in the following commit): > Commit dbc6c74d0858d77e61e092a48d467e725211f8e9 "git-svn: > handle empty files marked as symlinks in SVN" caused a > regression in an unusual case where a branch has been created > in SVN, later deleted and then created again from another > branch point and the 
original branch point had empty files not > in the new branch. In some cases git svn fetch will then fail > while trying to fetch the empty file from the wrong SVN > revision. Signed-off-by: Eric Wong --- git-svn.perl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/git-svn.perl b/git-svn.perl index 9baf8221c3..001a1d8eff 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -2417,7 +2417,7 @@ sub find_parent_branch { # is not included with SVN 1.4.3 (the latest version # at the moment), so we can't rely on it $self->{last_commit} = $parent; - $ed = SVN::Git::Fetcher->new($self); + $ed = SVN::Git::Fetcher->new($self, $gs->{path}); $gs->ra->gs_do_switch($r0, $rev, $gs, $self->full_url, $ed) or die "SVN connection failed somewhere...\n"; @@ -3258,12 +3258,13 @@ use vars qw/$_ignore_regex/; # file baton members: path, mode_a, mode_b, pool, fh, blob, base sub new { - my ($class, $git_svn) = @_; + my ($class, $git_svn, $switch_path) = @_; my $self = SVN::Delta::Editor->new; bless $self, $class; if (exists $git_svn->{last_commit}) { $self->{c} = $git_svn->{last_commit}; - $self->{empty_symlinks} = _mark_empty_symlinks($git_svn); + $self->{empty_symlinks} = + _mark_empty_symlinks($git_svn, $switch_path); } $self->{empty} = {}; $self->{dir_prop} = {}; @@ -3278,7 +3279,7 @@ sub new { # not inside them (when the Git::SVN::Fetcher object is passed) to # do_{switch,update} sub _mark_empty_symlinks { - my ($git_svn) = @_; + my ($git_svn, $switch_path) = @_; my $bool = Git::config_bool('svn.brokenSymlinkWorkaround'); return {} if (defined($bool) && ! $bool); @@ -3294,7 +3295,7 @@ sub _mark_empty_symlinks { chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`); my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt); local $/ = "\0"; - my $pfx = $git_svn->{path}; + my $pfx = defined($switch_path) ? 
$switch_path : $git_svn->{path}; $pfx .= '/' if length($pfx); while (<$ls>) { chomp; From 39111f6b7aeb5a7bc57731a6c8f0c3b8178873c8 Mon Sep 17 00:00:00 2001 From: Anton Gyllenberg Date: Wed, 11 Feb 2009 00:38:45 +0200 Subject: [PATCH 5/5] test case for regression caused by git-svn empty symlink fix Commit dbc6c74d0858d77e61e092a48d467e725211f8e9 "git-svn: handle empty files marked as symlinks in SVN" caused a regression in an unusual case where a branch has been created in SVN, later deleted and then created again from another branch point and the original branch point had empty files not in the new branch. In some cases git svn fetch will then fail while trying to fetch the empty file from the wrong SVN revision. This adds a test case that reproduces the issue. [ew: added additional test to ensure file was created correctly made test file executable ] Signed-off-by: Anton Gyllenberg Acked-by: Eric Wong --- t/t9135-git-svn-moved-branch-empty-file.sh | 16 ++ t/t9135/svn.dump | 192 +++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100755 t/t9135-git-svn-moved-branch-empty-file.sh create mode 100644 t/t9135/svn.dump diff --git a/t/t9135-git-svn-moved-branch-empty-file.sh b/t/t9135-git-svn-moved-branch-empty-file.sh new file mode 100755 index 0000000000..03705fa4ce --- /dev/null +++ b/t/t9135-git-svn-moved-branch-empty-file.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +test_description='test moved svn branch with missing empty files' + +. ./lib-git-svn.sh +test_expect_success 'load svn dumpfile' ' + svnadmin load "$rawsvnrepo" < "${TEST_DIRECTORY}/t9135/svn.dump" + ' + +test_expect_success 'clone using git svn' 'git svn clone -s "$svnrepo" x' + +test_expect_success 'test that b1 exists and is empty' ' + (cd x && test -f b1 && ! 
test -s b1) + ' + +test_done diff --git a/t/t9135/svn.dump b/t/t9135/svn.dump new file mode 100644 index 0000000000..b51c0ccceb --- /dev/null +++ b/t/t9135/svn.dump @@ -0,0 +1,192 @@ +SVN-fs-dump-format-version: 2 + +UUID: 1f80e919-e9e3-4d80-a3ae-d9f21095e27b + +Revision-number: 0 +Prop-content-length: 56 +Content-length: 56 + +K 8 +svn:date +V 27 +2009-02-10T19:23:16.424027Z +PROPS-END + +Revision-number: 1 +Prop-content-length: 123 +Content-length: 123 + +K 7 +svn:log +V 20 +init standard layout +K 10 +svn:author +V 8 +john.doe +K 8 +svn:date +V 27 +2009-02-10T19:23:17.195072Z +PROPS-END + +Node-path: branches +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Node-path: trunk +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Revision-number: 2 +Prop-content-length: 121 +Content-length: 121 + +K 7 +svn:log +V 18 +branch-b off trunk +K 10 +svn:author +V 8 +john.doe +K 8 +svn:date +V 27 +2009-02-10T19:23:19.160095Z +PROPS-END + +Node-path: branches/branch-b +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 1 +Node-copyfrom-path: trunk +Prop-content-length: 34 +Content-length: 34 + +K 13 +svn:mergeinfo +V 0 + +PROPS-END + + +Revision-number: 3 +Prop-content-length: 120 +Content-length: 120 + +K 7 +svn:log +V 17 +add empty file b1 +K 10 +svn:author +V 8 +john.doe +K 8 +svn:date +V 27 +2009-02-10T19:23:20.194568Z +PROPS-END + +Node-path: branches/branch-b/b1 +Node-kind: file +Node-action: add +Prop-content-length: 10 +Text-content-length: 0 +Text-content-md5: d41d8cd98f00b204e9800998ecf8427e +Content-length: 10 + +PROPS-END + + +Revision-number: 4 +Prop-content-length: 110 +Content-length: 110 + +K 7 +svn:log +V 8 +branch-c +K 10 +svn:author +V 8 +john.doe +K 8 +svn:date +V 27 +2009-02-10T19:23:21.169100Z +PROPS-END + +Node-path: branches/branch-c +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 3 +Node-copyfrom-path: trunk + + +Revision-number: 5 
+Prop-content-length: 126 +Content-length: 126 + +K 7 +svn:log +V 23 +oops, wrong branchpoint +K 10 +svn:author +V 8 +john.doe +K 8 +svn:date +V 27 +2009-02-10T19:23:21.253557Z +PROPS-END + +Node-path: branches/branch-c +Node-action: delete + + +Revision-number: 6 +Prop-content-length: 127 +Content-length: 127 + +K 7 +svn:log +V 24 +branch-c off of branch-b +K 10 +svn:author +V 8 +john.doe +K 8 +svn:date +V 27 +2009-02-10T19:23:21.314659Z +PROPS-END + +Node-path: branches/branch-c +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 5 +Node-copyfrom-path: branches/branch-b +Prop-content-length: 34 +Content-length: 34 + +K 13 +svn:mergeinfo +V 0 + +PROPS-END + +