From 3cae7e5b2b9c73aca6a7729bc34c1ce6638fa43a Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Thu, 17 Jun 2010 22:10:39 +0200 Subject: [PATCH] send-email: ask about and declare 8bit mails git-send-email passes on an 8bit mail as-is even if it does not declare a content-type. Because the user can edit email between format-patch and send-email, such invalid mails are unfortunately not very hard to come by. Make git-send-email stop and ask about the encoding to use if it encounters any such mail. Also provide a configuration setting to permanently configure an encoding. Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- Documentation/git-send-email.txt | 9 ++++ git-send-email.perl | 60 +++++++++++++++++++++++++ t/t9001-send-email.sh | 77 ++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+) diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 3dfdc7cca6..e70d9bfcdb 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -101,6 +101,15 @@ See the CONFIGURATION section for 'sendemail.multiedit'. + The --to option must be repeated for each user you want on the to list. +--8bit-encoding=:: + When encountering a non-ASCII message or subject that does not + declare its encoding, add headers/quoting to indicate it is + encoded in . Default is the value of the + 'sendemail.assume8bitEncoding'; if that is unspecified, this + will be prompted for if any non-ASCII files are encountered. ++ +Note that no attempts whatsoever are made to validate the encoding. + Sending ~~~~~~~ diff --git a/git-send-email.perl b/git-send-email.perl index ce569a9c8f..0db39b0de4 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -54,6 +54,7 @@ git send-email [options] --in-reply-to * Email "In-Reply-To:" --annotate * Review each patch that will be sent in an editor. --compose * Open an editor for introduction. + --8bit-encoding * Encoding to assume 8bit mails if undeclared Sending: --envelope-sender * Email envelope sender. @@ -193,6 +194,7 @@ my ($smtp_server, $smtp_server_port, $smtp_authuser, $smtp_encryption); my ($identity, $aliasfiletype, @alias_files, @smtp_host_parts); my ($validate, $confirm); my (@suppress_cc); +my ($auto_8bit_encoding); my ($debug_net_smtp) = 0; # Net::SMTP, see send_message() @@ -223,6 +225,7 @@ my %config_settings = ( "multiedit" => \$multiedit, "confirm" => \$confirm, "from" => \$sender, + "assume8bitencoding" => \$auto_8bit_encoding, ); # Help users prepare for 1.7.0 @@ -298,6 +301,7 @@ my $rc = GetOptions("sender|from=s" => \$sender, "thread!" => \$thread, "validate!" => \$validate, "format-patch!" => \$format_patch, + "8bit-encoding=s" => \$auto_8bit_encoding, ); unless ($rc) { @@ -670,6 +674,35 @@ sub ask { return undef; } +my %broken_encoding; + +sub file_declares_8bit_cte($) { + my $fn = shift; + open (my $fh, '<', $fn); + while (my $line = <$fh>) { + last if ($line =~ /^$/); + return 1 if ($line =~ /^Content-Transfer-Encoding: .*8bit.*$/); + } + close $fh; + return 0; +} + +foreach my $f (@files) { + next unless (body_or_subject_has_nonascii($f) + && !file_declares_8bit_cte($f)); + $broken_encoding{$f} = 1; +} + +if (!defined $auto_8bit_encoding && scalar %broken_encoding) { + print "The following files are 8bit, but do not declare " . + "a Content-Transfer-Encoding.\n"; + foreach my $f (sort keys %broken_encoding) { + print " $f\n"; + } + $auto_8bit_encoding = ask("Which 8bit encoding should I declare [UTF-8]? ", + default => "UTF-8"); +} + my $prompting = 0; if (!defined $sender) { $sender = $repoauthor || $repocommitter || ''; @@ -1225,6 +1258,18 @@ foreach my $t (@files) { or die "(cc-cmd) failed to close pipe to '$cc_cmd'"; } + if ($broken_encoding{$t} && !$has_content_type) { + $has_content_type = 1; + push @xh, "MIME-Version: 1.0", + "Content-Type: text/plain; charset=$auto_8bit_encoding", + "Content-Transfer-Encoding: 8bit"; + $body_encoding = $auto_8bit_encoding; + } + + if ($broken_encoding{$t} && !is_rfc2047_quoted($subject)) { + $subject = quote_rfc2047($subject, $auto_8bit_encoding); + } + if (defined $author and $author ne $sender) { $message = "From: $author\n\n$message"; if (defined $author_encoding) { @@ -1237,6 +1282,7 @@ foreach my $t (@files) { } } else { + $has_content_type = 1; push @xh, 'MIME-Version: 1.0', "Content-Type: text/plain; charset=$author_encoding", @@ -1314,3 +1360,17 @@ sub file_has_nonascii { } return 0; } + +sub body_or_subject_has_nonascii { + my $fn = shift; + open(my $fh, '<', $fn) + or die "unable to open $fn: $!\n"; + while (my $line = <$fh>) { + last if $line =~ /^$/; + return 1 if $line =~ /^Subject.*[^[:ascii:]]/; + } + while (my $line = <$fh>) { + return 1 if $line =~ /[^[:ascii:]]/; + } + return 0; +} diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 640b3d2bb4..0b8a591507 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -918,4 +918,81 @@ test_expect_success '--no-bcc overrides sendemail.bcc' ' ! grep "RCPT TO:" stdout ' +cat >email-using-8bit < +From: author@example.com +Date: Sat, 12 Jun 2010 15:53:58 +0200 +Subject: subject goes here + +Dieser deutsche Text enthält einen Umlaut! +EOF + +cat >content-type-decl <stdout && + grep "do not declare a Content-Transfer-Encoding" stdout && + grep email-using-8bit stdout && + grep "Which 8bit encoding" stdout && + grep "Content\\|MIME" msgtxt1 >actual && + test_cmp actual content-type-decl +' + +test_expect_success 'sendemail.8bitEncoding works' ' + clean_fake_sendmail && + git config sendemail.assume8bitEncoding UTF-8 && + echo bogus | + git send-email --from=author@example.com --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + email-using-8bit >stdout && + grep "Content\\|MIME" msgtxt1 >actual && + test_cmp actual content-type-decl +' + +test_expect_success '--8bit-encoding overrides sendemail.8bitEncoding' ' + clean_fake_sendmail && + git config sendemail.assume8bitEncoding "bogus too" && + echo bogus | + git send-email --from=author@example.com --to=nobody@example.com \ + --smtp-server="$(pwd)/fake.sendmail" \ + --8bit-encoding=UTF-8 \ + email-using-8bit >stdout && + grep "Content\\|MIME" msgtxt1 >actual && + test_cmp actual content-type-decl +' + +cat >email-using-8bit < +From: author@example.com +Date: Sat, 12 Jun 2010 15:53:58 +0200 +Subject: Dieser Betreff enthält auch einen Umlaut! + +Nothing to see here. +EOF + +cat >expected <stdout && + grep "Subject" msgtxt1 >actual && + test_cmp expected actual +' + test_done