aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Kanat-Alexander <mkanat@bugzilla.org>2011-08-09 14:04:31 -0700
committerMax Kanat-Alexander <mkanat@bugzilla.org>2011-08-09 14:04:31 -0700
commitb308699b2c0453392c86215cecc4fe508a0e1762 (patch)
tree27b85bdd675e49598949bb416be46941d2a3b626 /email_in.pl
parentBug 437076: Allow email_in to accept multipart/alternative HTML email with (diff)
downloadbugzilla-b308699b2c0453392c86215cecc4fe508a0e1762.tar.gz
bugzilla-b308699b2c0453392c86215cecc4fe508a0e1762.tar.bz2
bugzilla-b308699b2c0453392c86215cecc4fe508a0e1762.zip
Bug 660691: Allow Bugzilla to parse HTML-only inbound email via email_in.pl
r=glob, a=mkanat
Diffstat (limited to 'email_in.pl')
-rwxr-xr-xemail_in.pl41
1 files changed, 34 insertions, 7 deletions
diff --git a/email_in.pl b/email_in.pl
index a835c3c9a..f16d56175 100755
--- a/email_in.pl
+++ b/email_in.pl
@@ -39,6 +39,7 @@ use Email::Address;
use Email::Reply qw(reply);
use Email::MIME;
use Getopt::Long qw(:config bundling);
+use HTML::FormatText::WithLinks;
use Pod::Usage;
use Encode;
use Scalar::Util qw(blessed);
@@ -68,6 +69,7 @@ use constant SIGNATURE_DELIMITER => '-- ';
use constant BODY_TYPES => qw(
text/plain
text/html
+ application/xhtml+xml
multipart/alternative
);
@@ -321,7 +323,7 @@ sub get_body_and_attachments {
# Note that this only happens if the email does not contain any
# text/plain parts. If the email has an empty text/plain part,
# you're fine, and this message does NOT get thrown.
- ThrowUserError('email_no_text_plain');
+ ThrowUserError('email_no_body');
}
debug_print("Picked Body:\n$body", 2);
@@ -343,18 +345,43 @@ sub get_text_alternative {
}
debug_print("Alternative Part Content-Type: $ct", 2);
debug_print("Alternative Part Character Encoding: $charset", 2);
- if (!$ct || $ct =~ /^text\/plain/i) {
- $body = $part->body;
- if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($body)) {
- $body = Encode::decode($charset, $body);
- }
- last;
+ # If we find a text/plain body here, return it immediately.
+ if (!$ct || $ct =~ m{^text/plain}i) {
+ return _decode_body($charset, $part->body);
+ }
+ # If we find a text/html body, decode it, but don't return
+ # it immediately, because there might be a text/plain alternative
+ # later. This could be any HTML type.
+ if ($ct =~ m{^application/xhtml\+xml}i or $ct =~ m{text/html}i) {
+ my $parser = HTML::FormatText::WithLinks->new(
+ # Put footnnote indicators after the text, not before it.
+ before_link => '',
+ after_link => '[%n]',
+ # Convert bold and italics, use "*" for bold instead of "_".
+ with_emphasis => 1,
+ bold_marker => '*',
+ # If the same link appears multiple times, only create
+ # one footnote.
+ unique_links => 1,
+ # If the link text is the URL, don't create a footnote.
+ skip_linked_urls => 1,
+ );
+ $body = _decode_body($charset, $part->body);
+ $body = $parser->parse($body);
}
}
return $body;
}
+sub _decode_body {
+ my ($charset, $body) = @_;
+ if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($body)) {
+ return Encode::decode($charset, $body);
+ }
+ return $body;
+}
+
sub remove_leading_blank_lines {
my ($text) = @_;
$text =~ s/^(\s*\n)+//s;