From 2338d93598b5e8432df24bda8dfdc231bdeb666e Mon Sep 17 00:00:00 2001 From: Matthew Horsfall Date: Fri, 20 May 2016 14:24:26 -0400 Subject: [PATCH 1/3] GH #14 - Support multipart messages without content-type in subparts. Per RFC 1341, section 7.2 https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html A body part is NOT to be interpreted as actually being an RFC 822 message. To begin with, NO header fields are actually required in body parts. A body part that starts with a blank line, therefore, is allowed and is a body part for which all default values are to be assumed. In such a case, the absence of a Content-Type header field implies that the encapsulation is plain US-ASCII text. --- lib/Email/MIME.pm | 22 ++++++++++++++++++- t/multipart.t | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/lib/Email/MIME.pm b/lib/Email/MIME.pm index 89fa765..bd1c8b5 100644 --- a/lib/Email/MIME.pm +++ b/lib/Email/MIME.pm @@ -363,7 +363,7 @@ sub parts_multipart { # rfc1521 7.2.1 my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2; - my @bits = split /^--\Q$boundary\E\s*$/sm, ($body || ''); + my @bits = split /^--\Q$boundary\E\s*?$/m, ($body || ''); $self->SUPER::body_set(undef); @@ -377,6 +377,26 @@ sub parts_multipart { my @parts; for my $bit (@bits) { + my $no_header; + + # Parts don't need headers. If they don't have them, they look like this: + # + # --90e6ba6e8d06f1723604fc1b809a + # + # Part 2 + # + # Part 2a + # + # $bit will contain two new lines before Part 2. + # + # Anything with headers will only have one new line. + # + # RFC 1341 Section 7.2 says parts without headers are to be considered + # plain US-ASCII text. + if ($bit =~ /^([\r\n][\r\n])/) { + $bit = "Content-type: text/plain; charset=us-ascii" . $bit; + } + $bit =~ s/\A[\n\r]+//smg; $bit =~ s/(?{mycrlf}\Z//sm; my $email = (ref $self)->new($bit); diff --git a/t/multipart.t b/t/multipart.t index 6d44836..1321ea4 100644 --- a/t/multipart.t +++ b/t/multipart.t @@ -5,6 +5,17 @@ use Test::More; use Carp; $SIG{__WARN__} = sub { Carp::cluck @_ }; use_ok 'Email::MIME::Creator'; +use_ok 'Email::MIME::ContentType'; + +sub ct { + return ( + type => $_[0], # okay! + subtype => $_[1], # okay! + + discrete => $_[0], # dumb! + composite => $_[1], # dumb! + ); +} my $hi = Email::MIME->create(body => "Hi"); my $hello = Email::MIME->create(body => "Hello"); @@ -89,4 +100,48 @@ END unlike($email->as_string, qr/Postlude/, "postlude in string"); } +{ + my $email = Email::MIME->new(<<'END'); +From: Test +To: Test +Subject: Test +Content-Type: multipart/alternative; boundary=90e6ba6e8d06f1723604fc1b809a + +--90e6ba6e8d06f1723604fc1b809a +Content-Type: text/plain; charset=UTF-8 + +Part 1 + +Part 1a + +--90e6ba6e8d06f1723604fc1b809a + +Part 2 + +Part 2a + +--90e6ba6e8d06f1723604fc1b809a-- +END + + my @parts = $email->subparts; + + is(@parts, 2, 'got 2 parts'); + + like($parts[0]->body, qr/^Part 1.*Part 1a$/s, 'Part 1 looks right'); + is_deeply( parse_content_type($parts[0]->header('Content-Type')), { + ct(qw(text plain)), + attributes => { + charset => 'UTF-8', + }, + }, 'explicit ct worked' ); + + like($parts[1]->body, qr/^Part 2.*Part 2a$/s, 'Part 2 looks right'); + is_deeply( parse_content_type($parts[1]->header('Content-Type')), { + ct(qw(text plain)), + attributes => { + charset => 'us-ascii', + }, + }, 'default ct worked' ); +} + done_testing; From 981d8201a7239b02114489529fd366c4c576a146 Mon Sep 17 00:00:00 2001 From: Matthew Horsfall Date: Mon, 1 Aug 2016 22:30:05 -0400 Subject: [PATCH 2/3] GH #14 - Handle CRLF emails properly. --- lib/Email/MIME.pm | 8 +++++--- t/multipart.t | 49 +++++++++++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/lib/Email/MIME.pm b/lib/Email/MIME.pm index bd1c8b5..bce680c 100644 --- a/lib/Email/MIME.pm +++ b/lib/Email/MIME.pm @@ -363,7 +363,8 @@ sub parts_multipart { # rfc1521 7.2.1 my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2; - my @bits = split /^--\Q$boundary\E\s*?$/m, ($body || ''); + # Split on boundaries, but keep blank lines after them intact + my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || ''); $self->SUPER::body_set(undef); @@ -392,8 +393,9 @@ sub parts_multipart { # Anything with headers will only have one new line. # # RFC 1341 Section 7.2 says parts without headers are to be considered - # plain US-ASCII text. - if ($bit =~ /^([\r\n][\r\n])/) { + # plain US-ASCII text. -- alh + # 2016-08-01 + if ($bit =~ /^(?:$self->{mycrlf}){2}/) { $bit = "Content-type: text/plain; charset=us-ascii" . $bit; } diff --git a/t/multipart.t b/t/multipart.t index 1321ea4..013e8ec 100644 --- a/t/multipart.t +++ b/t/multipart.t @@ -101,7 +101,7 @@ END } { - my $email = Email::MIME->new(<<'END'); + my $email_str = <<'END'; From: Test To: Test Subject: Test @@ -123,25 +123,40 @@ Part 2a --90e6ba6e8d06f1723604fc1b809a-- END - my @parts = $email->subparts; + my @emails = (["lf-delimited", $email_str]); - is(@parts, 2, 'got 2 parts'); + # Also test with CRLF email + $email_str =~ s/\n/\r\n/g; - like($parts[0]->body, qr/^Part 1.*Part 1a$/s, 'Part 1 looks right'); - is_deeply( parse_content_type($parts[0]->header('Content-Type')), { - ct(qw(text plain)), - attributes => { - charset => 'UTF-8', - }, - }, 'explicit ct worked' ); + push @emails, ["crlf-delimited", $email_str]; - like($parts[1]->body, qr/^Part 2.*Part 2a$/s, 'Part 2 looks right'); - is_deeply( parse_content_type($parts[1]->header('Content-Type')), { - ct(qw(text plain)), - attributes => { - charset => 'us-ascii', - }, - }, 'default ct worked' ); + for my $test (@emails) { + my ($desc, $email_str) = @$test; + + note("Testing $desc email"); + + my $email = Email::MIME->new($email_str); + + my @parts = $email->subparts; + + is(@parts, 2, 'got 2 parts'); + + like($parts[0]->body, qr/^Part 1.*Part 1a\r?$/s, 'Part 1 looks right'); + is_deeply( parse_content_type($parts[0]->header('Content-Type')), { + ct(qw(text plain)), + attributes => { + charset => 'UTF-8', + }, + }, 'explicit ct worked' ); + + like($parts[1]->body, qr/^Part 2.*Part 2a\r?$/s, 'Part 2 looks right'); + is_deeply( parse_content_type($parts[1]->header('Content-Type')), { + ct(qw(text plain)), + attributes => { + charset => 'us-ascii', + }, + }, 'default ct worked' ); + } } done_testing; From fe0eb870ab732507aa39a1070a2fd9435c7e4877 Mon Sep 17 00:00:00 2001 From: Matthew Horsfall Date: Mon, 5 Sep 2016 22:43:08 -0400 Subject: [PATCH 3/3] Make sure we don't modify the body of a message when injecting a header. When we come across a part that has no specific content-type, we inject one to satisfy RFC 1341, section 7.2. We must inject a header because Email::MIME body parts are just Email::Simple objects -- which expect messages that have at least one header. After we've generated the part, we can then remove the added header, and be on our merry way. This is awful, but works. --- lib/Email/MIME.pm | 13 +++++++++++-- t/multipart.t | 12 +++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/Email/MIME.pm b/lib/Email/MIME.pm index bce680c..3e4e7a2 100644 --- a/lib/Email/MIME.pm +++ b/lib/Email/MIME.pm @@ -378,8 +378,6 @@ sub parts_multipart { my @parts; for my $bit (@bits) { - my $no_header; - # Parts don't need headers. If they don't have them, they look like this: # # --90e6ba6e8d06f1723604fc1b809a @@ -395,13 +393,24 @@ sub parts_multipart { # RFC 1341 Section 7.2 says parts without headers are to be considered # plain US-ASCII text. -- alh # 2016-08-01 + my $added_header; + if ($bit =~ /^(?:$self->{mycrlf}){2}/) { $bit = "Content-type: text/plain; charset=us-ascii" . $bit; + + $added_header = 1; } $bit =~ s/\A[\n\r]+//smg; $bit =~ s/(?{mycrlf}\Z//sm; + my $email = (ref $self)->new($bit); + + if ($added_header) { + # Remove our changes so we don't change the raw email content + $email->header_str_set('Content-Type'); + } + push @parts, $email; } diff --git a/t/multipart.t b/t/multipart.t index 013e8ec..9ff16c4 100644 --- a/t/multipart.t +++ b/t/multipart.t @@ -138,9 +138,13 @@ END my $email = Email::MIME->new($email_str); my @parts = $email->subparts; - is(@parts, 2, 'got 2 parts'); + # Force a change to the email so as_string gives us something fresh from + # the parts involved. Ensure that the body parts in the message have not + # changed (so we don't interfere with DKIM signing, for example). + $email->parts_set([ @parts ]); + like($parts[0]->body, qr/^Part 1.*Part 1a\r?$/s, 'Part 1 looks right'); is_deeply( parse_content_type($parts[0]->header('Content-Type')), { ct(qw(text plain)), @@ -156,6 +160,12 @@ END charset => 'us-ascii', }, }, 'default ct worked' ); + + like( + $email->as_string, + qr/--90e6ba6e8d06f1723604fc1b809a\r?\n\r?\nPart 2/, + "Email string not modified" + ); } }