Skip to content

Commit bf03fcb

Browse files
[M65 Merge] Encode Subject header correctly per RFC 2047
There are some differences between RFC 2047, which should be used to encode header values, and RFC 2045, which is used for the body: 1) CRLF+SPACE is used for soft line breaks. 2) SPACE and TAB should always be encoded. 3) Multiple encoded-words should be used. Did manual test with FAR file manager w/ Observer plugin and 7-Zip w/ eDecoder plugin. [email protected] (cherry picked from commit a97e4b2) Bug: 794835 Change-Id: I5b87b7392d2208dd58bf512c7ee59c87bc32a85a Reviewed-on: https://chromium-review.googlesource.com/835009 Reviewed-by: Xianzhu Wang <[email protected]> Reviewed-by: Łukasz Anforowicz <[email protected]> Reviewed-by: Daniel Cheng <[email protected]> Commit-Queue: Jian Li <[email protected]> Cr-Original-Commit-Position: refs/heads/master@{#530371} Reviewed-on: https://chromium-review.googlesource.com/884512 Reviewed-by: Jian Li <[email protected]> Cr-Commit-Position: refs/branch-heads/3325@{#73} Cr-Branched-From: bc084a8-refs/heads/master@{#530369}
1 parent f698044 commit bf03fcb

File tree

5 files changed

+223
-30
lines changed

5 files changed

+223
-30
lines changed

third_party/WebKit/Source/core/frame/MHTMLTest.cpp

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,29 @@ class MHTMLTest : public ::testing::Test {
157157
LineReader line_reader(
158158
std::string(mhtml_data->data(), mhtml_data->length()));
159159
std::string line;
160-
while (line_reader.GetNextLine(&line) && line.length()) {
160+
line_reader.GetNextLine(&line);
161+
while (line.length()) {
162+
// Peek next line to see if it starts with soft line break. If yes, append
163+
// to current line.
164+
std::string next_line;
165+
while (true) {
166+
line_reader.GetNextLine(&next_line);
167+
if (next_line.length() > 1 &&
168+
(next_line[0] == ' ' || next_line[0] == '\t')) {
169+
line += &(next_line.at(1));
170+
continue;
171+
}
172+
break;
173+
}
174+
161175
std::string::size_type pos = line.find(':');
162176
if (pos == std::string::npos)
163177
continue;
164178
std::string key = line.substr(0, pos);
165179
std::string value = line.substr(pos + 2);
166180
mhtml_headers.emplace(key, value);
181+
182+
line = next_line;
167183
}
168184
return mhtml_headers;
169185
}
@@ -251,14 +267,16 @@ TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingAllPrintableCharacters) {
251267

252268
EXPECT_EQ("<Saved by Blink>", mhtml_headers["From"]);
253269
EXPECT_FALSE(mhtml_headers["Date"].empty());
254-
EXPECT_EQ("multipart/related;", mhtml_headers["Content-Type"]);
270+
EXPECT_EQ(
271+
"multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
272+
mhtml_headers["Content-Type"]);
255273
EXPECT_EQ("abc", mhtml_headers["Subject"]);
256274
EXPECT_EQ(kURL, mhtml_headers["Snapshot-Content-Location"]);
257275
}
258276

259277
TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingNonPrintableCharacters) {
260278
const char kURL[] = "http://www.example.com/";
261-
const char kTitle[] = u8"abc=\u261D\U0001F3FB";
279+
const char kTitle[] = "abc \t=\xe2\x98\x9d\xf0\x9f\x8f\xbb";
262280
AddTestResources();
263281
scoped_refptr<RawData> data =
264282
Serialize(ToKURL(kURL), String::FromUTF8(kTitle), "text/html",
@@ -268,12 +286,43 @@ TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingNonPrintableCharacters) {
268286

269287
EXPECT_EQ("<Saved by Blink>", mhtml_headers["From"]);
270288
EXPECT_FALSE(mhtml_headers["Date"].empty());
271-
EXPECT_EQ("multipart/related;", mhtml_headers["Content-Type"]);
272-
EXPECT_EQ("=?utf-8?Q?abc=3D=E2=98=9D=F0=9F=8F=BB?=",
289+
EXPECT_EQ(
290+
"multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
291+
mhtml_headers["Content-Type"]);
292+
EXPECT_EQ("=?utf-8?Q?abc=20=09=3D=E2=98=9D=F0=9F=8F=BB?=",
273293
mhtml_headers["Subject"]);
274294
EXPECT_EQ(kURL, mhtml_headers["Snapshot-Content-Location"]);
275295
}
276296

297+
// Verifies that a long title containing whitespace and non-ASCII characters is
// serialized into the Subject header as several encoded-words.
TEST_F(MHTMLTest,
298+
TestMHTMLHeadersWithLongTitleContainingNonPrintableCharacters) {
299+
const char kURL[] = "http://www.example.com/";
300+
// 100 ASCII digits followed by a space, a tab, '=' and the UTF-8 byte
// sequences E2 98 9D and F0 9F 8F BB.
const char kTitle[] =
301+
"01234567890123456789012345678901234567890123456789"
302+
"01234567890123456789012345678901234567890123456789"
303+
" \t=\xe2\x98\x9d\xf0\x9f\x8f\xbb";
304+
AddTestResources();
305+
scoped_refptr<RawData> data =
306+
Serialize(ToKURL(kURL), String::FromUTF8(kTitle), "text/html",
307+
MHTMLArchive::kUseDefaultEncoding);
308+
309+
std::map<std::string, std::string> mhtml_headers = ExtractMHTMLHeaders(data);
310+
311+
EXPECT_EQ("<Saved by Blink>", mhtml_headers["From"]);
312+
EXPECT_FALSE(mhtml_headers["Date"].empty());
313+
EXPECT_EQ(
314+
"multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
315+
mhtml_headers["Content-Type"]);
316+
// The expected value is three "=?utf-8?Q?...?=" encoded-words back to back:
// ExtractMHTMLHeaders() appends each continuation line to the previous one
// after dropping its leading space/tab character.
// NOTE(review): the last split falls between the bytes F0 9F and 8F BB of a
// single 4-byte UTF-8 sequence. RFC 2047 section 5 says a multi-octet
// character may not be split across adjacent encoded-words - confirm whether
// this expectation is intended.
EXPECT_EQ(
317+
"=?utf-8?Q?012345678901234567890123456789"
318+
"012345678901234567890123456789012?="
319+
"=?utf-8?Q?345678901234567890123456789"
320+
"0123456789=20=09=3D=E2=98=9D=F0=9F?="
321+
"=?utf-8?Q?=8F=BB?=",
322+
mhtml_headers["Subject"]);
323+
EXPECT_EQ(kURL, mhtml_headers["Snapshot-Content-Location"]);
324+
}
325+
277326
TEST_F(MHTMLTest, TestMHTMLEncoding) {
278327
const char kURL[] = "http://www.example.com";
279328
AddTestResources();
@@ -466,4 +515,24 @@ TEST_F(MHTMLTest, FormControlElements) {
466515
EXPECT_FALSE(document->getElementById("fm")->IsDisabledFormControl());
467516
}
468517

518+
// Loads the soft_line_break.mht fixture and checks that the resulting document
// contains the element whose long id is the one looked up below.
TEST_F(MHTMLTest, LoadMHTMLContainingSoftLineBreaks) {
519+
const char kURL[] = "http://www.example.com";
520+
521+
// Register the mocked frame and load it.
522+
RegisterMockedURLLoad(kURL, "soft_line_break.mht");
523+
LoadURLInTopFrame(ToKURL(kURL));
524+
ASSERT_TRUE(GetPage());
525+
LocalFrame* frame = ToLocalFrame(GetPage()->MainFrame());
526+
ASSERT_TRUE(frame);
527+
// Header lines separated by soft line breaks should be concatenated without
528+
// problems, so a document must have been created.
529+
Document* document = frame->GetDocument();
530+
ASSERT_TRUE(document);
531+
532+
// Body lines separated by soft line breaks should be concatenated without
533+
// problems, so the full element id must be found.
534+
EXPECT_TRUE(document->getElementById(
535+
"AVeryLongID012345678901234567890123456789012345678901234567890End"));
536+
}
537+
469538
} // namespace blink
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
From: <Saved by Blink>
2+
Subject: =?utf-8?Q?012345678901234567890123456789012345678901234567890
3+
1234567890123456789012345678901234567890123456789
4+
=3D=E2=98=9D=F0=9F=8F=BB?=
5+
Date: Thu, 4 Oct 2017 21:18:18 -0000
6+
MIME-Version: 1.0
7+
Content-Type: multipart/related;
8+
type="text/html";
9+
boundary="----MultipartBoundary--e77OylKXx1PBMEF67x53AwnQLf4DUmwdt037X9MjPK----"
10+
11+
------MultipartBoundary--e77OylKXx1PBMEF67x53AwnQLf4DUmwdt037X9MjPK----
12+
Content-Type: text/html
13+
Content-ID: <[email protected]>
14+
Content-Transfer-Encoding: quoted-printable
15+
Content-Location: http://localhost/soft_line_break.html
16+
17+
<html>
18+
<head><meta http-equiv=3D"Content-Type" content=3D"text/html; charset=
19+
=3DUTF-8">
20+
</head>
21+
<body>
22+
<div id=3D"AVeryLongID01234567890123456789012345678901234567890123456=
23+
7890End">
24+
</div>
25+
</body>
26+
</html>
27+
------MultipartBoundary--e77OylKXx1PBMEF67x53AwnQLf4DUmwdt037X9MjPK------

third_party/WebKit/Source/platform/mhtml/MHTMLArchive.cpp

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,78 @@
4848

4949
namespace blink {
5050

51-
const char* const kQuotedPrintable = "quoted-printable";
52-
const char* const kBase64 = "base64";
53-
const char* const kBinary = "binary";
51+
// File-local constants used when generating MHTML output.
namespace {
52+
53+
// Line length limit for encoded output. Body lines use it directly; header
// lines subtract the encoded-word prefix and suffix lengths from it.
const size_t kMaximumLineLength = 76;
54+
// Line terminator appended when an encoded line is broken.
const char kCrlfLineEnding[] = "\r\n";
55+
56+
// Text placed before and after each encoded header line, together with the
// number of characters in each (10 for "=?utf-8?Q?", 2 for "?=").
const char kRFC2047EncodingPrefix[] = "=?utf-8?Q?";
57+
const size_t kRFC2047EncodingPrefixLength = 10;
58+
const char kRFC2047EncodingSuffix[] = "?=";
59+
const size_t kRFC2047EncodingSuffixLength = 2;
60+
61+
// Content encoding names compared against with strcmp() in this file.
const char kQuotedPrintable[] = "quoted-printable";
62+
const char kBase64[] = "base64";
63+
const char kBinary[] = "binary";
64+
65+
} // namespace
66+
67+
// Controls quoted-printable encoding characters in body, per RFC 2045.
// Lines may use the full kMaximumLineLength, space/tab characters are encoded
// only at the end of a line, and every line but the last is terminated with
// '=' followed by CRLF.
68+
class QuotedPrintableEncodeBodyDelegate : public QuotedPrintableEncodeDelegate {
69+
public:
70+
QuotedPrintableEncodeBodyDelegate() = default;
71+
~QuotedPrintableEncodeBodyDelegate() override = default;
72+
73+
// Body lines have no prefix or suffix, so the whole limit is available.
size_t GetMaxLineLengthForEncodedContent() const override {
74+
return kMaximumLineLength;
75+
}
76+
77+
bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const override {
78+
// They should be encoded only if they appear at the end of a body line.
79+
return end_of_line;
80+
}
81+
82+
void DidStartLine(Vector<char>& out) override {
83+
// Nothing to add.
84+
}
85+
86+
// Appends the soft line break ('=' then CRLF) after every line except the
// last one; the last line gets nothing appended.
void DidFinishLine(bool last_line, Vector<char>& out) override {
87+
if (!last_line) {
88+
out.push_back('=');
89+
out.Append(kCrlfLineEnding, strlen(kCrlfLineEnding));
90+
}
91+
}
92+
};
93+
94+
// Controls quoted-printable encoding characters in headers, per RFC 2047.
// Each encoded line is wrapped in kRFC2047EncodingPrefix and
// kRFC2047EncodingSuffix, space/tab characters are always encoded, and every
// line but the last is followed by CRLF and a single space.
95+
class QuotedPrintableEncodeHeaderDelegate
96+
: public QuotedPrintableEncodeDelegate {
97+
public:
98+
QuotedPrintableEncodeHeaderDelegate() = default;
99+
~QuotedPrintableEncodeHeaderDelegate() override = default;
100+
101+
// Reserves room for the prefix and suffix that DidStartLine() and
// DidFinishLine() add around the encoded content of each line.
size_t GetMaxLineLengthForEncodedContent() const override {
102+
return kMaximumLineLength - kRFC2047EncodingPrefixLength -
103+
kRFC2047EncodingSuffixLength;
104+
}
105+
106+
bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const override {
107+
// They should always be encoded if they appear anywhere in the header.
108+
return true;
109+
}
110+
111+
// Opens a new encoded-word by appending the "=?utf-8?Q?" prefix.
void DidStartLine(Vector<char>& out) override {
112+
out.Append(kRFC2047EncodingPrefix, kRFC2047EncodingPrefixLength);
113+
}
114+
115+
// Closes the current encoded-word with "?=". For every line except the last
// one, also appends CRLF followed by a space to break the header line.
void DidFinishLine(bool last_line, Vector<char>& out) override {
116+
out.Append(kRFC2047EncodingSuffix, kRFC2047EncodingSuffixLength);
117+
if (!last_line) {
118+
out.Append(kCrlfLineEnding, strlen(kCrlfLineEnding));
119+
out.push_back(' ');
120+
}
121+
}
122+
};
54123

55124
static String ConvertToPrintableCharacters(const String& text) {
56125
// If the text contains all printable ASCII characters, no need for encoding.
@@ -70,9 +139,11 @@ static String ConvertToPrintableCharacters(const String& text) {
70139
// where, "utf-8" is the chosen charset to represent the text and "Q" is the
71140
// Quoted-Printable format to convert to 7-bit printable ASCII characters.
72141
CString utf8_text = text.Utf8();
142+
QuotedPrintableEncodeHeaderDelegate header_delegate;
73143
Vector<char> encoded_text;
74-
QuotedPrintableEncode(utf8_text.data(), utf8_text.length(), encoded_text);
75-
return "=?utf-8?Q?" + String(encoded_text.data(), encoded_text.size()) + "?=";
144+
QuotedPrintableEncode(utf8_text.data(), utf8_text.length(), &header_delegate,
145+
encoded_text);
146+
return String(encoded_text.data(), encoded_text.size());
76147
}
77148

78149
MHTMLArchive::MHTMLArchive() = default;
@@ -245,15 +316,15 @@ void MHTMLArchive::GenerateMHTMLPart(const String& boundary,
245316
size_t data_length = flat_data.size();
246317
Vector<char> encoded_data;
247318
if (!strcmp(content_encoding, kQuotedPrintable)) {
248-
QuotedPrintableEncode(data, data_length, encoded_data);
319+
QuotedPrintableEncodeBodyDelegate body_delegate;
320+
QuotedPrintableEncode(data, data_length, &body_delegate, encoded_data);
249321
output_buffer.Append(encoded_data.data(), encoded_data.size());
250322
output_buffer.Append("\r\n", 2u);
251323
} else {
252324
DCHECK(!strcmp(content_encoding, kBase64));
253325
// We are not specifying insertLFs = true below as it would cut the lines
254326
// with LFs and MHTML requires CRLFs.
255327
Base64Encode(data, data_length, encoded_data);
256-
const size_t kMaximumLineLength = 76;
257328
size_t index = 0;
258329
size_t encoded_data_length = encoded_data.size();
259330
do {

third_party/WebKit/Source/platform/text/QuotedPrintable.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@
3434

3535
namespace blink {
3636

37-
static const size_t kMaximumLineLength = 76;
38-
3937
static const char kCrlfLineEnding[] = "\r\n";
4038

4139
static size_t LengthOfLineEndingAtIndex(const char* input,
@@ -54,15 +52,13 @@ static size_t LengthOfLineEndingAtIndex(const char* input,
5452
return 0;
5553
}
5654

57-
void QuotedPrintableEncode(const Vector<char>& in, Vector<char>& out) {
58-
QuotedPrintableEncode(in.data(), in.size(), out);
59-
}
60-
6155
void QuotedPrintableEncode(const char* input,
6256
size_t input_length,
57+
QuotedPrintableEncodeDelegate* delegate,
6358
Vector<char>& out) {
6459
out.clear();
6560
out.ReserveCapacity(input_length);
61+
delegate->DidStartLine(out);
6662
size_t current_line_length = 0;
6763
for (size_t i = 0; i < input_length; ++i) {
6864
bool is_last_character = (i == input_length - 1);
@@ -74,13 +70,14 @@ void QuotedPrintableEncode(const char* input,
7470
current_character != '\t')
7571
requires_encoding = true;
7672

77-
// Space and tab characters have to be encoded if they appear at the end of
78-
// a line.
73+
// Decide if space and tab characters need to be encoded.
7974
if (!requires_encoding &&
80-
(current_character == '\t' || current_character == ' ') &&
81-
(is_last_character ||
82-
LengthOfLineEndingAtIndex(input, input_length, i + 1)))
83-
requires_encoding = true;
75+
(current_character == '\t' || current_character == ' ')) {
76+
bool end_of_line = is_last_character ||
77+
LengthOfLineEndingAtIndex(input, input_length, i + 1);
78+
requires_encoding =
79+
delegate->ShouldEncodeWhiteSpaceCharacters(end_of_line);
80+
}
8481

8582
// End of line should be converted to CR-LF sequences.
8683
if (!is_last_character) {
@@ -103,10 +100,10 @@ void QuotedPrintableEncode(const char* input,
103100

104101
// Insert a soft line break if necessary.
105102
if (current_line_length + length_of_encoded_character >
106-
kMaximumLineLength) {
107-
out.push_back('=');
108-
out.Append(kCrlfLineEnding, strlen(kCrlfLineEnding));
103+
delegate->GetMaxLineLengthForEncodedContent()) {
104+
delegate->DidFinishLine(false /*last_line*/, out);
109105
current_line_length = 0;
106+
delegate->DidStartLine(out);
110107
}
111108

112109
// Finally, insert the actual character(s).
@@ -120,6 +117,7 @@ void QuotedPrintableEncode(const char* input,
120117
current_line_length++;
121118
}
122119
}
120+
delegate->DidFinishLine(true /*last_line*/, out);
123121
}
124122

125123
void QuotedPrintableDecode(const Vector<char>& in, Vector<char>& out) {

third_party/WebKit/Source/platform/text/QuotedPrintable.h

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,38 @@
3636

3737
namespace blink {
3838

39-
PLATFORM_EXPORT void QuotedPrintableEncode(const Vector<char>&, Vector<char>&);
40-
PLATFORM_EXPORT void QuotedPrintableEncode(const char*, size_t, Vector<char>&);
39+
// Delegate for controlling the behavior of quoted-printable encoding. The
40+
// original characters may be encoded a bit differently depending on where
41+
// they live, header or body. For example, "=CRLF" should be used to break a
42+
// long line in the body while "CRLF+SPACE" should be used to break a long
43+
// line in the header.
44+
class PLATFORM_EXPORT QuotedPrintableEncodeDelegate {
45+
public:
46+
QuotedPrintableEncodeDelegate() = default;
47+
virtual ~QuotedPrintableEncodeDelegate() = default;
48+
49+
// Returns the maximum number of characters allowed for an encoded line,
50+
// excluding prefix and soft line break.
51+
virtual size_t GetMaxLineLengthForEncodedContent() const = 0;
52+
53+
// Returns true if space and tab characters need to be encoded.
// QuotedPrintableEncode() passes |end_of_line| as true when the character is
// the last input character or LengthOfLineEndingAtIndex() reports a line
// ending right after it.
54+
virtual bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const = 0;
55+
56+
// Called when an encoded line starts. The delegate can take this chance to
57+
// add any prefix.
58+
virtual void DidStartLine(Vector<char>& out) = 0;
59+
60+
// Called when an encoded line ends. The delegate can take this chance to add
61+
// any suffix. If it is not the last line, a soft line break should also
62+
// be added after the suffix.
63+
virtual void DidFinishLine(bool last_line, Vector<char>& out) = 0;
64+
};
65+
66+
PLATFORM_EXPORT void QuotedPrintableEncode(const char*,
67+
size_t,
68+
QuotedPrintableEncodeDelegate*,
69+
Vector<char>&);
4170

42-
PLATFORM_EXPORT void QuotedPrintableDecode(const Vector<char>&, Vector<char>&);
4371
PLATFORM_EXPORT void QuotedPrintableDecode(const char*, size_t, Vector<char>&);
4472

4573
} // namespace blink

0 commit comments

Comments
 (0)