From 567fbb52ab439a083ee1865eb3af66a90ba38be3 Mon Sep 17 00:00:00 2001 From: Mike Edmunds Date: Sat, 22 Jun 2024 16:52:45 -0700 Subject: [PATCH] Amazon SES: Fix header encoding problem A combination of long display name and commas (or other special characters) could result in invalid address headers. See details in #369. Fix by removing unnecessary email.policy override, which was causing new header folding code to run with headers built using Compat32 legacy header encoding. The two don't mix. Fixes #369 --- CHANGELOG.rst | 5 +++++ anymail/backends/amazon_ses.py | 14 ++++++-------- tests/test_amazon_ses_backend.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a43e24a..e4a031f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -65,6 +65,10 @@ Features Fixes ~~~~~ +* **Amazon SES:** Fix a bug that could result in sending a broken address header + if it had a long display name containing both non-ASCII characters and commas. + (Thanks to `@andresmrm`_ for isolating and reporting the issue.) + * **SendGrid:** In the tracking webhook, correctly report "bounced address" (recipients dropped due to earlier bounces) as reject reason ``"bounced"``. (Thanks to `@vitaliyf`_.) @@ -1639,6 +1643,7 @@ Features .. _@ailionx: https://github.com/ailionx .. _@alee: https://github.com/alee +.. _@andresmrm: https://github.com/andresmrm .. _@anstosa: https://github.com/anstosa .. _@Arondit: https://github.com/Arondit .. _@b0d0nne11: https://github.com/b0d0nne11 diff --git a/anymail/backends/amazon_ses.py b/anymail/backends/amazon_ses.py index 5c62943..53553ac 100644 --- a/anymail/backends/amazon_ses.py +++ b/anymail/backends/amazon_ses.py @@ -165,13 +165,10 @@ def generate_raw_message(self): Serialize self.mime_message as an RFC-5322/-2045 MIME message, encoded as 7bit-clean, us-ascii byte data. """ - # Amazon SES does not support `Content-Transfer-Encoding: 8bit`. And using 8bit - # with SES open or click tracking results in mis-encoded characters. To avoid - # this, convert any 8bit parts to 7bit quoted printable or base64. (We own - # self.mime_message, so destructively modifying it should be OK.) - # (You might think cte_type="7bit" in the email.policy below would cover this, - # but it seems that cte_type is only examined as the MIME parts are constructed, - # not when an email.generator serializes them.) + # Amazon SES discourages `Content-Transfer-Encoding: 8bit`. And using + # 8bit with SES open or click tracking results in mis-encoded characters. + # To avoid this, convert any 8bit parts to 7bit quoted printable or base64. + # (We own self.mime_message, so destructively modifying it should be OK.) for part in self.mime_message.walk(): if part["Content-Transfer-Encoding"] == "8bit": del part["Content-Transfer-Encoding"] @@ -181,7 +178,8 @@ def generate_raw_message(self): else: email.encoders.encode_base64(part) - self.mime_message.policy = email.policy.default.clone(cte_type="7bit") + # (All message and part headers should already be 7bit clean, + # so there's no need to try to override email.policy here.) return self.mime_message.as_bytes() def parse_recipient_status(self, response): diff --git a/tests/test_amazon_ses_backend.py b/tests/test_amazon_ses_backend.py index cd22df6..a9f82b1 100644 --- a/tests/test_amazon_ses_backend.py +++ b/tests/test_amazon_ses_backend.py @@ -318,6 +318,17 @@ def test_commas_in_subject(self): sent_message = self.get_sent_message() self.assertEqual(sent_message["Subject"], self.message.subject) + def test_broken_address_header(self): + # https://github.com/anymail/django-anymail/issues/369 + self.message.to = ['"Người nhận a very very long, name" '] + self.message.cc = [ + '"A véry long name with non-ASCII char and, comma" ' + ] + self.message.send() + sent_message = self.get_sent_message() + self.assertEqual(sent_message["To"], self.message.to[0]) + self.assertEqual(sent_message["Cc"], self.message.cc[0]) + def test_no_cte_8bit(self): """Anymail works around an Amazon SES bug that can corrupt non-ASCII bodies.""" # (see detailed comments in the backend code) @@ -333,12 +344,14 @@ def test_no_cte_8bit(self): self.message.attach(att) self.message.send() - sent_message = self.get_sent_message() + raw_mime = self.get_send_params()["Content"]["Raw"]["Data"] + self.assertTrue(raw_mime.isascii()) # 7-bit clean # Make sure none of the resulting parts use `Content-Transfer-Encoding: 8bit`. # (Technically, either quoted-printable or base64 would be OK, but base64 text # parts have a reputation for triggering spam filters, so just require # quoted-printable for them.) + sent_message = self.get_sent_message() part_encodings = [ (part.get_content_type(), part["Content-Transfer-Encoding"]) for part in sent_message.walk() @@ -355,6 +368,21 @@ def test_no_cte_8bit(self): ], ) + def test_no_cte_8bit_root(self): + # Same test as above, but with a non-multipart message using 8bit at root + self.message.body = "Это text body" + self.message.send() + + raw_mime = self.get_send_params()["Content"]["Raw"]["Data"] + self.assertTrue(raw_mime.isascii()) # 7-bit clean + + sent_message = self.get_sent_message() + part_encodings = [ + (part.get_content_type(), part["Content-Transfer-Encoding"]) + for part in sent_message.walk() + ] + self.assertEqual(part_encodings, [("text/plain", "quoted-printable")]) + def test_api_failure(self): error_response = { "Error": {