From 567fbb52ab439a083ee1865eb3af66a90ba38be3 Mon Sep 17 00:00:00 2001
From: Mike Edmunds <medmunds@gmail.com>
Date: Sat, 22 Jun 2024 16:52:45 -0700
Subject: [PATCH] Amazon SES: Fix header encoding problem

A combination of long display name and
commas (or other special characters) could
result in invalid address headers.
See details in #369.

Fix by removing unnecessary email.policy
override, which was causing new header folding
code to run with headers built using Compat32
legacy header encoding. The two don't mix.

Fixes #369
---
 CHANGELOG.rst                    |  5 +++++
 anymail/backends/amazon_ses.py   | 14 ++++++--------
 tests/test_amazon_ses_backend.py | 30 +++++++++++++++++++++++++++++-
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a43e24a..e4a031f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -65,6 +65,10 @@ Features
 Fixes
 ~~~~~
 
+* **Amazon SES:** Fix a bug that could result in sending a broken address header
+  if it had a long display name containing both non-ASCII characters and commas.
+  (Thanks to `@andresmrm`_ for isolating and reporting the issue.)
+
 * **SendGrid:** In the tracking webhook, correctly report "bounced address"
   (recipients dropped due to earlier bounces) as reject reason ``"bounced"``.
   (Thanks to `@vitaliyf`_.)
@@ -1639,6 +1643,7 @@ Features
 
 .. _@ailionx: https://github.com/ailionx
 .. _@alee: https://github.com/alee
+.. _@andresmrm: https://github.com/andresmrm
 .. _@anstosa: https://github.com/anstosa
 .. _@Arondit: https://github.com/Arondit
 .. _@b0d0nne11: https://github.com/b0d0nne11
diff --git a/anymail/backends/amazon_ses.py b/anymail/backends/amazon_ses.py
index 5c62943..53553ac 100644
--- a/anymail/backends/amazon_ses.py
+++ b/anymail/backends/amazon_ses.py
@@ -165,13 +165,10 @@ def generate_raw_message(self):
         Serialize self.mime_message as an RFC-5322/-2045 MIME message,
         encoded as 7bit-clean, us-ascii byte data.
         """
-        # Amazon SES does not support `Content-Transfer-Encoding: 8bit`. And using 8bit
-        # with SES open or click tracking results in mis-encoded characters. To avoid
-        # this, convert any 8bit parts to 7bit quoted printable or base64. (We own
-        # self.mime_message, so destructively modifying it should be OK.)
-        # (You might think cte_type="7bit" in the email.policy below would cover this,
-        # but it seems that cte_type is only examined as the MIME parts are constructed,
-        # not when an email.generator serializes them.)
+        # Amazon SES discourages `Content-Transfer-Encoding: 8bit`. And using
+        # 8bit with SES open or click tracking results in mis-encoded characters.
+        # To avoid this, convert any 8bit parts to 7bit quoted printable or base64.
+        # (We own self.mime_message, so destructively modifying it should be OK.)
         for part in self.mime_message.walk():
             if part["Content-Transfer-Encoding"] == "8bit":
                 del part["Content-Transfer-Encoding"]
@@ -181,7 +178,8 @@ def generate_raw_message(self):
                 else:
                     email.encoders.encode_base64(part)
 
-        self.mime_message.policy = email.policy.default.clone(cte_type="7bit")
+        # (All message and part headers should already be 7bit clean,
+        # so there's no need to try to override email.policy here.)
         return self.mime_message.as_bytes()
 
     def parse_recipient_status(self, response):
diff --git a/tests/test_amazon_ses_backend.py b/tests/test_amazon_ses_backend.py
index cd22df6..a9f82b1 100644
--- a/tests/test_amazon_ses_backend.py
+++ b/tests/test_amazon_ses_backend.py
@@ -318,6 +318,17 @@ def test_commas_in_subject(self):
         sent_message = self.get_sent_message()
         self.assertEqual(sent_message["Subject"], self.message.subject)
 
+    def test_broken_address_header(self):
+        # https://github.com/anymail/django-anymail/issues/369
+        self.message.to = ['"Người nhận a very very long, name" <to@example.com>']
+        self.message.cc = [
+            '"A véry long name with non-ASCII char and, comma" <cc@example.com>'
+        ]
+        self.message.send()
+        sent_message = self.get_sent_message()
+        self.assertEqual(sent_message["To"], self.message.to[0])
+        self.assertEqual(sent_message["Cc"], self.message.cc[0])
+
     def test_no_cte_8bit(self):
         """Anymail works around an Amazon SES bug that can corrupt non-ASCII bodies."""
         # (see detailed comments in the backend code)
@@ -333,12 +344,14 @@ def test_no_cte_8bit(self):
         self.message.attach(att)
 
         self.message.send()
-        sent_message = self.get_sent_message()
+        raw_mime = self.get_send_params()["Content"]["Raw"]["Data"]
+        self.assertTrue(raw_mime.isascii())  # 7-bit clean
 
         # Make sure none of the resulting parts use `Content-Transfer-Encoding: 8bit`.
         # (Technically, either quoted-printable or base64 would be OK, but base64 text
         # parts have a reputation for triggering spam filters, so just require
         # quoted-printable for them.)
+        sent_message = self.get_sent_message()
         part_encodings = [
             (part.get_content_type(), part["Content-Transfer-Encoding"])
             for part in sent_message.walk()
@@ -355,6 +368,21 @@ def test_no_cte_8bit(self):
             ],
         )
 
+    def test_no_cte_8bit_root(self):
+        # Same test as above, but with a non-multipart message using 8bit at root
+        self.message.body = "Это text body"
+        self.message.send()
+
+        raw_mime = self.get_send_params()["Content"]["Raw"]["Data"]
+        self.assertTrue(raw_mime.isascii())  # 7-bit clean
+
+        sent_message = self.get_sent_message()
+        part_encodings = [
+            (part.get_content_type(), part["Content-Transfer-Encoding"])
+            for part in sent_message.walk()
+        ]
+        self.assertEqual(part_encodings, [("text/plain", "quoted-printable")])
+
     def test_api_failure(self):
         error_response = {
             "Error": {