From 8b7340a0cc12691e10e6fde0322a80f8c5561381 Mon Sep 17 00:00:00 2001 From: Maxim Vladimirskiy Date: Tue, 10 Apr 2018 23:16:29 +0300 Subject: [PATCH] Support Python 3 --- .travis.yml | 2 +- CHANGELOG.md | 7 +- build.sh | 22 ---- flanker/addresslib/parsetab.py | 90 --------------- flanker/dkim.py | 122 +++++++++++---------- flanker/mime/message/charsets.py | 5 +- flanker/mime/message/part.py | 140 ++++++++++++++++-------- flanker/mime/message/scanner.py | 9 +- tests/addresslib/parser_mailbox_test.py | 2 +- tests/dkim_test.py | 25 +++-- tests/mime/message/create_test.py | 24 ++-- tests/mime/message/part_test.py | 56 ++++++---- tests/mime/message/scanner_test.py | 15 ++- 13 files changed, 250 insertions(+), 269 deletions(-) delete mode 100755 build.sh delete mode 100644 flanker/addresslib/parsetab.py diff --git a/.travis.yml b/.travis.yml index 8213030b..a859609f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,6 @@ python: install: - pip install -e .[cchardet,validator] - pip install nose mock coverage coveralls -script: ./build.sh +script: nosetests --with-coverage --cover-package=flanker after_success: - coveralls diff --git a/CHANGELOG.md b/CHANGELOG.md index 77d1b734..23e3155a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed +- Support for Python 3 was added while preserving the Python 2 behavior. + As a result, Python 3 specific logic may not be that efficient due to extra + conversions between text and bytes, but that is left for future improvements; +- CRLF is now consistently used when a parsed MIME message is serialized into a string. - Dependency on cchardet was made optional. 
Ported from [PR84](https://github.com/mailgun/flanker/pull/84) - [PR94](https://github.com/mailgun/flanker/pull/94) Local Redis cache was made configurable via environment variables REDIS_HOST, REDIS_PORT, and REDIS_DB - with the defaults matching the original behavior + with the defaults matching the original behavior. ## [0.8.5] - 2018-03-30 ### Changed diff --git a/build.sh b/build.sh deleted file mode 100755 index fee3e487..00000000 --- a/build.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Make sure the script fails fast. -set -e -set -u - -if [[ ${TRAVIS_PYTHON_VERSION} == 2.7* ]]; then - nosetests --with-coverage --cover-package=flanker -else - nosetests --with-coverage --cover-package=flanker \ - tests/addresslib \ - tests/mime/bounce_tests.py \ - tests/mime/message/fallback \ - tests/mime/message/headers \ - tests/mime/message/threading_test.py \ - tests/mime/message/tokenizer_test.py \ - tests/mime/message/headers/encodedword_test.py \ - tests/mime/message/headers/headers_test.py \ - tests/mime/message/headers/parametrized_test.py \ - tests/mime/message/headers/parsing_test.py \ - tests/mime/message/headers/wrappers_test.py -fi diff --git a/flanker/addresslib/parsetab.py b/flanker/addresslib/parsetab.py deleted file mode 100644 index c57bbeba..00000000 --- a/flanker/addresslib/parsetab.py +++ /dev/null @@ -1,90 +0,0 @@ - -# parsetab.py -# This file is automatically generated. Do not edit. 
-_tabversion = '3.10' - -_lr_method = 'LALR' - -_lr_signature = 'mailbox_or_urlFWSP AT DOT COMMA SEMICOLON LANGLE RANGLE ATOM DOT_ATOM LBRACKET RBRACKET DTEXT DQUOTE QTEXT QPAIR LPAREN RPAREN CTEXT URLmailbox_or_url_list : mailbox_or_url_list delim mailbox_or_url\n | mailbox_or_url_list delim\n | mailbox_or_urldelim : delim fwsp COMMA\n | delim fwsp SEMICOLON\n | COMMA\n | SEMICOLONmailbox_or_url : mailbox\n | urlurl : ofwsp URL ofwspmailbox : addr_spec\n | angle_addr\n | name_addrname_addr : ofwsp phrase angle_addrangle_addr : ofwsp LANGLE addr_spec RANGLE ofwspaddr_spec : ofwsp local_part AT domain ofwsplocal_part : DOT_ATOM\n | ATOM\n | quoted_stringdomain : DOT_ATOM\n | ATOM\n | domain_literalquoted_string : DQUOTE quoted_string_text DQUOTE\n | DQUOTE DQUOTEquoted_string_text : quoted_string_text QTEXT\n | quoted_string_text QPAIR\n | quoted_string_text fwsp\n | QTEXT\n | QPAIR\n | fwspdomain_literal : LBRACKET domain_literal_text RBRACKET\n | LBRACKET RBRACKETdomain_literal_text : domain_literal_text DTEXT\n | domain_literal_text fwsp\n | DTEXT\n | fwspcomment : LPAREN comment_text RPAREN\n | LPAREN RPARENcomment_text : comment_text CTEXT\n | comment_text fwsp\n | CTEXT\n | fwspphrase : phrase fwsp ATOM\n | phrase fwsp DOT_ATOM\n | phrase fwsp DOT\n | phrase fwsp quoted_string\n | phrase ATOM\n | phrase DOT_ATOM\n | phrase DOT\n | phrase quoted_string\n | ATOM\n | DOT_ATOM\n | DOT\n | quoted_stringofwsp : fwsp comment fwsp\n | fwsp comment\n | comment fwsp\n | comment\n | fwsp\n |fwsp : FWSP' - -_lr_action_items = 
{'FWSP':([0,2,7,11,12,14,15,17,18,19,20,21,22,23,24,25,26,32,33,34,35,36,40,41,42,43,44,45,46,50,51,52,53,54,55,56,57,58,59,60,61,62,63,66,67,68,69,70,71,72,],[7,7,-61,7,7,7,7,-51,7,-52,7,-54,-53,-42,7,-38,-41,-30,-29,-28,-24,7,-48,-47,-50,-49,-40,-37,-39,7,7,7,-20,-21,-22,-27,-26,-25,-23,-44,-43,-46,-45,-36,-35,7,-32,-34,-33,-31,]),'LANGLE':([0,1,2,4,7,12,13,17,19,20,21,22,25,27,35,37,38,40,41,42,43,45,59,60,61,62,63,],[-60,-59,-58,14,-61,-56,-57,-51,-52,-60,-54,-53,-38,-55,-24,-59,14,-48,-47,-50,-49,-37,-23,-44,-43,-46,-45,]),'QPAIR':([7,18,32,33,34,36,56,57,58,],[-61,33,-30,-29,-28,57,-27,-26,-25,]),'URL':([0,1,2,4,7,12,13,25,27,45,],[-60,-59,-58,15,-61,-56,-57,-38,-55,-37,]),'QTEXT':([7,18,32,33,34,36,56,57,58,],[-61,34,-30,-29,-28,58,-27,-26,-25,]),'DTEXT':([7,52,66,67,68,70,71,],[-61,67,-36,-35,71,-34,-33,]),'DQUOTE':([0,1,2,4,7,12,13,14,17,18,19,20,21,22,25,27,28,32,33,34,35,36,37,40,41,42,43,45,56,57,58,59,60,61,62,63,],[-60,-59,-58,18,-61,-56,-57,-60,-51,35,-52,18,-54,-53,-38,-55,18,-30,-29,-28,-24,59,18,-48,-47,-50,-49,-37,-27,-26,-25,-23,-44,-43,-46,-45,]),'LBRACKET':([31,],[52,]),'DOT_ATOM':([0,1,2,4,7,12,13,14,17,19,20,21,22,25,27,28,31,35,37,40,41,42,43,45,59,60,61,62,63,],[-60,-59,-58,19,-61,-56,-57,-60,-51,-52,40,-54,-53,-38,-55,47,53,-24,60,-48,-47,-50,-49,-37,-23,-44,-43,-46,-45,]),'RPAREN':([7,11,23,24,26,44,46,],[-61,25,-42,45,-41,-40,-39,]),'AT':([16,17,19,21,35,47,48,49,59,],[31,-18,-17,-19,-24,-17,-18,-19,-23,]),'LPAREN':([0,1,7,14,15,17,19,20,21,22,35,37,40,41,42,43,50,51,53,54,55,59,60,61,62,63,69,72,],[11,11,-61,11,11,-51,-52,11,-54,-53,-24,11,-48,-47,-50,-49,11,11,-20,-21,-22,-23,-44,-43,-46,-45,-32,-31,]),'ATOM':([0,1,2,4,7,12,13,14,17,19,20,21,22,25,27,28,31,35,37,40,41,42,43,45,59,60,61,62,63,],[-60,-59,-58,17,-61,-56,-57,-60,-51,-52,41,-54,-53,-38,-55,48,54,-24,61,-48,-47,-50,-49,-37,-23,-44,-43,-46,-45,]),'RANGLE':([1,2,7,12,13,25,27,29,45,51,53,54,55,65,69,72,],[-59,-58,-61,-56,-57,-38,-55,50,-37,-60,-20,-21,-22,-16,-32,-31,]),'RBRAC
KET':([7,52,66,67,68,70,71,],[-61,69,-36,-35,72,-34,-33,]),'CTEXT':([7,11,23,24,26,44,46,],[-61,26,-42,46,-41,-40,-39,]),'DOT':([0,1,2,4,7,12,13,17,19,20,21,22,25,27,35,37,40,41,42,43,45,59,60,61,62,63,],[-60,-59,-58,22,-61,-56,-57,-51,-52,43,-54,-53,-38,-55,-24,63,-48,-47,-50,-49,-37,-23,-44,-43,-46,-45,]),'$end':([1,2,3,5,6,7,8,9,10,12,13,15,25,27,30,39,45,50,51,53,54,55,64,65,69,72,],[-59,-58,-13,-12,0,-61,-8,-9,-11,-56,-57,-60,-38,-55,-10,-14,-37,-60,-60,-20,-21,-22,-15,-16,-32,-31,]),} - -_lr_action = {} -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = {} - _lr_action[_x][_k] = _y -del _lr_action_items - -_lr_goto_items = {'fwsp':([0,2,11,12,14,15,18,20,24,36,50,51,52,68,],[1,13,23,27,1,1,32,37,44,56,1,1,66,70,]),'comment':([0,1,14,15,20,37,50,51,],[2,12,2,2,2,12,2,2,]),'domain':([31,],[51,]),'comment_text':([11,],[24,]),'name_addr':([0,],[3,]),'ofwsp':([0,14,15,20,50,51,],[4,28,30,38,64,65,]),'angle_addr':([0,20,],[5,39,]),'mailbox_or_url':([0,],[6,]),'local_part':([4,28,],[16,16,]),'domain_literal_text':([52,],[68,]),'mailbox':([0,],[8,]),'quoted_string_text':([18,],[36,]),'url':([0,],[9,]),'addr_spec':([0,14,],[10,29,]),'phrase':([4,],[20,]),'quoted_string':([4,20,28,37,],[21,42,49,62,]),'domain_literal':([31,],[55,]),} - -_lr_goto = {} -for _k, _v in _lr_goto_items.items(): - for _x, _y in zip(_v[0], _v[1]): - if not _x in _lr_goto: _lr_goto[_x] = {} - _lr_goto[_x][_k] = _y -del _lr_goto_items -_lr_productions = [ - ("S' -> mailbox_or_url","S'",1,None,None,None), - ('mailbox_or_url_list -> mailbox_or_url_list delim mailbox_or_url','mailbox_or_url_list',3,'p_expression_mailbox_or_url_list','parser.py',18), - ('mailbox_or_url_list -> mailbox_or_url_list delim','mailbox_or_url_list',2,'p_expression_mailbox_or_url_list','parser.py',19), - ('mailbox_or_url_list -> mailbox_or_url','mailbox_or_url_list',1,'p_expression_mailbox_or_url_list','parser.py',20), - ('delim -> delim fwsp 
COMMA','delim',3,'p_delim','parser.py',29), - ('delim -> delim fwsp SEMICOLON','delim',3,'p_delim','parser.py',30), - ('delim -> COMMA','delim',1,'p_delim','parser.py',31), - ('delim -> SEMICOLON','delim',1,'p_delim','parser.py',32), - ('mailbox_or_url -> mailbox','mailbox_or_url',1,'p_expression_mailbox_or_url','parser.py',35), - ('mailbox_or_url -> url','mailbox_or_url',1,'p_expression_mailbox_or_url','parser.py',36), - ('url -> ofwsp URL ofwsp','url',3,'p_expression_url','parser.py',40), - ('mailbox -> addr_spec','mailbox',1,'p_expression_mailbox','parser.py',44), - ('mailbox -> angle_addr','mailbox',1,'p_expression_mailbox','parser.py',45), - ('mailbox -> name_addr','mailbox',1,'p_expression_mailbox','parser.py',46), - ('name_addr -> ofwsp phrase angle_addr','name_addr',3,'p_expression_name_addr','parser.py',50), - ('angle_addr -> ofwsp LANGLE addr_spec RANGLE ofwsp','angle_addr',5,'p_expression_angle_addr','parser.py',54), - ('addr_spec -> ofwsp local_part AT domain ofwsp','addr_spec',5,'p_expression_addr_spec','parser.py',58), - ('local_part -> DOT_ATOM','local_part',1,'p_expression_local_part','parser.py',62), - ('local_part -> ATOM','local_part',1,'p_expression_local_part','parser.py',63), - ('local_part -> quoted_string','local_part',1,'p_expression_local_part','parser.py',64), - ('domain -> DOT_ATOM','domain',1,'p_expression_domain','parser.py',68), - ('domain -> ATOM','domain',1,'p_expression_domain','parser.py',69), - ('domain -> domain_literal','domain',1,'p_expression_domain','parser.py',70), - ('quoted_string -> DQUOTE quoted_string_text DQUOTE','quoted_string',3,'p_expression_quoted_string','parser.py',74), - ('quoted_string -> DQUOTE DQUOTE','quoted_string',2,'p_expression_quoted_string','parser.py',75), - ('quoted_string_text -> quoted_string_text QTEXT','quoted_string_text',2,'p_expression_quoted_string_text','parser.py',82), - ('quoted_string_text -> quoted_string_text 
QPAIR','quoted_string_text',2,'p_expression_quoted_string_text','parser.py',83), - ('quoted_string_text -> quoted_string_text fwsp','quoted_string_text',2,'p_expression_quoted_string_text','parser.py',84), - ('quoted_string_text -> QTEXT','quoted_string_text',1,'p_expression_quoted_string_text','parser.py',85), - ('quoted_string_text -> QPAIR','quoted_string_text',1,'p_expression_quoted_string_text','parser.py',86), - ('quoted_string_text -> fwsp','quoted_string_text',1,'p_expression_quoted_string_text','parser.py',87), - ('domain_literal -> LBRACKET domain_literal_text RBRACKET','domain_literal',3,'p_expression_domain_literal','parser.py',91), - ('domain_literal -> LBRACKET RBRACKET','domain_literal',2,'p_expression_domain_literal','parser.py',92), - ('domain_literal_text -> domain_literal_text DTEXT','domain_literal_text',2,'p_expression_domain_literal_text','parser.py',99), - ('domain_literal_text -> domain_literal_text fwsp','domain_literal_text',2,'p_expression_domain_literal_text','parser.py',100), - ('domain_literal_text -> DTEXT','domain_literal_text',1,'p_expression_domain_literal_text','parser.py',101), - ('domain_literal_text -> fwsp','domain_literal_text',1,'p_expression_domain_literal_text','parser.py',102), - ('comment -> LPAREN comment_text RPAREN','comment',3,'p_expression_comment','parser.py',106), - ('comment -> LPAREN RPAREN','comment',2,'p_expression_comment','parser.py',107), - ('comment_text -> comment_text CTEXT','comment_text',2,'p_expression_comment_text','parser.py',111), - ('comment_text -> comment_text fwsp','comment_text',2,'p_expression_comment_text','parser.py',112), - ('comment_text -> CTEXT','comment_text',1,'p_expression_comment_text','parser.py',113), - ('comment_text -> fwsp','comment_text',1,'p_expression_comment_text','parser.py',114), - ('phrase -> phrase fwsp ATOM','phrase',3,'p_expression_phrase','parser.py',118), - ('phrase -> phrase fwsp DOT_ATOM','phrase',3,'p_expression_phrase','parser.py',119), - ('phrase -> phrase fwsp 
DOT','phrase',3,'p_expression_phrase','parser.py',120), - ('phrase -> phrase fwsp quoted_string','phrase',3,'p_expression_phrase','parser.py',121), - ('phrase -> phrase ATOM','phrase',2,'p_expression_phrase','parser.py',122), - ('phrase -> phrase DOT_ATOM','phrase',2,'p_expression_phrase','parser.py',123), - ('phrase -> phrase DOT','phrase',2,'p_expression_phrase','parser.py',124), - ('phrase -> phrase quoted_string','phrase',2,'p_expression_phrase','parser.py',125), - ('phrase -> ATOM','phrase',1,'p_expression_phrase','parser.py',126), - ('phrase -> DOT_ATOM','phrase',1,'p_expression_phrase','parser.py',127), - ('phrase -> DOT','phrase',1,'p_expression_phrase','parser.py',128), - ('phrase -> quoted_string','phrase',1,'p_expression_phrase','parser.py',129), - ('ofwsp -> fwsp comment fwsp','ofwsp',3,'p_expression_ofwsp','parser.py',138), - ('ofwsp -> fwsp comment','ofwsp',2,'p_expression_ofwsp','parser.py',139), - ('ofwsp -> comment fwsp','ofwsp',2,'p_expression_ofwsp','parser.py',140), - ('ofwsp -> comment','ofwsp',1,'p_expression_ofwsp','parser.py',141), - ('ofwsp -> fwsp','ofwsp',1,'p_expression_ofwsp','parser.py',142), - ('ofwsp -> ','ofwsp',0,'p_expression_ofwsp','parser.py',143), - ('fwsp -> FWSP','fwsp',1,'p_expression_fwsp','parser.py',147), -] diff --git a/flanker/dkim.py b/flanker/dkim.py index e0d5416a..4c65547d 100644 --- a/flanker/dkim.py +++ b/flanker/dkim.py @@ -1,5 +1,6 @@ import base64 import regex as re +import six import time from cryptography.hazmat.backends import default_backend @@ -7,66 +8,66 @@ from cryptography.hazmat.primitives.asymmetric import padding -_BODY_TRAILING_WSP = re.compile(r"[\t ]+\r\n") -_BODY_WSP_RE = re.compile(r"[\t ]+") +_BODY_TRAILING_WSP = re.compile(br"[\t ]+\r\n") +_BODY_WSP_RE = re.compile(br"[\t ]+") class SimpleCanonicalization(object): - name = "simple" + name = b"simple" def canonicalize_header(self, header, value): return header, value def canonicalize_body(self, body): - return body.rstrip("\r\n") + "\r\n" + 
return body.rstrip(b"\r\n") + b"\r\n" class RelaxedCanonicalization(object): - name = "relaxed" + name = b"relaxed" def canonicalize_header(self, header, value): header = header.lower() - value = _BODY_WSP_RE.sub(" ", value.replace("\r\n", "")) + value = _BODY_WSP_RE.sub(b" ", value.replace(b"\r\n", b"")) return header, value.strip() + b"\r\n" def canonicalize_body(self, body): - body = _BODY_TRAILING_WSP.sub("\r\n", body) - body = _BODY_WSP_RE.sub(" ", body) - body = body.rstrip("\r\n") + body = _BODY_TRAILING_WSP.sub(b"\r\n", body) + body = _BODY_WSP_RE.sub(b" ", body) + body = body.rstrip(b"\r\n") return body + b"\r\n" if body else b"" class NoFWSCanonicalization(object): - _header_fws_re = re.compile(r"[\t \r\n]+") - _body_orphan_cr_re = re.compile(b"\r([^\n])") + _header_fws_re = re.compile(br"[\t \r\n]+") + _body_orphan_cr_re = re.compile(br"\r([^\n])") def canonicalize_header(self, header, value): - return header, self._header_fws_re.sub("", value) + "\r\n" + return header, self._header_fws_re.sub(b"", value) + b"\r\n" def canonicalize_body(self, body): - body = _BODY_WSP_RE.sub("", body) - body = self._body_orphan_cr_re.sub(r"\1", body) + body = _BODY_WSP_RE.sub(b"", body) + body = self._body_orphan_cr_re.sub(br"\1", body) body = body.rstrip() - return body + "\r\n" if body else "" + return body + b"\r\n" if body else b"" def _fold(header): """Fold a header line into multiple crlf-separated lines at column 72.""" - i = header.rfind("\r\n ") + i = header.rfind(b"\r\n ") if i == -1: - pre = "" + pre = b"" else: i += 3 pre = header[:i] header = header[i:] while len(header) > 72: - i = header[:72].rfind(" ") + i = header[:72].rfind(b" ") if i == -1: i = j = 72 else: j = i + 1 - pre += header[:i] + "\r\n " + pre += header[:i] + b"\r\n " header = header[j:] return pre + header @@ -74,14 +75,24 @@ def _fold(header): class DomainKeySigner(object): def __init__(self, key, selector, domain, signed_headers=None): self._key = key + self._selector = selector + if 
six.PY3 and isinstance(selector, six.text_type): + self._selector = selector.encode('utf-8') + self._domain = domain + if six.PY3 and isinstance(domain, six.text_type): + self._domain = domain.encode('utf-8') + self._signed_headers = None def sign(self, message): canonicalization = NoFWSCanonicalization() signer = self._key.signer(padding.PKCS1v15(), hashes.SHA1()) + if six.PY3 and isinstance(message, six.text_type): + message = message.encode('utf-8') + headers, body = _rfc822_parse(message) h_field = [] @@ -99,14 +110,12 @@ def sign(self, message): signer.update(b"\r\n") signer.update(body) - return _fold( - b"DomainKey-Signature: a=rsa-sha1; c=nofws; d={domain}; " - b"s={selector}; q=dns; h={headers}; b={signature}".format( - domain=self._domain, - selector=self._selector, - headers=b": ".join(h_field), - signature=base64.b64encode(signer.finalize()) - )) + b"\r\n" + return _fold(b"DomainKey-Signature: a=rsa-sha1; c=nofws; d=%s; s=%s;" + b" q=dns; h=%s; b=%s" + % (self._domain, + self._selector, + b": ".join(h_field), + base64.b64encode(signer.finalize()))) + b"\r\n" class DKIMSigner(object): @@ -115,8 +124,15 @@ def __init__(self, key, selector, domain, body_canonicalization=SimpleCanonicalization(), signed_headers=None): self._key = key + self._selector = selector + if six.PY3 and isinstance(selector, six.text_type): + self._selector = selector.encode('utf-8') + self._domain = domain + if six.PY3 and isinstance(domain, six.text_type): + self._domain = domain.encode('utf-8') + self._header_canonicalization = header_canonicalization self._body_canonicalization = body_canonicalization self._signed_headers = signed_headers @@ -141,34 +157,27 @@ def sign(self, message, current_time=None): h = hashes.Hash(hashes.SHA256(), backend=default_backend()) h.update(self._body_canonicalization.canonicalize_body(body)) - dkim_header_value = _fold( - b" a=rsa-sha256; v=1; " - b"c={header_canonicalization.name}/{body_canonicalization.name}; " - b"d={domain}; q=dns/txt; 
s={selector}; t={time}; h={headers}; " - b"bh={body_hash}; b=".format( - header_canonicalization=self._header_canonicalization, - body_canonicalization=self._body_canonicalization, - domain=self._domain, - selector=self._selector, - time=current_time, - headers=": ".join(h_field), - body_hash=base64.b64encode(h.finalize()), - ) - ) + dkim_header_value = _fold(b" a=rsa-sha256; v=1; c=%s/%s; d=%s; " + b"q=dns/txt; s=%s; t=%d; h=%s; bh=%s; b=" + % (self._header_canonicalization.name, + self._body_canonicalization.name, + self._domain, + self._selector, + current_time, + b": ".join(h_field), + base64.b64encode(h.finalize()))) h, v = self._header_canonicalization.canonicalize_header( - "DKIM-Signature", dkim_header_value) + b"DKIM-Signature", dkim_header_value) signer.update(h) signer.update(b":") signer.update(v) - return b"DKIM-Signature:{dkim_header}{signature}\r\n".format( - dkim_header=v, - signature=_fold(base64.b64encode(signer.finalize())) - ) + return b"DKIM-Signature:%s%s\r\n" % ( + v, _fold(base64.b64encode(signer.finalize()))) -_RFC822_NEWLINE_RE = re.compile(r"\r?\n") -_RFC822_WS_RE = re.compile(r"[\t ]") -_RFC822_HEADER_RE = re.compile(r"([\x21-\x7e]+?):") +_RFC822_NEWLINE_RE = re.compile(br"\r?\n") +_RFC822_WS_RE = re.compile(br"[\t ]") +_RFC822_HEADER_RE = re.compile(br"([\x21-\x7e]+?):") def _rfc822_parse(message): @@ -181,17 +190,16 @@ def _rfc822_parse(message): # blank line. 
i += 1 break - if _RFC822_WS_RE.match(lines[i][0]): - headers[-1][1] += lines[i] + "\r\n" + if _RFC822_WS_RE.match(lines[i][:1]): + headers[-1][1] += lines[i] + b"\r\n" else: m = _RFC822_HEADER_RE.match(lines[i]) if m is not None: - headers.append([m.group(1), lines[i][m.end(0):] + "\r\n"]) - elif lines[i].startswith("From "): + headers.append([m.group(1), lines[i][m.end(0):] + b"\r\n"]) + elif lines[i].startswith(b"From "): pass else: - raise ValueError( - "Unexpected characters in RFC822 header: %s" % lines[i] - ) + raise ValueError("Unexpected characters in RFC822 header: %s" + % lines[i]) i += 1 - return (headers, "\r\n".join(lines[i:])) + return (headers, b"\r\n".join(lines[i:])) diff --git a/flanker/mime/message/charsets.py b/flanker/mime/message/charsets.py index cb8f3aae..2aed9327 100644 --- a/flanker/mime/message/charsets.py +++ b/flanker/mime/message/charsets.py @@ -13,10 +13,7 @@ def convert_to_unicode(charset, value): if isinstance(value, six.text_type): - if six.PY2: - return value - - value = value.encode('ascii') + return value charset = _ensure_charset(charset) value = to_unicode(value, charset) diff --git a/flanker/mime/message/part.py b/flanker/mime/message/part.py index d0488333..3a795ebf 100644 --- a/flanker/mime/message/part.py +++ b/flanker/mime/message/part.py @@ -7,6 +7,7 @@ from os import path import six +from six.moves import StringIO from flanker import metrics, _email from flanker.mime import bounce @@ -21,6 +22,7 @@ CTE = WithParams('7bit', {}) + class Stream(object): def __init__(self, content_type, start, end, string, stream): @@ -69,7 +71,7 @@ def _load_body(self): if self._body is None: self._load_headers() self.stream.seek(self._body_start) - self._body = decode_body( + self._body = _decode_body( self.content_type, self.headers.get('Content-Transfer-Encoding', CTE).value, self.stream.read(self.end - self._body_start + 1)) @@ -107,7 +109,11 @@ def adjust_content_type(content_type, body=None, filename=None): content_type = 
ContentType(main, sub) if content_type.main == 'image' and body: - sub = imghdr.what(None, body) + image_preamble = body[:32] + if six.PY3 and isinstance(body, six.text_type): + image_preamble = image_preamble.encode('utf-8', 'ignore') + + sub = imghdr.what(None, image_preamble) if sub: content_type = ContentType('image', sub) @@ -125,11 +131,11 @@ def _guess_type(filename): that heuristic content type checker get wrong. """ - if filename.endswith(".bz2"): - return ContentType("application", "x-bzip2") + if filename.endswith('.bz2'): + return ContentType('application', 'x-bzip2') - if filename.endswith(".gz"): - return ContentType("application", "x-gzip") + if filename.endswith('.gz'): + return ContentType('application', 'x-gzip') return None @@ -152,7 +158,7 @@ def __init__(self, content_type, body, charset=None, disposition=None, if content_type.main == 'text': # the text should have a charset if not charset: - charset = "utf-8" + charset = 'utf-8' # it should be stored as unicode. period if isinstance(body, six.binary_type): @@ -223,8 +229,8 @@ def message_id(self): @message_id.setter def message_id(self, value): if not MessageId.is_valid(value): - raise ValueError("invalid message id format") - self.headers['Message-Id'] = "<{0}>".format(value) + raise ValueError('invalid message id format') + self.headers['Message-Id'] = '<{0}>'.format(value) @property def subject(self): @@ -351,7 +357,7 @@ def get_attached_message(self): for p in part.walk(): return p except Exception: - log.exception("Failed to get attached message") + log.exception('Failed to get attached message') return None def remove_headers(self, *header_names): @@ -378,7 +384,7 @@ def is_bounce(self, probability=0.3): return self.bounce.is_bounce(probability) def __str__(self): - return "({0})".format(self.content_type) + return '({0})'.format(self.content_type) class MimePart(RichPartMixin): @@ -407,7 +413,7 @@ def size(self): def headers(self): """Returns multi dictionary with headers converted to 
unicode, headers like Content-Type, Content-Disposition are tuples - ("value", {"param": "val"})""" + ('value', {'param': 'val'})""" return self._container.headers @property @@ -468,11 +474,11 @@ def to_string(self): # we submit the original string, # no copying, no alternation, yeah! if self.is_root() and not self.was_changed(ignore_prepends=True): - with closing(six.StringIO()) as out: + with closing(StringIO()) as out: self._container._stream_prepended_headers(out) return out.getvalue() + self._container.string else: - with closing(six.StringIO()) as out: + with closing(StringIO()) as out: self.to_stream(out) return out.getvalue() @@ -518,7 +524,6 @@ def enclose(self, message): self.enclosed = message message.set_root(False) - def _to_stream_when_changed(self, out): ctype = self.content_type @@ -526,7 +531,7 @@ def _to_stream_when_changed(self, out): if ctype.is_singlepart(): if self._container.body_changed(): - charset, encoding, body = encode_body(self) + charset, encoding, body = _encode_body(self) if charset: self.charset = charset self.content_encoding = WithParams(encoding) @@ -537,35 +542,35 @@ def _to_stream_when_changed(self, out): if self.headers: self.headers.to_stream(out) elif self.is_root(): - raise EncodingError("Root message should have headers") + raise EncodingError('Root message should have headers') - out.write(CRLF) + out.write(_CRLF) out.write(body) else: self.headers.to_stream(out) - out.write(CRLF) + out.write(_CRLF) if ctype.is_multipart(): boundary = ctype.get_boundary_line() for index, part in enumerate(self.parts): out.write( - (CRLF if index != 0 else "") + boundary + CRLF) + (_CRLF if index != 0 else '') + boundary + _CRLF) part.to_stream(out) - out.write(CRLF + ctype.get_boundary_line(final=True) + CRLF) + out.write(_CRLF + ctype.get_boundary_line(final=True) + _CRLF) elif ctype.is_message_container(): self.enclosed.to_stream(out) -def decode_body(content_type, content_encoding, body): +def _decode_body(content_type, 
content_encoding, body): # decode the transfer encoding - body = decode_transfer_encoding(content_encoding, body) + body = _decode_transfer_encoding(content_encoding, body) # decode the charset next - return decode_charset(content_type, body) + return _decode_charset(content_type, body) -def decode_transfer_encoding(encoding, body): +def _decode_transfer_encoding(encoding, body): if encoding == 'base64': return _base64_decode(body) elif encoding == 'quoted-printable': @@ -573,7 +578,8 @@ def decode_transfer_encoding(encoding, body): else: return body -def decode_charset(ctype, body): + +def _decode_charset(ctype, body): if ctype.main != 'text': return body @@ -583,24 +589,24 @@ def decode_charset(ctype, body): # for text/html unicode bodies make sure to replace # the whitespace (0xA0) with   Outlook is reported to # have a bug there - if ctype.sub =='html' and charset == 'utf-8': + if ctype.sub == 'html' and charset == 'utf-8': # Outlook bug body = body.replace(u'\xa0', u' ') return body -def encode_body(part): +def _encode_body(part): content_type = part.content_type content_encoding = part.content_encoding.value body = part._container.body charset = content_type.get_charset() if content_type.main == 'text': - charset, body = encode_charset(charset, body) + charset, body = _encode_charset(charset, body) if not part.is_attachment(): - content_encoding = choose_text_encoding(charset, content_encoding, - body) + content_encoding = _choose_text_encoding(charset, content_encoding, + body) # report which text encoding is chosen metrics.incr('encoding.' 
+ content_encoding) else: @@ -608,11 +614,11 @@ def encode_body(part): else: content_encoding = 'base64' - body = encode_transfer_encoding(content_encoding, body) + body = _encode_transfer_encoding(content_encoding, body) return charset, content_encoding, body -def encode_charset(preferred_charset, text): +def _encode_charset(preferred_charset, text): try: charset = preferred_charset or 'ascii' text = text.encode(preferred_charset) @@ -622,7 +628,24 @@ def encode_charset(preferred_charset, text): return charset, text -def encode_transfer_encoding(encoding, body): +def _encode_transfer_encoding(encoding, body): + if six.PY3: + if encoding == 'quoted-printable': + body = quopri.encodestring(body, quotetabs=False) + return body.decode('utf-8') + + if encoding == 'base64': + if isinstance(body, six.text_type): + body = body.encode('utf-8') + + body = _email.encode_base64(body) + return body.decode('utf-8') + + if six.PY3 and isinstance(body, six.binary_type): + return body.decode('utf-8') + + return body + if encoding == 'quoted-printable': return quopri.encodestring(body, quotetabs=False) elif encoding == 'base64': @@ -631,18 +654,18 @@ def encode_transfer_encoding(encoding, body): return body -def choose_text_encoding(charset, preferred_encoding, body): +def _choose_text_encoding(charset, preferred_encoding, body): if charset in ('ascii', 'iso-8859-1', 'us-ascii'): if has_long_lines(body): - return stronger_encoding(preferred_encoding, 'quoted-printable') + return _stronger_encoding(preferred_encoding, 'quoted-printable') else: return preferred_encoding else: - encoding = stronger_encoding(preferred_encoding, 'quoted-printable') + encoding = _stronger_encoding(preferred_encoding, 'quoted-printable') return encoding -def stronger_encoding(a, b): +def _stronger_encoding(a, b): weights = {'7bit': 0, 'quoted-printable': 1, 'base64': 1, '8bit': 3} if weights.get(a, -1) >= weights[b]: return a @@ -652,7 +675,7 @@ def stronger_encoding(a, b): def has_long_lines(text, 
max_line_len=599): """ Returns True if text contains lines longer than a certain length. - Some SMTP servers (Exchange) refuse to accept messages "wider" than + Some SMTP servers (Exchange) refuse to accept messages 'wider' than certain length. """ if not text: @@ -668,38 +691,65 @@ def _base64_decode(s): try: return base64.b64decode(s) - except TypeError: - s = s.translate(None, _b64_invalid_chars) + except (TypeError, ValueError): + s = _recover_base64(s) tail_size = len(s) & 3 if tail_size == 1: # crop last character as adding padding does not help return base64.b64decode(s[:-1]) # add padding - return base64.b64decode(s + "=" * (4 - tail_size)) + return base64.b64decode(s + '=' * (4 - tail_size)) class _CounterIO(object): + def __init__(self): self.length = 0 + def tell(self): return self.length + def write(self, s): self.length += len(s) + def seek(self, p): self.length = p + def getvalue(self): return self.length + def close(self): pass -CRLF = "\r\n" +_CRLF = '\r\n' # To recover base64 we need to translate the part to the base64 alphabet. 
-_b64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" -_b64_invalid_chars = "" +_b64_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' +_b64_invalid_chars = '' for ch in range(256): if chr(ch) not in _b64_alphabet: _b64_invalid_chars += chr(ch) + + +def _recover_base64(s): + if six.PY2: + return s.translate(None, _b64_invalid_chars) + + buf = StringIO() + chunk_start = 0 + for i, c in enumerate(s): + if (('A' <= c <= 'Z') or + ('a' <= c <= 'z') or + ('0' <= c <= '9') or + c == '+' or c == '/' + ): + continue + + buf.write(s[chunk_start:i]) + chunk_start = i + 1 + + buf.write(s[chunk_start:len(s)]) + return buf.getvalue() diff --git a/flanker/mime/message/scanner.py b/flanker/mime/message/scanner.py index e90a9c27..f854a1f8 100644 --- a/flanker/mime/message/scanner.py +++ b/flanker/mime/message/scanner.py @@ -3,10 +3,12 @@ import regex as re import six +from six.moves import StringIO from flanker.mime.message.errors import DecodingError from flanker.mime.message.headers import parsing, is_empty, ContentType from flanker.mime.message.part import MimePart, Stream +from flanker.mime.message.utils import to_unicode log = getLogger(__name__) @@ -20,7 +22,10 @@ def scan(string): raise DecodingError('Scanner works with binary only') else: if isinstance(string, six.binary_type): - string = string.decode('utf-8') + string = to_unicode(string) + + if not isinstance(string, six.text_type): + raise DecodingError('Cannot scan type %s' % type(string)) tokens = tokenize(string) if not tokens: @@ -250,7 +255,7 @@ def __init__(self, tokens, string): self.position = -1 self.tokens = tokens self.string = string - self.stream = six.StringIO(string) + self.stream = StringIO(string) self.opcount = 0 def next(self): diff --git a/tests/addresslib/parser_mailbox_test.py b/tests/addresslib/parser_mailbox_test.py index b0387a08..bd9d597a 100644 --- a/tests/addresslib/parser_mailbox_test.py +++ b/tests/addresslib/parser_mailbox_test.py 
@@ -118,7 +118,7 @@ def test_display_name(): # FIXME: In Python 3 subgroup of separator symbols is treated as # FIXME: allowed. We need to figure out why. - if six.PY3 and ord(cc) in [0x1c, 0x1d, 0x1e, 0x1f]: + if six.PY3 and ord(cc) in [0x1c, 0x1d, 0x1e, 0x1f]: continue run_mailbox_test(u'"{0}" '.format(cc), None) diff --git a/tests/dkim_test.py b/tests/dkim_test.py index 5576f83f..6d9bc051 100644 --- a/tests/dkim_test.py +++ b/tests/dkim_test.py @@ -2,6 +2,7 @@ import io import os +import six from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization @@ -55,10 +56,10 @@ def test_simple_domain_key_signature(): b"7lzEjzaYxBDx2PP25abuTSJF0=\r\n" ) - def test_simple_dkim_signature(): signer = dkim.DKIMSigner(DUMMY_RSA_KEY, "mx", "testing1") sig = signer.sign(DUMMY_EMAIL, current_time=1404859754) + print(sig) assert_equal( sig, b"DKIM-Signature: a=rsa-sha256; v=1; c=simple/simple; d=testing1; q=dn" @@ -74,16 +75,18 @@ def test_canonicalization(): path = os.path.join( os.path.dirname(__file__), "fixtures", "messages", "dkim", "email.*" ) - for path in glob.glob(path): - with open(path) as f: + for i, path in enumerate(glob.glob(path)): + with open(path, 'rb') as f: contents = f.read() - with open(path.replace("email", "nofws.expected")) as f: + with open(path.replace("email", "nofws.expected"), 'rb') as f: nofws_contents = f.read() - with open(path.replace("email", "simple.expected")) as f: + with open(path.replace("email", "simple.expected"), 'rb') as f: simple_contents = f.read() - with open(path.replace("email", "relaxed.expected")) as f: + with open(path.replace("email", "relaxed.expected"), 'rb') as f: relaxed_contents = f.read() + print('Test case #%d: %s' % (i, path)) + assert_equal( canonicalize_contents(dkim.NoFWSCanonicalization(), contents), nofws_contents @@ -99,13 +102,21 @@ def test_canonicalization(): def canonicalize_contents(canonicalization_rule, contents): + if six.PY3 and isinstance(contents, 
six.text_type): + contents = contents.encode('utf-8') + headers, body = dkim._rfc822_parse(contents) output = io.BytesIO() for header, value in headers: header, value = canonicalization_rule.canonicalize_header( header, value) - output.write(b"{h}:{v}".format(h=header, v=value)) + output.write(("%s:%s" % (header.decode('utf-8'), + value.decode('utf-8'))).encode('utf-8')) body = canonicalization_rule.canonicalize_body(body) output.write(b"\r\n") output.write(body) return output.getvalue() + + +def _normalize_crlf(s): + return s.replace(b'\r\n', b'\n').replace(b'\r', b'\n').replace(b'\n', b'\r\n') diff --git a/tests/mime/message/create_test.py b/tests/mime/message/create_test.py index 9fa483a2..5e4fdec3 100644 --- a/tests/mime/message/create_test.py +++ b/tests/mime/message/create_test.py @@ -25,7 +25,10 @@ def from_python_message_test(): payloads = [p.get_payload(decode=True) for p in python_message.walk()][1:] payloads2 = [p.body for p in message.walk()] - eq_(payloads, payloads2) + eq_(3, len(payloads2)) + eq_(payloads[0].decode('utf-8'), payloads2[0]) + eq_(payloads[1], payloads2[1]) + eq_(payloads[2].decode('utf-8'), payloads2[2]) def from_string_message_test(): @@ -72,7 +75,7 @@ def create_singlepart_ascii_long_lines_test(): eq_(very_long, message2.body) message2 = _email.message_from_string(message.to_string()) - eq_(very_long, message2.get_payload(decode=True)) + eq_(very_long, message2.get_payload(decode=True).decode('utf-8')) def create_multipart_simple_test(): @@ -280,24 +283,23 @@ def create_enclosed_nested_test(): def guessing_attachments_test(): - binary = create.binary( - "application", 'octet-stream', MAILGUN_PNG, '/home/alex/mailgun.png') + binary = create.binary('application', 'octet-stream', MAILGUN_PNG, + '/home/alex/mailgun.png') eq_('image/png', binary.content_type) eq_('mailgun.png', binary.content_type.params['name']) - binary = create.binary( - "application", 'octet-stream', - MAILGUN_PIC, '/home/alex/mailgun.png', disposition='attachment') + 
binary = create.binary('application', 'octet-stream', MAILGUN_PIC, + '/home/alex/mailgun.png', disposition='attachment') eq_('attachment', binary.headers['Content-Disposition'].value) eq_('mailgun.png', binary.headers['Content-Disposition'].params['filename']) - binary = create.binary( - "application", 'octet-stream', NOTIFICATION, '/home/alex/mailgun.eml') + binary = create.binary('application', 'octet-stream', NOTIFICATION, + '/home/alex/mailgun.eml') eq_('message/rfc822', binary.content_type) - binary = create.binary( - "application", 'octet-stream', MAILGUN_WAV, '/home/alex/audiofile.wav') + binary = create.binary('application', 'octet-stream', MAILGUN_WAV, + '/home/alex/audiofile.wav') eq_('audio/x-wav', binary.content_type) diff --git a/tests/mime/message/part_test.py b/tests/mime/message/part_test.py index 48cec1c4..c3dba5c4 100644 --- a/tests/mime/message/part_test.py +++ b/tests/mime/message/part_test.py @@ -1,14 +1,14 @@ # coding:utf-8 from contextlib import closing -from cStringIO import StringIO from nose.tools import eq_, ok_, assert_false, assert_raises, assert_less +from six.moves import StringIO from flanker import _email from flanker.mime import recover from flanker.mime.create import multipart, text from flanker.mime.message.errors import EncodingError -from flanker.mime.message.part import encode_transfer_encoding, _base64_decode +from flanker.mime.message.part import _encode_transfer_encoding, _base64_decode from flanker.mime.message.scanner import scan from tests import (BILINGUAL, BZ2_ATTACHMENT, ENCLOSED, TORTURE, TORTURE_PART, ENCLOSED_BROKEN_ENCODING, EIGHT_BIT, QUOTED_PRINTABLE, @@ -40,7 +40,7 @@ def readonly_immutability_test(): # we can also read the body without changing anything pbody = pmessage.get_payload()[1].get_payload()[0].get_payload()[0].get_payload(decode=True) - pbody = unicode(pbody, 'utf-8') + pbody = pbody.decode('utf-8') eq_(pbody, message.parts[1].enclosed.parts[0].body) assert_false(message.was_changed()) eq_(ENCLOSED, 
message.to_string()) @@ -279,7 +279,7 @@ def set_message_id_test(): # Make sure that ascii uprades to quoted-printable if it has long lines. -def ascii_to_quoted_printable_test(): +def ascii_to_quoted_printable_test_2(): # contains unicode chars message = scan(TEXT_ONLY) value = u'Hello, how is it going?' * 100 @@ -363,8 +363,8 @@ def broken_ctype_test(): def read_attach_test(): message = scan(MAILGUN_PIC) - p = (p for p in message.walk() if p.content_type.main == 'image').next() - eq_(p.body, MAILGUN_PNG) + image_parts = [p for p in message.walk() if p.content_type.main == 'image'] + eq_(image_parts[0].body, MAILGUN_PNG) def from_python_message_test(): @@ -374,14 +374,20 @@ def from_python_message_test(): eq_(python_message['Subject'], message.headers['Subject']) ctypes = [p.get_content_type() for p in python_message.walk()] - ctypes2 = [p.headers['Content-Type'][0] \ - for p in message.walk(with_self=True)] + ctypes2 = [p.headers['Content-Type'][0] + for p in message.walk(with_self=True)] eq_(ctypes, ctypes2) - payloads = [p.get_payload(decode=True) for p in python_message.walk()][1:] + payloads = [] + for p in python_message.walk(): + payload = p.get_payload(decode=True) + if payload: + payload = payload.decode('utf-8') + payloads.append(payload) + payloads2 = [p.body for p in message.walk()] - eq_(payloads, payloads2) + eq_(payloads[1:], payloads2) def iphone_test(): @@ -544,12 +550,15 @@ def message_convert_to_python_test(): b = message.to_python_message() payloads = [p.body for p in message.walk()] - payloads1 = list(p.get_payload(decode=True) \ - for p in a.walk() if not p.is_multipart()) - payloads2 = list(p.get_payload(decode=True) \ - for p in b.walk() if not p.is_multipart()) - - eq_(payloads, payloads2) + payloads1 = [p.get_payload(decode=True) + for p in a.walk() if not p.is_multipart()] + payloads2 = [p.get_payload(decode=True) + for p in b.walk() if not p.is_multipart()] + + eq_(3, len(payloads)) + eq_(payloads[0], payloads2[0].decode('utf-8')) + 
eq_(payloads[1], payloads2[1]) + eq_(payloads[2], payloads2[2].decode('utf-8')) eq_(payloads1, payloads2) @@ -612,17 +621,20 @@ def read_body_test(): def test_encode_transfer_encoding(): body = "long line " * 100 - encoded_body = encode_transfer_encoding('base64', body) + encoded_body = _encode_transfer_encoding('base64', body) # according to RFC 5322 line "SHOULD be no more than 78 characters" assert_less(max([len(l) for l in encoded_body.splitlines()]), 79) # Test base64 decoder. -def test__base64_decode(): - eq_("hello", _base64_decode("aGVs\r\nbG8=")) # valid base64 - eq_("hello!", _base64_decode("a\x00GVsbG8\t*hx")) # trim last character - eq_("hello", _base64_decode("aGVsb\r\nG8")) # recover single byte padding - eq_("hello!!", _base64_decode("aGVs\rbG8h\nIQ")) # recover 2 bytes padding +def test_base64_decode(): + eq_(b"hello", _base64_decode("aGVs\r\nbG8=")) # valid base64 + eq_(b"hello!", _base64_decode("a\x00GVsbG8\t*hx")) # trim last character + eq_(b"hello", _base64_decode("aGVsb\r\nG8")) # recover single byte padding + eq_(b"hello!!", _base64_decode("aGVs\rbG8h\nIQ")) # recover 2 bytes padding + eq_(b"hello!!", _base64_decode("aGЫVs\rЫЫbG8hЫЫ\nЫЫIQ")) + eq_(b"hello!!", _base64_decode("ЫaGVsbG8h\nIQЫ")) + eq_(b"hello!!", _base64_decode("ЫЫЫЫaGVsЫЫЫЫ\rbG8h\nIQЫЫЫЫ")) # Make sure broken base64 part gets recovered. 
diff --git a/tests/mime/message/scanner_test.py b/tests/mime/message/scanner_test.py index 7df0074a..2e10f2a5 100644 --- a/tests/mime/message/scanner_test.py +++ b/tests/mime/message/scanner_test.py @@ -1,4 +1,5 @@ # coding:utf-8 +import six from nose.tools import * from flanker import _email @@ -15,7 +16,7 @@ def no_ctype_headers_and_and_boundaries_test(): message = scan(NO_CTYPE) eq_(C('text', 'plain', dict(charset='ascii')), message.content_type) pmessage = _email.message_from_string(NO_CTYPE) - eq_(message.body, pmessage.get_payload(decode=True)) + eq_(message.body, pmessage.get_payload(decode=True).decode('utf-8')) for a, b in zip(NO_CTYPE_HEADERS, message.headers.iteritems()): eq_(a, b) @@ -27,10 +28,12 @@ def multipart_message_test(): eq_(C('multipart', 'alternative', dict(boundary='=-omjqkVTVbwdgCWFRgIkx')), message.content_type) - p = unicode(pmessage.get_payload()[0].get_payload(decode=True), 'utf-8') + p = pmessage.get_payload()[0].get_payload() + if six.PY2: + p = p.decode('utf-8') eq_(p, message.parts[0].body) - p = pmessage.get_payload()[1].get_payload(decode=True) + p = pmessage.get_payload()[1].get_payload() eq_(p, message.parts[1].body) @@ -53,13 +56,13 @@ def enclosed_message_test(): enclosed.headers['Content-Type']) pbody = penclosed.get_payload()[0].get_payload()[0].get_payload(decode=True) - pbody = unicode(pbody, 'utf-8') + pbody = pbody.decode('utf-8') body = enclosed.enclosed.parts[0].body eq_(pbody, body) body = enclosed.enclosed.parts[1].body pbody = penclosed.get_payload()[0].get_payload()[1].get_payload(decode=True) - pbody = unicode(pbody, 'utf-8') + pbody = pbody.decode('utf-8') eq_(pbody, body) @@ -114,7 +117,7 @@ def test_uservoice_case(): message._container._body_changed = True val = message.to_string() for line in val.splitlines(): - print line + print(line) ok_(len(line) < 200) message = scan(val) eq_(html, message.body)