From 5c202834811e93eb59cc2e0168592053b160d5d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigurd=20Lj=C3=B8dal?= <544451+ljodal@users.noreply.github.com> Date: Mon, 31 Jan 2022 21:02:08 +0100 Subject: [PATCH] Allow extractors to provide message flags This adds a sixth value to the tuple returned by the extractor functions which should be a set of flags. Which flags should be applied to a message should be determined by the extractor, as it depends on e.g. the file format being parsed. For example "%s" should have the python-format flag if it was parsed from a Python file and the c-format flag if it was extracted from a C file. The logic of detecting python-format flags is also moved to the Python extractor in this PR. NOTE: This is partially a breaking change. Backwards compatibility is maintained with extractors that return 5-tuples instead of 6-tuples, but the interface Babel exposes for extracting messages always returns 6-tuples. I don't see a good way around this. Removing the python-format detection from Message.__init__ is also a breaking change, but that could potentially be put back for now. --- babel/messages/catalog.py | 21 +--------- babel/messages/checkers.py | 3 +- babel/messages/extract.py | 45 ++++++++++++++-------- babel/messages/frontend.py | 4 +- babel/util.py | 18 +++++++++ tests/messages/test_catalog.py | 17 -------- tests/messages/test_extract.py | 64 +++++++++++++++---------------- tests/messages/test_js_extract.py | 18 ++++----- tests/test_util.py | 28 +++++++++++++- 9 files changed, 120 insertions(+), 98 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 342f7377b..065ef35fb 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -27,18 +27,6 @@ __all__ = ['Message', 'Catalog', 'TranslationError'] -PYTHON_FORMAT = re.compile(r''' - \% - (?:\(([\w]*)\))? - ( - [-#0\ +]?(?:\*|[\d]+)? - (?:\.(?:\*|[\d]+))? - [hlL]? 
- ) - ([diouxXeEfFgGcrs%]) -''', re.VERBOSE) - - def _parse_datetime_header(value): match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value) @@ -96,10 +84,6 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(), self.string = string self.locations = list(distinct(locations)) self.flags = set(flags) - if id and self.python_format: - self.flags.add('python-format') - else: - self.flags.discard('python-format') self.auto_comments = list(distinct(auto_comments)) self.user_comments = list(distinct(user_comments)) if isinstance(previous_id, str): @@ -201,10 +185,7 @@ def python_format(self): True :type: `bool`""" - ids = self.id - if not isinstance(ids, (list, tuple)): - ids = [ids] - return any(PYTHON_FORMAT.search(id) for id in ids) + return 'python-format' in self.flags class TranslationError(Exception): diff --git a/babel/messages/checkers.py b/babel/messages/checkers.py index b79bd8257..76e123a55 100644 --- a/babel/messages/checkers.py +++ b/babel/messages/checkers.py @@ -11,7 +11,8 @@ :license: BSD, see LICENSE for more details. """ -from babel.messages.catalog import TranslationError, PYTHON_FORMAT +from babel.messages.catalog import TranslationError +from babel.util import PYTHON_FORMAT #: list of format chars that are compatible to each other diff --git a/babel/messages/extract.py b/babel/messages/extract.py index c23a924b3..8f12f9330 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -22,7 +22,7 @@ import sys from tokenize import generate_tokens, COMMENT, NAME, OP, STRING -from babel.util import parse_encoding, parse_future_flags, pathmatch +from babel.util import parse_encoding, parse_future_flags, pathmatch, has_python_format from textwrap import dedent @@ -79,7 +79,7 @@ def extract_from_dir( """Extract messages from any source files found in the given directory. This function generates tuples of the form ``(filename, lineno, message, - comments, context)``. + comments, context, flags)``. 
Which extraction method is used per file is determined by the `method_map` parameter, which maps extended glob patterns to extraction method names. @@ -185,7 +185,7 @@ def check_and_call_extract_file(filepath, method_map, options_map, So, given an absolute path to a file `filepath`, we want to check using just the relative path from `dirpath` to `filepath`. - Yields 5-tuples (filename, lineno, messages, comments, context). + Yields 6-tuples (filename, lineno, messages, comments, context, flags). :param filepath: An absolute path to a file that exists. :param method_map: a list of ``(pattern, method)`` tuples that maps of @@ -205,8 +205,8 @@ def check_and_call_extract_file(filepath, method_map, options_map, :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. :param dirpath: the path to the directory to extract messages from. - :return: iterable of 5-tuples (filename, lineno, messages, comments, context) - :rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None] + :return: iterable of 6-tuples (filename, lineno, messages, comments, context, flags) + :rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None, set[str]]] """ # filename is the relative path from dirpath to the actual file filename = relpath(filepath, dirpath) @@ -228,7 +228,10 @@ def check_and_call_extract_file(filepath, method_map, options_map, options=options, strip_comment_tags=strip_comment_tags ): - yield (filename, ) + message_tuple + if len(message_tuple) == 4: + yield (filename, *message_tuple, set()) + else: + yield (filename, *message_tuple) break @@ -237,7 +240,7 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, comment_tags=(), options=None, strip_comment_tags=False): """Extract messages from a specific file. - This function returns a list of tuples of the form ``(lineno, message, comments, context)``. 
+ This function returns a list of tuples of the form ``(lineno, message, comments, context, flags)``. :param filename: the path to the file to extract messages from :param method: a string specifying the extraction method (.e.g. "python") @@ -250,8 +253,8 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS, :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. :param options: a dictionary of additional options (optional) - :returns: list of tuples of the form ``(lineno, message, comments, context)`` - :rtype: list[tuple[int, str|tuple[str], list[str], str|None] + :returns: list of tuples of the form ``(lineno, message, comments, context, flags)`` + :rtype: list[tuple[int, str|tuple[str], list[str], str|None, set[str]] """ if method == 'ignore': return [] @@ -299,8 +302,8 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), :param strip_comment_tags: a flag that if set to `True` causes all comment tags to be removed from the collected comments. 
:raise ValueError: if the extraction method is not registered - :returns: iterable of tuples of the form ``(lineno, message, comments, context)`` - :rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None] + :returns: iterable of tuples of the form ``(lineno, message, comments, context, flags)`` + :rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None, set[str]] """ func = None if callable(method): @@ -339,7 +342,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), results = func(fileobj, keywords.keys(), comment_tags, options=options or {}) - for lineno, funcname, messages, comments in results: + for lineno, funcname, messages, comments, *rest in results: if funcname: spec = keywords[funcname] or (1,) else: @@ -349,6 +352,11 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), if not messages: continue + if rest: + flags = rest[0] + else: + flags = set() + # Validate the messages against the keyword's specification context = None msgs = [] @@ -390,7 +398,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(), if strip_comment_tags: _strip_comment_tags(comments, comment_tags) - yield lineno, messages, comments, context + yield lineno, messages, comments, context, flags def extract_nothing(fileobj, keywords, comment_tags, options): @@ -404,7 +412,7 @@ def extract_python(fileobj, keywords, comment_tags, options): """Extract messages from Python source code. It returns an iterator yielding tuples in the following form ``(lineno, - funcname, message, comments)``. + funcname, message, comments, flags)``. 
:param fileobj: the seekable, file-like object the messages should be extracted from @@ -422,6 +430,7 @@ def extract_python(fileobj, keywords, comment_tags, options): translator_comments = [] in_def = in_translator_comments = False comment_tag = None + flags = set() encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8') future_flags = parse_future_flags(fileobj, encoding) @@ -468,6 +477,9 @@ def extract_python(fileobj, keywords, comment_tags, options): else: messages.append(None) + if has_python_format(message for message in messages if message): + flags.add("python-format") + if len(messages) > 1: messages = tuple(messages) else: @@ -479,12 +491,13 @@ def extract_python(fileobj, keywords, comment_tags, options): translator_comments = [] yield (message_lineno, funcname, messages, - [comment[1] for comment in translator_comments]) + [comment[1] for comment in translator_comments], flags) funcname = lineno = message_lineno = None call_stack = -1 messages = [] translator_comments = [] + flags = set() in_translator_comments = False if nested: funcname = value @@ -610,7 +623,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options): if messages is not None: yield (message_lineno, funcname, messages, - [comment[1] for comment in translator_comments]) + [comment[1] for comment in translator_comments], set()) funcname = message_lineno = last_argument = None concatenate_next = False diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 41b75cec7..409e3f094 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -502,14 +502,14 @@ def callback(filename, method, options): strip_comment_tags=self.strip_comments, directory_filter=self.directory_filter, ) - for filename, lineno, message, comments, context in extracted: + for filename, lineno, message, comments, context, flags in extracted: if os.path.isfile(path): filepath = filename # already normalized else: filepath = os.path.normpath(os.path.join(path, 
filename)) catalog.add(message, None, [(filepath, lineno)], - auto_comments=comments, context=context) + auto_comments=comments, context=context, flags=flags) self.log.info('writing PO template file to %s', self.output_file) write_po(outfile, catalog, width=self.width, diff --git a/babel/util.py b/babel/util.py index 2cac55336..3f828149e 100644 --- a/babel/util.py +++ b/babel/util.py @@ -263,3 +263,21 @@ def dst(self, dt): def _cmp(a, b): return (a > b) - (a < b) + + +PYTHON_FORMAT = re.compile(r''' + \% + (?:\(([\w]*)\))? + ( + [-#0\ +]?(?:\*|[\d]+)? + (?:\.(?:\*|[\d]+))? + [hlL]? + ) + ([diouxXeEfFgGcrs%]) +''', re.VERBOSE) + + +def has_python_format(ids): + if isinstance(ids, str): + ids = [ids] + return any(PYTHON_FORMAT.search(id) for id in ids) diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py index 2d36809de..8f51e75be 100644 --- a/tests/messages/test_catalog.py +++ b/tests/messages/test_catalog.py @@ -23,23 +23,6 @@ class MessageTestCase(unittest.TestCase): - def test_python_format(self): - assert catalog.PYTHON_FORMAT.search('foo %d bar') - assert catalog.PYTHON_FORMAT.search('foo %s bar') - assert catalog.PYTHON_FORMAT.search('foo %r bar') - assert catalog.PYTHON_FORMAT.search('foo %(name).1f') - assert catalog.PYTHON_FORMAT.search('foo %(name)3.3f') - assert catalog.PYTHON_FORMAT.search('foo %(name)3f') - assert catalog.PYTHON_FORMAT.search('foo %(name)06d') - assert catalog.PYTHON_FORMAT.search('foo %(name)Li') - assert catalog.PYTHON_FORMAT.search('foo %(name)#d') - assert catalog.PYTHON_FORMAT.search('foo %(name)-4.4hs') - assert catalog.PYTHON_FORMAT.search('foo %(name)*.3f') - assert catalog.PYTHON_FORMAT.search('foo %(name).*f') - assert catalog.PYTHON_FORMAT.search('foo %(name)3.*f') - assert catalog.PYTHON_FORMAT.search('foo %(name)*.*f') - assert catalog.PYTHON_FORMAT.search('foo %()s') - def test_translator_comments(self): mess = catalog.Message('foo', user_comments=['Comment About `foo`']) 
self.assertEqual(mess.user_comments, ['Comment About `foo`']) diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py index fb9599db6..32f23738d 100644 --- a/tests/messages/test_extract.py +++ b/tests/messages/test_extract.py @@ -38,16 +38,16 @@ def test_nested_calls(self): extract.DEFAULT_KEYWORDS.keys(), [], {})) self.assertEqual([ - (1, '_', None, []), - (2, 'ungettext', (None, None, None), []), - (3, 'ungettext', (u'Babel', None, None), []), - (4, 'ungettext', (None, u'Babels', None), []), - (5, 'ungettext', (u'bunny', u'bunnies', None), []), - (6, 'ungettext', (None, u'bunnies', None), []), - (7, '_', None, []), - (8, 'gettext', u'Rabbit', []), - (9, 'dgettext', (u'wiki', None), []), - (10, 'dngettext', (None, u'Page', u'Pages', None), [])], + (1, '_', None, [], set()), + (2, 'ungettext', (None, None, None), [], set()), + (3, 'ungettext', (u'Babel', None, None), [], set()), + (4, 'ungettext', (None, u'Babels', None), [], set()), + (5, 'ungettext', (u'bunny', u'bunnies', None), [], set()), + (6, 'ungettext', (None, u'bunnies', None), [], set()), + (7, '_', None, [], set()), + (8, 'gettext', u'Rabbit', [], set()), + (9, 'dgettext', (u'wiki', None), [], set()), + (10, 'dngettext', (None, u'Page', u'Pages', None), [], set())], messages) def test_extract_default_encoding_ascii(self): @@ -56,14 +56,14 @@ def test_extract_default_encoding_ascii(self): buf, list(extract.DEFAULT_KEYWORDS), [], {}, )) # Should work great in both py2 and py3 - self.assertEqual([(1, '_', 'a', [])], messages) + self.assertEqual([(1, '_', 'a', [], set())], messages) def test_extract_default_encoding_utf8(self): buf = BytesIO(u'_("☃")'.encode('UTF-8')) messages = list(extract.extract_python( buf, list(extract.DEFAULT_KEYWORDS), [], {}, )) - self.assertEqual([(1, '_', u'☃', [])], messages) + self.assertEqual([(1, '_', u'☃', [], set())], messages) def test_nested_comments(self): buf = BytesIO(b"""\ @@ -73,7 +73,7 @@ def test_nested_comments(self): """) messages = 
list(extract.extract_python(buf, ('ngettext',), ['TRANSLATORS:'], {})) - self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])], + self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], set())], messages) def test_comments_with_calls_that_spawn_multiple_lines(self): @@ -98,21 +98,21 @@ def test_comments_with_calls_that_spawn_multiple_lines(self): {'strip_comment_tags': False})) self.assertEqual((6, '_', 'Locale deleted.', - [u'NOTE: This Comment SHOULD Be Extracted']), + [u'NOTE: This Comment SHOULD Be Extracted'], set()), messages[1]) self.assertEqual((10, 'ngettext', (u'Foo deleted.', u'Foos deleted.', None), - [u'NOTE: This Comment SHOULD Be Extracted']), + [u'NOTE: This Comment SHOULD Be Extracted'], set()), messages[2]) self.assertEqual((3, 'ngettext', (u'Catalog deleted.', u'Catalogs deleted.', None), - [u'NOTE: This Comment SHOULD Be Extracted']), + [u'NOTE: This Comment SHOULD Be Extracted'], set()), messages[0]) self.assertEqual((15, 'ngettext', (u'Bar deleted.', u'Bars deleted.', None), [u'NOTE: This Comment SHOULD Be Extracted', - u'NOTE: And This One Too']), + u'NOTE: And This One Too'], set()), messages[3]) def test_declarations(self): @@ -129,9 +129,9 @@ class Meta: messages = list(extract.extract_python(buf, extract.DEFAULT_KEYWORDS.keys(), [], {})) - self.assertEqual([(3, '_', u'Page arg 1', []), - (3, '_', u'Page arg 2', []), - (8, '_', u'log entry', [])], + self.assertEqual([(3, '_', u'Page arg 1', [], set()), + (3, '_', u'Page arg 2', [], set()), + (8, '_', u'log entry', [], set())], messages) def test_multiline(self): @@ -143,8 +143,8 @@ def test_multiline(self): count) """) messages = list(extract.extract_python(buf, ('ngettext',), [], {})) - self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), []), - (3, 'ngettext', (u'elvis', u'elvises', None), [])], + self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [], set()), + (3, 'ngettext', (u'elvis', u'elvises', None), [], set())], messages) def 
test_npgettext(self): @@ -156,8 +156,8 @@ def test_npgettext(self): count) """) messages = list(extract.extract_python(buf, ('npgettext',), [], {})) - self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), []), - (3, 'npgettext', (u'Strings', u'elvis', u'elvises', None), [])], + self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), [], set()), + (3, 'npgettext', (u'Strings', u'elvis', u'elvises', None), [], set())], messages) buf = BytesIO(b"""\ msg = npgettext('Strings', 'pylon', # TRANSLATORS: shouldn't be @@ -166,7 +166,7 @@ def test_npgettext(self): """) messages = list(extract.extract_python(buf, ('npgettext',), ['TRANSLATORS:'], {})) - self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), [])], + self.assertEqual([(1, 'npgettext', (u'Strings', u'pylon', u'pylons', None), [], set())], messages) def test_triple_quoted_strings(self): @@ -178,9 +178,9 @@ def test_triple_quoted_strings(self): messages = list(extract.extract_python(buf, extract.DEFAULT_KEYWORDS.keys(), [], {})) - self.assertEqual([(1, '_', u'pylons', []), - (2, 'ngettext', (u'elvis', u'elvises', None), []), - (3, 'ngettext', (u'elvis', u'elvises', None), [])], + self.assertEqual([(1, '_', u'pylons', [], set()), + (2, 'ngettext', (u'elvis', u'elvises', None), [], set()), + (3, 'ngettext', (u'elvis', u'elvises', None), [], set())], messages) def test_multiline_strings(self): @@ -196,7 +196,7 @@ def test_multiline_strings(self): [(1, '_', u'This module provides internationalization and localization\n' 'support for your Python programs by providing an interface to ' - 'the GNU\ngettext message catalog library.', [])], + 'the GNU\ngettext message catalog library.', [], set())], messages) def test_concatenated_strings(self): @@ -474,9 +474,9 @@ def test_invalid_filter(self): messages = \ list(extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})) - self.assertEqual([(5, (u'bunny', u'bunnies'), [], None), - (8, u'Rabbit', [], 
None), - (10, (u'Page', u'Pages'), [], None)], messages) + self.assertEqual([(5, (u'bunny', u'bunnies'), [], None, set()), + (8, u'Rabbit', [], None, set()), + (10, (u'Page', u'Pages'), [], None, set())], messages) def test_invalid_extract_method(self): buf = BytesIO(b'') diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py index 73b16a934..2ef7787a2 100644 --- a/tests/messages/test_js_extract.py +++ b/tests/messages/test_js_extract.py @@ -14,9 +14,9 @@ def test_simple_extract(): list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {})) - assert messages == [(1, 'simple', [], None), - (2, 'simple', [], None), - (3, ('s', 'p'), [], None)] + assert messages == [(1, 'simple', [], None, set()), + (2, 'simple', [], None, set()), + (3, ('s', 'p'), [], None, set())] def test_various_calls(): @@ -36,9 +36,9 @@ def test_various_calls(): list(extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {})) assert messages == [ - (5, (u'bunny', u'bunnies'), [], None), - (8, u'Rabbit', [], None), - (10, (u'Page', u'Pages'), [], None) + (5, (u'bunny', u'bunnies'), [], None, set()), + (8, u'Rabbit', [], None, set()), + (10, (u'Page', u'Pages'), [], None, set()) ] @@ -132,7 +132,7 @@ def test_dotted_keyword_extract(): extract.extract('javascript', buf, {"com.corporate.i18n.formatMessage": None}, [], {}) ) - assert messages == [(1, 'Insert coin to continue', [], None)] + assert messages == [(1, 'Insert coin to continue', [], None, set())] def test_template_string_standard_usage(): @@ -141,7 +141,7 @@ def test_template_string_standard_usage(): extract.extract('javascript', buf, {"gettext": None}, [], {}) ) - assert messages == [(1, 'Very template, wow', [], None)] + assert messages == [(1, 'Very template, wow', [], None, set())] def test_template_string_tag_usage(): @@ -150,4 +150,4 @@ def test_template_string_tag_usage(): extract.extract('javascript', buf, {"i18n": None}, [], {}) ) - assert messages == [(1, 'Tag template, wow', 
[], None)] + assert messages == [(1, 'Tag template, wow', [], None, set())] diff --git a/tests/test_util.py b/tests/test_util.py index 43076ad93..0a919abbc 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -18,7 +18,7 @@ import pytest from babel import util -from babel.util import parse_future_flags +from babel.util import parse_future_flags, has_python_format class _FF: @@ -101,3 +101,29 @@ def test_parse_future(source, result): fp = BytesIO(source.encode('latin-1')) flags = parse_future_flags(fp) assert flags == result + +@pytest.mark.parametrize('ids', [ + ('foo %d bar',), + ('foo %s bar',), + ('foo %r bar',), + ('foo %(name).1f',), + ('foo %(name)3.3f',), + ('foo %(name)3f',), + ('foo %(name)06d',), + ('foo %(name)Li',), + ('foo %(name)#d',), + ('foo %(name)-4.4hs',), + ('foo %(name)*.3f',), + ('foo %(name).*f',), + ('foo %(name)3.*f',), + ('foo %(name)*.*f',), + ('foo %()s',), +]) +def test_has_python_format(ids): + assert has_python_format(ids) + +@pytest.mark.parametrize('ids', [ + ('foo',), +]) +def test_not_has_python_format(ids): + assert not has_python_format(ids)