Skip to content

Commit

Permalink
Allow extractors to provide message flags
Browse files Browse the repository at this point in the history
This adds a sixth value to the tuple returned by the extractor functions
which should be a set of flags.

Which flags should be applied to a message should be determined by the
extractor, as it depends on e.g. the file format being parsed. For
example "%s" should have the python-format flag if it was parsed from a
Python file and the c-format flag if it was extracted from a C file.

The logic of detecting python-format flags is also moved to the Python
extractor in this PR.

NOTE:

This is partially a breaking change. Backwards compatibility is maintained
with extractors that return 5-tuples instead of 6-tuples, but the
interface Babel exposes for extracting messages always returns 6-tuples.
I don't see a good way around this.

Removing the python-format detection from Message.__init__ is also a
breaking change, but that could potentially be put back for now.
  • Loading branch information
ljodal committed Jan 31, 2022
1 parent 9d6803a commit 5c20283
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 98 deletions.
21 changes: 1 addition & 20 deletions babel/messages/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,6 @@
__all__ = ['Message', 'Catalog', 'TranslationError']


PYTHON_FORMAT = re.compile(r'''
\%
(?:\(([\w]*)\))?
(
[-#0\ +]?(?:\*|[\d]+)?
(?:\.(?:\*|[\d]+))?
[hlL]?
)
([diouxXeEfFgGcrs%])
''', re.VERBOSE)


def _parse_datetime_header(value):
match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)

Expand Down Expand Up @@ -96,10 +84,6 @@ def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
self.string = string
self.locations = list(distinct(locations))
self.flags = set(flags)
if id and self.python_format:
self.flags.add('python-format')
else:
self.flags.discard('python-format')
self.auto_comments = list(distinct(auto_comments))
self.user_comments = list(distinct(user_comments))
if isinstance(previous_id, str):
Expand Down Expand Up @@ -201,10 +185,7 @@ def python_format(self):
True
:type: `bool`"""
ids = self.id
if not isinstance(ids, (list, tuple)):
ids = [ids]
return any(PYTHON_FORMAT.search(id) for id in ids)
return 'python-format' in self.flags


class TranslationError(Exception):
Expand Down
3 changes: 2 additions & 1 deletion babel/messages/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
:license: BSD, see LICENSE for more details.
"""

from babel.messages.catalog import TranslationError, PYTHON_FORMAT
from babel.messages.catalog import TranslationError
from babel.util import PYTHON_FORMAT


#: list of format chars that are compatible to each other
Expand Down
45 changes: 29 additions & 16 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import sys
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

from babel.util import parse_encoding, parse_future_flags, pathmatch
from babel.util import parse_encoding, parse_future_flags, pathmatch, has_python_format
from textwrap import dedent


Expand Down Expand Up @@ -79,7 +79,7 @@ def extract_from_dir(
"""Extract messages from any source files found in the given directory.
This function generates tuples of the form ``(filename, lineno, message,
comments, context)``.
comments, context, flags)``.
Which extraction method is used per file is determined by the `method_map`
parameter, which maps extended glob patterns to extraction method names.
Expand Down Expand Up @@ -185,7 +185,7 @@ def check_and_call_extract_file(filepath, method_map, options_map,
So, given an absolute path to a file `filepath`, we want to check using
just the relative path from `dirpath` to `filepath`.
Yields 5-tuples (filename, lineno, messages, comments, context).
Yields 6-tuples (filename, lineno, messages, comments, context, flags).
:param filepath: An absolute path to a file that exists.
:param method_map: a list of ``(pattern, method)`` tuples that maps of
Expand All @@ -205,8 +205,8 @@ def check_and_call_extract_file(filepath, method_map, options_map,
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param dirpath: the path to the directory to extract messages from.
:return: iterable of 5-tuples (filename, lineno, messages, comments, context)
:rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None]
:return: iterable of 6-tuples (filename, lineno, messages, comments, context, flags)
:rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None, set[str]]]
"""
# filename is the relative path from dirpath to the actual file
filename = relpath(filepath, dirpath)
Expand All @@ -228,7 +228,10 @@ def check_and_call_extract_file(filepath, method_map, options_map,
options=options,
strip_comment_tags=strip_comment_tags
):
yield (filename, ) + message_tuple
if len(message_tuple) == 4:
yield (filename, *message_tuple, set())
else:
yield (filename, *message_tuple)

break

Expand All @@ -237,7 +240,7 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
comment_tags=(), options=None, strip_comment_tags=False):
"""Extract messages from a specific file.
This function returns a list of tuples of the form ``(lineno, message, comments, context)``.
This function returns a list of tuples of the form ``(lineno, message, comments, context, flags)``.
:param filename: the path to the file to extract messages from
:param method: a string specifying the extraction method (.e.g. "python")
Expand All @@ -250,8 +253,8 @@ def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param options: a dictionary of additional options (optional)
:returns: list of tuples of the form ``(lineno, message, comments, context)``
:rtype: list[tuple[int, str|tuple[str], list[str], str|None]
:returns: list of tuples of the form ``(lineno, message, comments, context, flags)``
:rtype: list[tuple[int, str|tuple[str], list[str], str|None, set[str]]]
"""
if method == 'ignore':
return []
Expand Down Expand Up @@ -299,8 +302,8 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:raise ValueError: if the extraction method is not registered
:returns: iterable of tuples of the form ``(lineno, message, comments, context)``
:rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None]
:returns: iterable of tuples of the form ``(lineno, message, comments, context, flags)``
:rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None, set[str]]]
"""
func = None
if callable(method):
Expand Down Expand Up @@ -339,7 +342,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
results = func(fileobj, keywords.keys(), comment_tags,
options=options or {})

for lineno, funcname, messages, comments in results:
for lineno, funcname, messages, comments, *rest in results:
if funcname:
spec = keywords[funcname] or (1,)
else:
Expand All @@ -349,6 +352,11 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
if not messages:
continue

if rest:
flags = rest[0]
else:
flags = set()

# Validate the messages against the keyword's specification
context = None
msgs = []
Expand Down Expand Up @@ -390,7 +398,7 @@ def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),

if strip_comment_tags:
_strip_comment_tags(comments, comment_tags)
yield lineno, messages, comments, context
yield lineno, messages, comments, context, flags


def extract_nothing(fileobj, keywords, comment_tags, options):
Expand All @@ -404,7 +412,7 @@ def extract_python(fileobj, keywords, comment_tags, options):
"""Extract messages from Python source code.
It returns an iterator yielding tuples in the following form ``(lineno,
funcname, message, comments)``.
funcname, message, comments, flags)``.
:param fileobj: the seekable, file-like object the messages should be
extracted from
Expand All @@ -422,6 +430,7 @@ def extract_python(fileobj, keywords, comment_tags, options):
translator_comments = []
in_def = in_translator_comments = False
comment_tag = None
flags = set()

encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8')
future_flags = parse_future_flags(fileobj, encoding)
Expand Down Expand Up @@ -468,6 +477,9 @@ def extract_python(fileobj, keywords, comment_tags, options):
else:
messages.append(None)

if has_python_format(message for message in messages if message):
flags.add("python-format")

if len(messages) > 1:
messages = tuple(messages)
else:
Expand All @@ -479,12 +491,13 @@ def extract_python(fileobj, keywords, comment_tags, options):
translator_comments = []

yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
[comment[1] for comment in translator_comments], flags)

funcname = lineno = message_lineno = None
call_stack = -1
messages = []
translator_comments = []
flags = set()
in_translator_comments = False
if nested:
funcname = value
Expand Down Expand Up @@ -610,7 +623,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options):

if messages is not None:
yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
[comment[1] for comment in translator_comments], set())

funcname = message_lineno = last_argument = None
concatenate_next = False
Expand Down
4 changes: 2 additions & 2 deletions babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,14 +502,14 @@ def callback(filename, method, options):
strip_comment_tags=self.strip_comments,
directory_filter=self.directory_filter,
)
for filename, lineno, message, comments, context in extracted:
for filename, lineno, message, comments, context, flags in extracted:
if os.path.isfile(path):
filepath = filename # already normalized
else:
filepath = os.path.normpath(os.path.join(path, filename))

catalog.add(message, None, [(filepath, lineno)],
auto_comments=comments, context=context)
auto_comments=comments, context=context, flags=flags)

self.log.info('writing PO template file to %s', self.output_file)
write_po(outfile, catalog, width=self.width,
Expand Down
18 changes: 18 additions & 0 deletions babel/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,21 @@ def dst(self, dt):

def _cmp(a, b):
return (a > b) - (a < b)


# Matches one printf-style conversion specifier. Capture groups:
#   1. the optional parenthesised mapping key (``name`` in ``%(name)s``)
#   2. the optional flag, width, precision and length modifier
#   3. the conversion character (``%`` is included, so ``%%`` matches too)
PYTHON_FORMAT = re.compile(r'''
\%
(?:\(([\w]*)\))?
(
[-#0\ +]?(?:\*|[\d]+)?
(?:\.(?:\*|[\d]+))?
[hlL]?
)
([diouxXeEfFgGcrs%])
''', re.VERBOSE)


def has_python_format(ids):
    """Return whether any of the given message ids contains a format specifier.

    :param ids: a single message id string, or an iterable of message id
                strings (list, tuple, generator, ...)
    :return: `True` if `PYTHON_FORMAT` matches anywhere in at least one of
             the strings, `False` otherwise (including for an empty iterable)
    :rtype: bool
    """
    # A lone string must be wrapped, otherwise it would be iterated
    # character by character and no multi-character specifier could match.
    if isinstance(ids, str):
        ids = [ids]
    return any(PYTHON_FORMAT.search(message_id) for message_id in ids)
17 changes: 0 additions & 17 deletions tests/messages/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,6 @@

class MessageTestCase(unittest.TestCase):

def test_python_format(self):
assert catalog.PYTHON_FORMAT.search('foo %d bar')
assert catalog.PYTHON_FORMAT.search('foo %s bar')
assert catalog.PYTHON_FORMAT.search('foo %r bar')
assert catalog.PYTHON_FORMAT.search('foo %(name).1f')
assert catalog.PYTHON_FORMAT.search('foo %(name)3.3f')
assert catalog.PYTHON_FORMAT.search('foo %(name)3f')
assert catalog.PYTHON_FORMAT.search('foo %(name)06d')
assert catalog.PYTHON_FORMAT.search('foo %(name)Li')
assert catalog.PYTHON_FORMAT.search('foo %(name)#d')
assert catalog.PYTHON_FORMAT.search('foo %(name)-4.4hs')
assert catalog.PYTHON_FORMAT.search('foo %(name)*.3f')
assert catalog.PYTHON_FORMAT.search('foo %(name).*f')
assert catalog.PYTHON_FORMAT.search('foo %(name)3.*f')
assert catalog.PYTHON_FORMAT.search('foo %(name)*.*f')
assert catalog.PYTHON_FORMAT.search('foo %()s')

def test_translator_comments(self):
mess = catalog.Message('foo', user_comments=['Comment About `foo`'])
self.assertEqual(mess.user_comments, ['Comment About `foo`'])
Expand Down
Loading

0 comments on commit 5c20283

Please sign in to comment.