Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enclose white spaces in references #1105

Merged
merged 4 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions babel/messages/pofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,50 @@
return unescape(string)


def _extract_locations(line: str) -> list[str]:
"""Extract locations from location comments.

Locations are extracted while properly handling First Strong
Isolate (U+2068) and Pop Directional Isolate (U+2069), used by
gettext to enclose filenames with spaces and tabs in their names.
"""
if "\u2068" not in line and "\u2069" not in line:
akx marked this conversation as resolved.
Show resolved Hide resolved
return line.lstrip().split()

locations = []
akx marked this conversation as resolved.
Show resolved Hide resolved
location = ""
in_filename = False
for c in line:
if c == "\u2068":
if in_filename:
raise ValueError("location comment contains more First Strong Isolate "
"characters, than Pop Directional Isolate characters")
in_filename = True
continue
elif c == "\u2069":
if not in_filename:
raise ValueError("location comment contains more Pop Directional Isolate "
"characters, than First Strong Isolate characters")
in_filename = False
continue
elif c == " ":
if in_filename:
location += c
elif location:
locations.append(location)
location = ""
else:
location += c
else:
if location:
if in_filename:
raise ValueError("location comment contains more First Strong Isolate "
"characters, than Pop Directional Isolate characters")
locations.append(location)

return locations


class PoFileError(Exception):
"""Exception thrown by PoParser when an invalid po file is encountered."""

Expand Down Expand Up @@ -269,7 +313,7 @@
self._finish_current_message()

if line[1:].startswith(':'):
for location in line[2:].lstrip().split():
for location in _extract_locations(line[2:]):
pos = location.rfind(':')
if pos >= 0:
try:
Expand Down Expand Up @@ -307,7 +351,10 @@
if line[1:].startswith('~'):
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
else:
self._process_comment(line)
try:
self._process_comment(line)
except ValueError as exc:
self._invalid_pofile(line, lineno, str(exc))

Check warning on line 357 in babel/messages/pofile.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/pofile.py#L356-L357

Added lines #L356 - L357 were not covered by tests
else:
self._process_message_line(lineno, line)

Expand Down Expand Up @@ -474,6 +521,23 @@
return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])


def _enclose_filename_if_necessary(filename: str) -> str:
"""Enclose filenames which include white spaces or tabs.

Do the same as gettext and enclose filenames which contain white
spaces or tabs with First Strong Isolate (U+2068) and Pop
Directional Isolate (U+2069).
"""
if " " not in filename and "\t" not in filename:
return filename

if not filename.startswith("\u2068"):
filename = "\u2068" + filename
if not filename.endswith("\u2069"):
filename += "\u2069"
return filename


def write_po(
fileobj: SupportsWrite[bytes],
catalog: Catalog,
Expand Down Expand Up @@ -626,6 +690,7 @@

for filename, lineno in locations:
location = filename.replace(os.sep, '/')
location = _enclose_filename_if_necessary(location)
if lineno and include_lineno:
location = f"{location}:{lineno:d}"
if location not in locs:
Expand Down
112 changes: 112 additions & 0 deletions tests/messages/test_pofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from babel.core import Locale
from babel.messages import pofile
from babel.messages.catalog import Catalog, Message
from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations
from babel.util import FixedOffsetTimezone


Expand Down Expand Up @@ -438,6 +439,19 @@ def test_missing_plural_in_the_middle(self):
assert message.string[1] == ''
assert message.string[2] == 'Vohs [text]'

def test_with_location(self):
    # One plain reference and one isolate-enclosed reference whose
    # filename contains spaces share a single location comment.
    po_source = StringIO('''\
#: main.py:1 \u2068filename with whitespace.py\u2069:123
msgid "foo"
msgstr "bar"
''')
    catalog = pofile.read_po(po_source, locale='de_DE')
    assert len(catalog) == 1

    message = catalog['foo']
    assert message.string == 'bar'
    # The isolate characters must be gone and the spaces kept.
    expected_locations = [("main.py", 1), ("filename with whitespace.py", 123)]
    assert message.locations == expected_locations


def test_abort_invalid_po_file(self):
invalid_po = '''
msgctxt ""
Expand Down Expand Up @@ -841,6 +855,59 @@ def test_no_include_lineno(self):
msgid "foo"
msgstr ""'''

def test_white_space_in_location(self):
    # A filename containing a space is written enclosed in U+2068/U+2069
    # (UTF-8: e2 81 a8 / e2 81 a9); the plain filename is left as is.
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('utils b.py', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    assert out.getvalue().strip() == expected

def test_white_space_in_location_already_enclosed(self):
    # A filename that already carries both isolate characters must not
    # be enclosed a second time when written.
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('\u2068utils b.py\u2069', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    assert out.getvalue().strip() == expected

def test_tab_in_location(self):
    # A filename containing a tab is written enclosed in U+2068/U+2069.
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('utils\tb.py', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    # NOTE(review): the whitespace between "utils" and "b.py" below is
    # reproduced as found; confirm it matches exactly what write_po emits
    # for a tab (the run may have been altered when this text was copied).
    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    assert out.getvalue().strip() == expected

def test_tab_in_location_already_enclosed(self):
    # A tab-containing filename that already carries both isolate
    # characters must not be enclosed a second time when written.
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('\u2068utils\tb.py\u2069', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    # NOTE(review): the whitespace between "utils" and "b.py" below is
    # reproduced as found; confirm it matches exactly what write_po emits
    # for a tab (the run may have been altered when this text was copied).
    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    assert out.getvalue().strip() == expected


class RoundtripPoTestCase(unittest.TestCase):
    """Write a catalog as PO, read it back, and compare the two catalogs."""

    def test_enclosed_filenames_in_location_comment(self):
        # Mix filenames with and without spaces, alone and combined in
        # one message, so enclosed and plain references are both covered.
        original = Catalog()
        original.add("foo", lineno=2, locations=[("main 1.py", 1)], string="")
        original.add("bar", lineno=6, locations=[("other.py", 2)], string="")
        original.add(
            "baz",
            lineno=10,
            locations=[("main 1.py", 3), ("other.py", 4)],
            string="",
        )

        stream = BytesIO()
        pofile.write_po(stream, original, omit_header=True, include_lineno=True)
        stream.seek(0)  # rewind so read_po starts at the beginning

        reloaded = pofile.read_po(stream)
        assert original.is_identical(reloaded) is True

class PofileFunctionsTestCase(unittest.TestCase):

Expand All @@ -864,6 +931,51 @@ def test_denormalize_on_msgstr_without_empty_first_line(self):
assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}')


@pytest.mark.parametrize(
    ("line", "locations"),
    [
        # a single enclosed filename without whitespace
        ("\u2068file1.po\u2069", ["file1.po"]),
        # an enclosed filename with a space between two plain ones
        (
            "file1.po \u2068file 2.po\u2069 file3.po",
            ["file1.po", "file 2.po", "file3.po"],
        ),
        # the line number follows the closing isolate
        (
            "file1.po:1 \u2068file 2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file 2.po:2", "file3.po:3"],
        ),
        # a tab inside an enclosed filename is kept
        (
            "\u2068file1.po\u2069:1 \u2068file\t2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file\t2.po:2", "file3.po:3"],
        ),
        # no isolate characters at all
        ("file1.po file2.po", ["file1.po", "file2.po"]),
        # an empty isolate pair yields no location
        ("file1.po \u2068\u2069 file2.po", ["file1.po", "file2.po"]),
    ],
)
def test_extract_locations_valid_location_comment(line, locations):
    extracted = _extract_locations(line)
    assert extracted == locations


@pytest.mark.parametrize(
    "line",
    [
        "\u2068file 1.po",  # opened, never closed
        "file 1.po\u2069",  # closed, never opened
        "\u2069file 1.po\u2068",  # closed before opened
        "\u2068file 1.po:1 \u2068file 2.po\u2069:2",  # opened twice in a row
        "\u2068file 1.po\u2069:1 file 2.po\u2069:2",  # closed twice in a row
    ],
)
def test_extract_locations_invalid_location_comment(line):
    # Unpaired isolate characters must be rejected with ValueError.
    with pytest.raises(ValueError):
        _extract_locations(line)


@pytest.mark.parametrize(
    "filename",
    [
        "file.po",
        "file_a.po",
        "file-a.po",
        "file\n.po",  # a newline alone does not trigger enclosing
        "\u2068file.po\u2069",
        "\u2068file a.po\u2069",  # already enclosed on both ends
    ],
)
def test_enclose_filename_if_necessary_no_change(filename):
    # These filenames must come back exactly as they went in.
    result = _enclose_filename_if_necessary(filename)
    assert result == filename


@pytest.mark.parametrize(
    "filename",
    [
        "file a.po",
        "file\ta.po",
    ],
)
def test_enclose_filename_if_necessary_enclosed(filename):
    # A space or a tab in the filename triggers enclosing in U+2068/U+2069.
    expected = "\u2068" + filename + "\u2069"
    assert _enclose_filename_if_necessary(filename) == expected


def test_unknown_language_roundtrip():
buf = StringIO(r'''
msgid ""
Expand Down