From 9e017d4c526f25ae0688e21b95ed9db694a351ca Mon Sep 17 00:00:00 2001 From: Tiago Nobrega Date: Thu, 18 Jan 2024 15:47:00 -0300 Subject: [PATCH] fix: handle errors when decoding bytes from subprocesses (#222) Since these bytes come from the output of unknown processes, there is no guarantee that they are valid utf-8 text. We also have no way to know what encoding they have (if any), so the best we can do is replace the invalid bytes with the usual unicode marker for that. Fixes #221 --- craft_cli/messages.py | 5 +++-- tests/unit/test_messages_stream_cm.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/craft_cli/messages.py b/craft_cli/messages.py index 75e199a..baffb02 100644 --- a/craft_cli/messages.py +++ b/craft_cli/messages.py @@ -239,8 +239,9 @@ def _write(self, data: bytes) -> None: useful_line = data[pointer:newline_position] pointer = newline_position + 1 - # write the useful line to intended outputs - unicode_line = useful_line.decode("utf8") + # write the useful line to intended outputs. Decode with errors="replace" + # here because we don't know where this line is coming from. + unicode_line = useful_line.decode("utf8", errors="replace") # replace tabs with a set number of spaces so that the printer # can correctly count the characters. unicode_line = unicode_line.replace("\t", " ") diff --git a/tests/unit/test_messages_stream_cm.py b/tests/unit/test_messages_stream_cm.py index b1ffcb5..dbba643 100644 --- a/tests/unit/test_messages_stream_cm.py +++ b/tests/unit/test_messages_stream_cm.py @@ -257,6 +257,24 @@ def test_pipereader_tabs(recording_printer, stream): assert msg.text == ":: 123 456" # tabs expanded into 2 spaces +@pytest.mark.parametrize( + ("invalid_text", "expected"), + [(b"\xf0\x28\x8c\xbc", "�(��"), (b"\xf0\x90\x28\xbc", "�(�"), (b"\xf0\x90\x8c\x28", "�(")], +) +def test_pipereader_invalid_utf8(recording_printer, invalid_text, expected): + """Check that bytes that aren't valid utf-8 text don't crash.""" + invalid_bytes = b"valid prefix " + invalid_text + b" valid suffix\n" + + flags = {"use_timestamp": False, "ephemeral": False, "end_line": True} + prt = _PipeReaderThread(recording_printer, sys.stdout, flags) + prt.start() + os.write(prt.write_pipe, invalid_bytes) + prt.stop() + + (msg,) = recording_printer.written_terminal_lines + assert msg.text == f":: valid prefix {expected} valid suffix" + + def test_pipereader_chunk_assembler(recording_printer, monkeypatch): """Converts ok arbitrary chunks to lines.""" monkeypatch.setattr(messages, "_PIPE_READER_CHUNK_SIZE", 5)