From 75e045096b2580ab22d6a3b7530622ce8c96d98c Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Tue, 9 Apr 2024 18:31:37 +0200 Subject: [PATCH 1/6] [GR-45840] Deal with encoding errors during printing --- src/mx/_impl/support/logging.py | 48 ++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/src/mx/_impl/support/logging.py b/src/mx/_impl/support/logging.py index a194588a..a128abde 100644 --- a/src/mx/_impl/support/logging.py +++ b/src/mx/_impl/support/logging.py @@ -26,6 +26,7 @@ from __future__ import annotations import sys, signal, threading +import traceback from typing import Any, NoReturn, Optional from .options import _opts, _opts_parsed_deferrables @@ -55,6 +56,39 @@ } +def _print_impl(msg: Optional[str] = None, end: Optional[str] = "\n", file=sys.stdout): + try: + print(msg, end=end, file=file) + except UnicodeEncodeError as e: + # In case any text is printed that can't be encoded (e.g. if output console does not use a unicode encoding), + # print an error message and try to print text as good as possible. + + error_handler = "backslashreplace" + offending_str = ( + e.object[e.start : e.end].encode(e.encoding, errors=error_handler).decode(e.encoding, errors="ignore") + ) + + # Not using our log functions here to avoid infinite recursion in case these calls cause unicode errors + def print_err(err_msg: str): + print(colorize(err_msg, color="red"), file=sys.stderr) + + print_err(f"[ENCODING ERROR] {e}. Encoding: {e.encoding}. Offending characters: '{offending_str}'") + + if "verbose" in _opts and _opts.verbose: + traceback.print_stack() + else: + print_err("Turn on verbose mode (-v) to see the stack trace") + + print_err(f"Printing text with '{error_handler}' error handler:") + + # Encode and decode with the target encoding to get a string that can be safely printed + print( + msg.encode(e.encoding, errors=error_handler).decode(e.encoding, errors="ignore"), + end=end, + file=file, + ) + + def log(msg: Optional[str] = None, end: Optional[str] = "\n"): """ Write a message to the console. @@ -64,7 +98,7 @@ def log(msg: Optional[str] = None, end: Optional[str] = "\n"): if vars(_opts).get("quiet"): return if msg is None: - print(end=end) + _print_impl(end=end) else: # https://docs.python.org/2/reference/simple_stmts.html#the-print-statement # > A '\n' character is written at the end, unless the print statement @@ -79,7 +113,7 @@ def log(msg: Optional[str] = None, end: Optional[str] = "\n"): # instruction is omitted. By manually adding the newline to the string, # there is only a single PRINT_ITEM instruction which is executed # atomically, but still prints the newline. - print(str(msg), end=end) + _print_impl(str(msg), end=end) def logv(msg: Optional[str] = None, end="\n") -> None: @@ -115,9 +149,9 @@ def log_error(msg: Optional[str] = None, end="\n") -> None: to redirect it. """ if msg is None: - print(file=sys.stderr, end=end) + _print_impl(file=sys.stderr, end=end) else: - print(colorize(str(msg), stream=sys.stderr), file=sys.stderr, end=end) + _print_impl(colorize(str(msg), stream=sys.stderr), file=sys.stderr, end=end) def log_deprecation(msg: Optional[str] = None) -> None: @@ -125,9 +159,9 @@ def log_deprecation(msg: Optional[str] = None) -> None: Write an deprecation warning to the console. """ if msg is None: - print(file=sys.stderr) + _print_impl(file=sys.stderr) else: - print(colorize(str(f"[MX DEPRECATED] {msg}"), color="yellow", stream=sys.stderr), file=sys.stderr) + _print_impl(colorize(str(f"[MX DEPRECATED] {msg}"), color="yellow", stream=sys.stderr), file=sys.stderr) def colorize(msg: Optional[str], color="red", bright=True, stream=sys.stderr) -> Optional[str]: @@ -162,7 +196,7 @@ def warn(msg: str, context=None) -> None: else: contextMsg = str(context) msg = contextMsg + ":\n" + msg - print(colorize("WARNING: " + msg, color="magenta", bright=True, stream=sys.stderr), file=sys.stderr) + _print_impl(colorize("WARNING: " + msg, color="magenta", bright=True, stream=sys.stderr), file=sys.stderr) def abort(codeOrMessage: str | int, context=None, killsig=signal.SIGTERM) -> NoReturn: From b6625a410340c52c79d3006fd0fbcaf16f1aa608 Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Tue, 9 Apr 2024 18:42:45 +0200 Subject: [PATCH 2/6] Check for non-unicode encoding at startup In the CI, this will fail --- src/mx/_impl/mx.py | 6 +++++- src/mx/_impl/support/logging.py | 31 ++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/mx/_impl/mx.py b/src/mx/_impl/mx.py index 27aad283..2013a7ad 100755 --- a/src/mx/_impl/mx.py +++ b/src/mx/_impl/mx.py @@ -365,7 +365,8 @@ from .build.daemon import Daemon from .support.comparable import compare, Comparable from .support.envvars import env_var_to_bool, get_env -from .support.logging import abort, abort_or_warn, colorize, log, logv, logvv, log_error, nyi, warn +from .support.logging import abort, abort_or_warn, colorize, log, logv, logvv, log_error, nyi, warn, \ + _check_stdout_encoding from .support.options import _opts, _opts_parsed_deferrables from .support.path import _safe_path, lstat from .support.processes import _addSubprocess, _check_output_str, _currentSubprocesses, _is_process_alive, _kill_process, _removeSubprocess, _waitWithTimeout, waitOn @@ -17935,6 +17936,9 @@ def main(): _opts.__dict__['very_verbose'] = '-V' in sys.argv _opts.__dict__['warn'] = '--no-warning' not in sys.argv _opts.__dict__['quiet'] = '--quiet' in sys.argv + + _check_stdout_encoding() + global _vc_systems _vc_systems = [HgConfig(), GitConfig(), BinaryVC()] diff --git a/src/mx/_impl/support/logging.py b/src/mx/_impl/support/logging.py index a128abde..64093f49 100644 --- a/src/mx/_impl/support/logging.py +++ b/src/mx/_impl/support/logging.py @@ -25,11 +25,6 @@ # ---------------------------------------------------------------------------------------------------- from __future__ import annotations -import sys, signal, threading -import traceback -from typing import Any, NoReturn, Optional - -from .options import _opts, _opts_parsed_deferrables __all__ = [ "abort", @@ -44,6 +39,13 @@ "warn", ] +import sys, signal, threading +import traceback +from typing import Any, NoReturn, Optional + +from .options import _opts, _opts_parsed_deferrables + + # https://en.wikipedia.org/wiki/ANSI_escape_code#Colors _ansi_color_table = { "black": "30", @@ -56,6 +58,25 @@ } +def _check_stdout_encoding(): + # Importing here to avoid broken circular import + from .system import is_continuous_integration + + encoding = sys.stdout.encoding + + if "utf" not in encoding: + msg = ( + "Python's stdout does not use a unicode encoding.\n" + "This may cause encoding errors when printing special characters.\n" + "Please set up your system or console to use a unicode encoding.\n" + "When piping mx output, you can force UTF-8 encoding with the environment variable PYTHONIOENCODING=utf-8" + ) + if is_continuous_integration(): + abort(msg) + else: + warn(msg) + + def _print_impl(msg: Optional[str] = None, end: Optional[str] = "\n", file=sys.stdout): try: print(msg, end=end, file=file) From eff65cf5bc130ba59f2c97fe519d8e35a1d1f122 Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Mon, 29 Apr 2024 10:42:33 +0200 Subject: [PATCH 3/6] Export get_tools_jdk Is used in existing suite code --- src/mx/_impl/mx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mx/_impl/mx.py b/src/mx/_impl/mx.py index 2013a7ad..5da87696 100755 --- a/src/mx/_impl/mx.py +++ b/src/mx/_impl/mx.py @@ -310,6 +310,7 @@ "current_mx_command", "main", "version", + "get_tools_jdk", "JavaCompliance", # Re-export from mx_javacompliance "webserver", # Re-export from mx_webserver ] From c2e034a2a70e67e4648a5b5e115fc43a72f68a2d Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Mon, 29 Apr 2024 10:46:36 +0200 Subject: [PATCH 4/6] Bump mx version --- src/mx/_impl/mx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mx/_impl/mx.py b/src/mx/_impl/mx.py index 5da87696..902bd4db 100755 --- a/src/mx/_impl/mx.py +++ b/src/mx/_impl/mx.py @@ -18170,7 +18170,7 @@ def alarm_handler(signum, frame): _CACHE_DIR = get_env('MX_CACHE_DIR', join(dot_mx_dir(), 'cache')) # The version must be updated for every PR (checked in CI) and the comment should reflect the PR's issue -version = VersionSpec("7.23.0") # Add --fix-unused-imports for mx checkstyle +version = VersionSpec("7.24.0") # GR-45840 Deal with encoding errors during printing _mx_start_datetime = datetime.utcnow() From a925edc39406744a4d054b064e5c72d2ff44ef23 Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Mon, 29 Apr 2024 15:26:47 +0200 Subject: [PATCH 5/6] Bump ci/common.jsonnet --- ci/common.jsonnet | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/common.jsonnet b/ci/common.jsonnet index ac61717e..49d2b5e9 100644 --- a/ci/common.jsonnet +++ b/ci/common.jsonnet @@ -128,6 +128,7 @@ local common_json = import "../common.json"; mx: { environment+: { MX_PYTHON: "python3.8", + PYTHONIOENCODING: "utf-8", }, packages+: { python3: "==3.8.10", @@ -145,6 +146,8 @@ local common_json = import "../common.json"; "Dumping debug output to '(?P[^']+)'", # Keep in sync with com.oracle.svm.hosted.NativeImageOptions#DEFAULT_ERROR_FILE_NAME " (?P.+/svm_err_b_\\d+T\\d+\\.\\d+_pid\\d+\\.md)", + # Keep in sync with jdk.graal.compiler.test.SubprocessUtil#makeArgfile + " @(?P.*SubprocessUtil.*\\.argfile)", ], }, From f2ae9874b2b8b2bfa9d78d599f9838187c96137a Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Mon, 29 Apr 2024 16:44:08 +0200 Subject: [PATCH 6/6] Fix pylint --- src/mx/_impl/support/logging.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mx/_impl/support/logging.py b/src/mx/_impl/support/logging.py index 64093f49..7abe85b3 100644 --- a/src/mx/_impl/support/logging.py +++ b/src/mx/_impl/support/logging.py @@ -246,8 +246,6 @@ def abort(codeOrMessage: str | int, context=None, killsig=signal.SIGTERM) -> NoR sys.stdout.flush() if is_continuous_integration() or (_opts and hasattr(_opts, "verbose") and _opts.verbose): - import traceback - traceback.print_stack() if context is not None: if callable(context):