Skip to content

Commit

Permalink
[GR-45840] Deal with encoding errors during printing
Browse files Browse the repository at this point in the history
PullRequest: mx/1783
  • Loading branch information
patrick96 committed Apr 29, 2024
2 parents 923a35e + f2ae987 commit 1a03439
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 15 deletions.
3 changes: 3 additions & 0 deletions ci/common.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ local common_json = import "../common.json";
mx: {
environment+: {
MX_PYTHON: "python3.8",
PYTHONIOENCODING: "utf-8",
},
packages+: {
python3: "==3.8.10",
Expand All @@ -145,6 +146,8 @@ local common_json = import "../common.json";
"Dumping debug output to '(?P<filename>[^']+)'",
# Keep in sync with com.oracle.svm.hosted.NativeImageOptions#DEFAULT_ERROR_FILE_NAME
" (?P<filename>.+/svm_err_b_\\d+T\\d+\\.\\d+_pid\\d+\\.md)",
# Keep in sync with jdk.graal.compiler.test.SubprocessUtil#makeArgfile
" @(?P<filename>.*SubprocessUtil.*\\.argfile)",
],
},

Expand Down
9 changes: 7 additions & 2 deletions src/mx/_impl/mx.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@
"current_mx_command",
"main",
"version",
"get_tools_jdk",
"JavaCompliance", # Re-export from mx_javacompliance
"webserver", # Re-export from mx_webserver
]
Expand Down Expand Up @@ -365,7 +366,8 @@
from .build.daemon import Daemon
from .support.comparable import compare, Comparable
from .support.envvars import env_var_to_bool, get_env
from .support.logging import abort, abort_or_warn, colorize, log, logv, logvv, log_error, nyi, warn
from .support.logging import abort, abort_or_warn, colorize, log, logv, logvv, log_error, nyi, warn, \
_check_stdout_encoding
from .support.options import _opts, _opts_parsed_deferrables
from .support.path import _safe_path, lstat
from .support.processes import _addSubprocess, _check_output_str, _currentSubprocesses, _is_process_alive, _kill_process, _removeSubprocess, _waitWithTimeout, waitOn
Expand Down Expand Up @@ -17935,6 +17937,9 @@ def main():
_opts.__dict__['very_verbose'] = '-V' in sys.argv
_opts.__dict__['warn'] = '--no-warning' not in sys.argv
_opts.__dict__['quiet'] = '--quiet' in sys.argv

_check_stdout_encoding()

global _vc_systems
_vc_systems = [HgConfig(), GitConfig(), BinaryVC()]

Expand Down Expand Up @@ -18165,7 +18170,7 @@ def alarm_handler(signum, frame):
_CACHE_DIR = get_env('MX_CACHE_DIR', join(dot_mx_dir(), 'cache'))

# The version must be updated for every PR (checked in CI) and the comment should reflect the PR's issue
version = VersionSpec("7.23.0") # Add --fix-unused-imports for mx checkstyle
version = VersionSpec("7.24.0") # GR-45840 Deal with encoding errors during printing

_mx_start_datetime = datetime.utcnow()

Expand Down
79 changes: 66 additions & 13 deletions src/mx/_impl/support/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@
# ----------------------------------------------------------------------------------------------------

from __future__ import annotations
import sys, signal, threading
from typing import Any, NoReturn, Optional

from .options import _opts, _opts_parsed_deferrables

__all__ = [
"abort",
Expand All @@ -43,6 +39,13 @@
"warn",
]

import sys, signal, threading
import traceback
from typing import Any, NoReturn, Optional

from .options import _opts, _opts_parsed_deferrables


# https://en.wikipedia.org/wiki/ANSI_escape_code#Colors
_ansi_color_table = {
"black": "30",
Expand All @@ -55,6 +58,58 @@
}


def _check_stdout_encoding():
# Importing here to avoid broken circular import
from .system import is_continuous_integration

encoding = sys.stdout.encoding

if "utf" not in encoding:
msg = (
"Python's stdout does not use a unicode encoding.\n"
"This may cause encoding errors when printing special characters.\n"
"Please set up your system or console to use a unicode encoding.\n"
"When piping mx output, you can force UTF-8 encoding with the environment variable PYTHONIOENCODING=utf-8"
)
if is_continuous_integration():
abort(msg)
else:
warn(msg)


def _print_impl(msg: Optional[str] = None, end: Optional[str] = "\n", file=sys.stdout):
try:
print(msg, end=end, file=file)
except UnicodeEncodeError as e:
# In case any text is printed that can't be encoded (e.g. if output console does not use a unicode encoding),
# print an error message and try to print text as good as possible.

error_handler = "backslashreplace"
offending_str = (
e.object[e.start : e.end].encode(e.encoding, errors=error_handler).decode(e.encoding, errors="ignore")
)

# Not using our log functions here to avoid infinite recursion in case these calls cause unicode errors
def print_err(err_msg: str):
print(colorize(err_msg, color="red"), file=sys.stderr)

print_err(f"[ENCODING ERROR] {e}. Encoding: {e.encoding}. Offending characters: '{offending_str}'")

if "verbose" in _opts and _opts.verbose:
traceback.print_stack()
else:
print_err("Turn on verbose mode (-v) to see the stack trace")

print_err(f"Printing text with '{error_handler}' error handler:")

# Encode and decode with the target encoding to get a string that can be safely printed
print(
msg.encode(e.encoding, errors=error_handler).decode(e.encoding, errors="ignore"),
end=end,
file=file,
)


def log(msg: Optional[str] = None, end: Optional[str] = "\n"):
"""
Write a message to the console.
Expand All @@ -64,7 +119,7 @@ def log(msg: Optional[str] = None, end: Optional[str] = "\n"):
if vars(_opts).get("quiet"):
return
if msg is None:
print(end=end)
_print_impl(end=end)
else:
# https://docs.python.org/2/reference/simple_stmts.html#the-print-statement
# > A '\n' character is written at the end, unless the print statement
Expand All @@ -79,7 +134,7 @@ def log(msg: Optional[str] = None, end: Optional[str] = "\n"):
# instruction is omitted. By manually adding the newline to the string,
# there is only a single PRINT_ITEM instruction which is executed
# atomically, but still prints the newline.
print(str(msg), end=end)
_print_impl(str(msg), end=end)


def logv(msg: Optional[str] = None, end="\n") -> None:
Expand Down Expand Up @@ -115,19 +170,19 @@ def log_error(msg: Optional[str] = None, end="\n") -> None:
to redirect it.
"""
if msg is None:
print(file=sys.stderr, end=end)
_print_impl(file=sys.stderr, end=end)
else:
print(colorize(str(msg), stream=sys.stderr), file=sys.stderr, end=end)
_print_impl(colorize(str(msg), stream=sys.stderr), file=sys.stderr, end=end)


def log_deprecation(msg: Optional[str] = None) -> None:
"""
Write a deprecation warning to the console.

The output goes to `sys.stderr`. A message that is not None is prefixed with
"[MX DEPRECATED] " and passed through `colorize` with the color yellow.
"""
if msg is None:
print(file=sys.stderr)
_print_impl(file=sys.stderr)
else:
print(colorize(str(f"[MX DEPRECATED] {msg}"), color="yellow", stream=sys.stderr), file=sys.stderr)
_print_impl(colorize(str(f"[MX DEPRECATED] {msg}"), color="yellow", stream=sys.stderr), file=sys.stderr)


def colorize(msg: Optional[str], color="red", bright=True, stream=sys.stderr) -> Optional[str]:
Expand Down Expand Up @@ -162,7 +217,7 @@ def warn(msg: str, context=None) -> None:
else:
contextMsg = str(context)
msg = contextMsg + ":\n" + msg
print(colorize("WARNING: " + msg, color="magenta", bright=True, stream=sys.stderr), file=sys.stderr)
_print_impl(colorize("WARNING: " + msg, color="magenta", bright=True, stream=sys.stderr), file=sys.stderr)


def abort(codeOrMessage: str | int, context=None, killsig=signal.SIGTERM) -> NoReturn:
Expand Down Expand Up @@ -191,8 +246,6 @@ def abort(codeOrMessage: str | int, context=None, killsig=signal.SIGTERM) -> NoR

sys.stdout.flush()
if is_continuous_integration() or (_opts and hasattr(_opts, "verbose") and _opts.verbose):
import traceback

traceback.print_stack()
if context is not None:
if callable(context):
Expand Down

0 comments on commit 1a03439

Please sign in to comment.