From 596f42c947d17a2897b9694461bccda4164e2429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Thu, 6 May 2021 21:32:05 +0200 Subject: [PATCH] Fixes issue #130 Adds support for OSC hyperlink sequences. --- ansi2html/converter.py | 101 +++++++++++++++++++++++++++++----------- tests/test_ansi2html.py | 12 +++++ 2 files changed, 86 insertions(+), 27 deletions(-) diff --git a/ansi2html/converter.py b/ansi2html/converter.py index 347c51d..c4a9970 100644 --- a/ansi2html/converter.py +++ b/ansi2html/converter.py @@ -84,7 +84,7 @@ \\usepackage{fancyvrb} \\usepackage[usenames,dvipsnames]{xcolor} %% \\definecolor{red-sd}{HTML}{7ed2d2} - +%(hyperref)s \\title{%(title)s} \\fvset{commandchars=\\\\\\{\\}} @@ -116,6 +116,7 @@ class _State: def __init__(self): + self.inside_span = False self.reset() def reset(self): @@ -220,15 +221,10 @@ def append_color_unless_default( return css_classes -def linkify(line, latex_mode): - url_matcher = re.compile( - r"(((((https?|ftps?|gopher|telnet|nntp)://)|" - r"(mailto:|news:))(%[0-9A-Fa-f]{2}|[-()_.!~*" - r"\';/?:@&=+$,A-Za-z0-9])+)([).!\';/?:,][\s])?)" - ) - if latex_mode: - return url_matcher.sub(r"\\url{\1}", line) - return url_matcher.sub(r'\1', line) +class OSC_Link: + def __init__(self, url, text): + self.url = url + self.text = text def map_vt100_box_code(char): @@ -282,6 +278,7 @@ def __init__( self.scheme = scheme self.title = title self._attrs = None + self.hyperref = False if inline: self.styles = dict( @@ -293,6 +290,27 @@ def __init__( self.vt100_box_codes_prog = re.compile("\033\\(([B0])") self.ansi_codes_prog = re.compile("\033\\[" "([\\d;]*)" "([a-zA-z])") + self.url_matcher = re.compile( + r"(((((https?|ftps?|gopher|telnet|nntp)://)|" + r"(mailto:|news:))(%[0-9A-Fa-f]{2}|[-()_.!~*" + r"\';/?:@&=+$,A-Za-z0-9])+)([).!\';/?:,][\s])?)" + ) + self.osc_link_re = re.compile("\033\\]8;;(.*?)\007(.*?)\033\\]8;;\007") + + def do_linkify(self, line): + if not isinstance(line, str): + return line # If line is an object, e.g. OSC_Link, it + # will be expanded to a string later + if self.latex: + return self.url_matcher.sub(r"\\url{\1}", line) + return self.url_matcher.sub(r'\1', line) + + def handle_osc_links(self, part): + if self.latex: + self.hyperref = True + return """\\href{%s}{%s}""" % (part.url, part.text) + else: + return """%s""" % (part.url, part.text) def apply_regex(self, ansi): styles_used = set() @@ -300,11 +318,20 @@ def apply_regex(self, ansi): parts = self._collapse_cursor(parts) parts = list(parts) - if self.linkify: - parts = [linkify(part, self.latex) for part in parts] + def _check_links(parts): + for part in parts: + if isinstance(part, str): + if self.linkify: + yield self.do_linkify(part) + else: + yield part + elif isinstance(part, OSC_Link): + yield self.handle_osc_links(part) + else: + yield part + parts = list(_check_links(parts)) combined = "".join(parts) - if self.markup_lines and not self.latex: combined = "\n".join( [ @@ -312,7 +339,6 @@ def apply_regex(self, ansi): for i, line in enumerate(combined.split("\n")) ] ) - return combined, styles_used def _apply_regex(self, ansi, styles_used): @@ -347,9 +373,36 @@ def _vt100_box_drawing(): yield ansi[last_end:] ansi = "".join(_vt100_box_drawing()) + def _osc_link(ansi): + last_end = 0 + for match in self.osc_link_re.finditer(ansi): + trailer = ansi[last_end : match.start()] + yield trailer + url = match.groups()[0] + text = match.groups()[1] + yield OSC_Link(url, text) + last_end = match.end() + yield ansi[last_end:] state = _State() - inside_span = False + for part in _osc_link(ansi): + if isinstance(part, OSC_Link): + yield part + else: + if ((sys.version_info.major == 3 and sys.version_info.minor >= 3) + or sys.version_info.major > 3): + # yield from requires python >= 3.3 + yield from self._handle_ansi_code(part, styles_used, state) + else: + for sub_part in self._handle_ansi_code(part, styles_used, state): + yield sub_part + if state.inside_span: + if self.latex: + yield "}" + else: + yield "" + + def _handle_ansi_code(self, ansi, styles_used, state): last_end = 0 # the index of the last end of a code we've seen for match in self.ansi_codes_prog.finditer(ansi): yield ansi[last_end : match.start()] @@ -385,8 +438,8 @@ def _vt100_box_drawing(): # Process reset marker, drop everything before if last_null_index is not None: params = params[last_null_index + 1 :] - if inside_span: - inside_span = False + if state.inside_span: + state.inside_span = False if self.latex: yield "}" else: @@ -412,12 +465,12 @@ def _vt100_box_drawing(): parameter = None state.adjust(v, parameter=parameter) - if inside_span: + if state.inside_span: if self.latex: yield "}" else: yield "" - inside_span = False + state.inside_span = False css_classes = state.to_css_classes() if not css_classes: @@ -444,15 +497,8 @@ def _vt100_box_drawing(): yield "\\textcolor{%s}{" % " ".join(css_classes) else: yield '' % " ".join(css_classes) - inside_span = True - + state.inside_span = True yield ansi[last_end:] - if inside_span: - if self.latex: - yield "}" - else: - yield "" - inside_span = False def _collapse_cursor(self, parts): """Act on any CursorMoveUp commands by deleting preceding tokens""" @@ -523,6 +569,7 @@ def convert(self, ansi, full=True, ensure_trailing_newline=False): "font_size": self.font_size, "content": attrs["body"], "output_encoding": self.output_encoding, + "hyperref" : "\\usepackage{hyperref}" if self.hyperref else "" } def produce_headers(self): diff --git a/tests/test_ansi2html.py b/tests/test_ansi2html.py index b863a93..7b16321 100644 --- a/tests/test_ansi2html.py +++ b/tests/test_ansi2html.py @@ -63,6 +63,18 @@ def test_not_linkify(self): html = Ansi2HTMLConverter().convert(ansi) assert target not in html + def test_osc_link(self): + ansi = "[\x1b[01;35m\x1b[K\x1b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits\x07-Wtype-limits\x1b]8;;\x07\x1b[m\x1b[K]\n" + target = '[-Wtype-limits]' + html = Ansi2HTMLConverter().convert(ansi) + assert target in html + + def test_osc_link_latex(self): + ansi = "[\x1b[01;35m\x1b[K\x1b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits\x07-Wtype-limits\x1b]8;;\x07\x1b[m\x1b[K]\n" + target = '[\\textcolor{ansi1 ansi35}{\\href{https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits}{-Wtype-limits}}]' + html = Ansi2HTMLConverter(latex=True).convert(ansi) + assert target in html + def test_conversion(self): for input_filename, expected_output_filename in ( ("ansicolor.txt", "ansicolor.html"),