Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for OSC hyperlink sequences. #131

Merged
merged 4 commits into from
Sep 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 68 additions & 27 deletions ansi2html/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
\\usepackage{fancyvrb}
\\usepackage[usenames,dvipsnames]{xcolor}
%% \\definecolor{red-sd}{HTML}{7ed2d2}

%(hyperref)s
\\title{%(title)s}

\\fvset{commandchars=\\\\\\{\\}}
Expand Down Expand Up @@ -116,6 +116,7 @@

class _State:
def __init__(self):
self.inside_span = False
self.reset()

def reset(self):
Expand Down Expand Up @@ -220,15 +221,10 @@ def append_color_unless_default(
return css_classes


# Compiled once at import time instead of on every linkify() call.
# Group 1 spans the whole URL, including the optional trailing
# "punctuation + whitespace" pair matched by the last sub-group.
_URL_MATCHER = re.compile(
    r"(((((https?|ftps?|gopher|telnet|nntp)://)|"
    r"(mailto:|news:))(%[0-9A-Fa-f]{2}|[-()_.!~*"
    r"\';/?:@&=+$,A-Za-z0-9])+)([).!\';/?:,][\s])?)"
)


def linkify(line, latex_mode):
    """Wrap every URL found in *line* in link markup.

    Args:
        line: text to scan for URLs.
        latex_mode: when true, emit ``\\url{...}``; otherwise emit an HTML
            ``<a href="...">...</a>`` element.

    Returns:
        A copy of *line* with each matched URL replaced by its markup.
    """
    template = r"\\url{\1}" if latex_mode else r'<a href="\1">\1</a>'
    return _URL_MATCHER.sub(template, line)
class OSC_Link:
    """Plain holder for one hyperlink: a target URL and its visible text.

    Attributes:
        url: the link target.
        text: the text displayed for the link.
    """

    def __init__(self, url, text):
        self.url = url
        self.text = text

    def __repr__(self):
        # Makes instances readable when they show up in debug output.
        return "%s(url=%r, text=%r)" % (type(self).__name__, self.url, self.text)


def map_vt100_box_code(char):
Expand Down Expand Up @@ -282,6 +278,7 @@ def __init__(
self.scheme = scheme
self.title = title
self._attrs = None
self.hyperref = False

if inline:
self.styles = dict(
Expand All @@ -293,26 +290,54 @@ def __init__(

self.vt100_box_codes_prog = re.compile("\033\\(([B0])")
self.ansi_codes_prog = re.compile("\033\\[" "([\\d;]*)" "([a-zA-z])")
self.url_matcher = re.compile(
r"(((((https?|ftps?|gopher|telnet|nntp)://)|"
r"(mailto:|news:))(%[0-9A-Fa-f]{2}|[-()_.!~*"
r"\';/?:@&=+$,A-Za-z0-9])+)([).!\';/?:,][\s])?)"
)
self.osc_link_re = re.compile("\033\\]8;;(.*?)\007(.*?)\033\\]8;;\007")

def do_linkify(self, line):
    """Wrap every URL in *line* in link markup for the active output format.

    Args:
        line: a text part, or a non-string object such as an OSC_Link.

    Returns:
        For strings, a copy with each match of ``self.url_matcher`` replaced
        by ``\\url{...}`` (when ``self.latex`` is true) or an HTML anchor.
        Non-string values are returned unchanged.
    """
    if not isinstance(line, str):
        # Objects (e.g. OSC_Link) are expanded to a string later on.
        return line
    template = r"\\url{\1}" if self.latex else r'<a href="\1">\1</a>'
    return self.url_matcher.sub(template, line)

def handle_osc_links(self, part):
    """Render a link object as markup for the active output format.

    Args:
        part: an object exposing ``url`` and ``text`` attributes.

    Returns:
        ``\\href{url}{text}`` when ``self.latex`` is true, otherwise an HTML
        ``<a href="url">text</a>`` element.

    Side effects:
        In LaTeX mode, sets ``self.hyperref`` to True to record that an
        ``\\href`` command was emitted.
    """
    if self.latex:
        self.hyperref = True
        return "\\href{%s}{%s}" % (part.url, part.text)
    return '<a href="%s">%s</a>' % (part.url, part.text)

def apply_regex(self, ansi):
    """Convert *ansi* to markup and report which styles were used.

    The input is run through ``self._apply_regex`` and
    ``self._collapse_cursor``; each resulting part is then rendered:

    * strings are passed through ``self.do_linkify`` when ``self.linkify``
      is set, otherwise kept as they are;
    * ``OSC_Link`` objects are rendered by ``self.handle_osc_links``;
    * anything else is passed through untouched.

    Args:
        ansi: the text to convert.

    Returns:
        A ``(combined, styles_used)`` tuple: the joined output string and
        the set that ``self._apply_regex`` filled in while producing parts.
    """
    styles_used = set()
    parts = self._apply_regex(ansi, styles_used)
    parts = self._collapse_cursor(parts)

    def _render(part):
        # Links are resolved in this single pass; running a separate URL
        # pass beforehand would wrap the same URL in markup twice.
        if isinstance(part, str):
            return self.do_linkify(part) if self.linkify else part
        if isinstance(part, OSC_Link):
            return self.handle_osc_links(part)
        return part

    combined = "".join(_render(part) for part in parts)

    if self.markup_lines and not self.latex:
        # Give every output line its own addressable <span id="line-N">.
        combined = "\n".join(
            '<span id="line-%i">%s</span>' % (i, line)
            for i, line in enumerate(combined.split("\n"))
        )

    return combined, styles_used

def _apply_regex(self, ansi, styles_used):
Expand Down Expand Up @@ -348,8 +373,30 @@ def _vt100_box_drawing():

ansi = "".join(_vt100_box_drawing())

def _osc_link(ansi):
    """Split *ansi* into plain-text chunks and OSC_Link objects, in order.

    Every match of ``self.osc_link_re`` yields the text preceding it and
    then an ``OSC_Link`` built from the match's first two groups; the text
    after the last match is yielded at the end.
    """
    cursor = 0
    for found in self.osc_link_re.finditer(ansi):
        # Text between the previous link (or the start) and this one.
        yield ansi[cursor : found.start()]
        yield OSC_Link(found.group(1), found.group(2))
        cursor = found.end()
    # Whatever follows the final link (the whole input if none matched).
    yield ansi[cursor:]

state = _State()
inside_span = False
for part in _osc_link(ansi):
if isinstance(part, OSC_Link):
yield part
else:
yield from self._handle_ansi_code(part, styles_used, state)
if state.inside_span:
if self.latex:
yield "}"
else:
yield "</span>"

def _handle_ansi_code(self, ansi, styles_used, state):
last_end = 0 # the index of the last end of a code we've seen
for match in self.ansi_codes_prog.finditer(ansi):
yield ansi[last_end : match.start()]
Expand Down Expand Up @@ -385,8 +432,8 @@ def _vt100_box_drawing():
# Process reset marker, drop everything before
if last_null_index is not None:
params = params[last_null_index + 1 :]
if inside_span:
inside_span = False
if state.inside_span:
state.inside_span = False
if self.latex:
yield "}"
else:
Expand All @@ -412,12 +459,12 @@ def _vt100_box_drawing():
parameter = None
state.adjust(v, parameter=parameter)

if inside_span:
if state.inside_span:
if self.latex:
yield "}"
else:
yield "</span>"
inside_span = False
state.inside_span = False

css_classes = state.to_css_classes()
if not css_classes:
Expand All @@ -444,15 +491,8 @@ def _vt100_box_drawing():
yield "\\textcolor{%s}{" % " ".join(css_classes)
else:
yield '<span class="%s">' % " ".join(css_classes)
inside_span = True

state.inside_span = True
yield ansi[last_end:]
if inside_span:
if self.latex:
yield "}"
else:
yield "</span>"
inside_span = False

def _collapse_cursor(self, parts):
"""Act on any CursorMoveUp commands by deleting preceding tokens"""
Expand Down Expand Up @@ -523,6 +563,7 @@ def convert(self, ansi, full=True, ensure_trailing_newline=False):
"font_size": self.font_size,
"content": attrs["body"],
"output_encoding": self.output_encoding,
"hyperref": "\\usepackage{hyperref}" if self.hyperref else "",
}

def produce_headers(self):
Expand Down
12 changes: 12 additions & 0 deletions tests/test_ansi2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@ def test_not_linkify(self):
html = Ansi2HTMLConverter().convert(ansi)
assert target not in html

def test_osc_link(self):
    """A hyperlink escape sequence inside a styled span becomes an HTML anchor."""
    source = "[\x1b[01;35m\x1b[K\x1b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits\x07-Wtype-limits\x1b]8;;\x07\x1b[m\x1b[K]\n"
    expected = '[<span class="ansi1 ansi35"><a href="https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits">-Wtype-limits</a></span>]'
    rendered = Ansi2HTMLConverter().convert(source)
    assert expected in rendered

def test_osc_link_latex(self):
    """A hyperlink escape sequence inside a styled span becomes \\href in LaTeX mode."""
    source = "[\x1b[01;35m\x1b[K\x1b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits\x07-Wtype-limits\x1b]8;;\x07\x1b[m\x1b[K]\n"
    expected = "[\\textcolor{ansi1 ansi35}{\\href{https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wtype-limits}{-Wtype-limits}}]"
    rendered = Ansi2HTMLConverter(latex=True).convert(source)
    assert expected in rendered

def test_conversion(self):
for input_filename, expected_output_filename in (
("ansicolor.txt", "ansicolor.html"),
Expand Down