diff --git a/TexSoup/reader.py b/TexSoup/reader.py index 11cdd75..304b557 100644 --- a/TexSoup/reader.py +++ b/TexSoup/reader.py @@ -7,20 +7,23 @@ from TexSoup.tokens import ( TC, tokenize, - SKIP_ENVS, + SKIP_ENV_NAMES, + MATH_ENV_NAMES, ) import functools import string import sys -MATH_ENVS = ( +MODE_MATH = 'mode:math' +MODE_NON_MATH = 'mode:non-math' +MATH_SIMPLE_ENVS = ( TexDisplayMathModeEnv, TexMathModeEnv, TexDisplayMathEnv, TexMathEnv ) -MATH_TOKEN_TO_ENV = {env.token_begin: env for env in MATH_ENVS} +MATH_TOKEN_TO_ENV = {env.token_begin: env for env in MATH_SIMPLE_ENVS} ARG_BEGIN_TO_ENV = {arg.token_begin: arg for arg in arg_type} SIGNATURES = { @@ -44,7 +47,7 @@ def read_tex(buf, skip_envs=(), tolerance=0): """ while buf.hasNext(): yield read_expr(buf, - skip_envs=SKIP_ENVS + skip_envs, + skip_envs=SKIP_ENV_NAMES + skip_envs, tolerance=tolerance) @@ -74,32 +77,36 @@ def wrapper(buf, *args, **kwargs): return wrapper -def read_expr(src, skip_envs=(), tolerance=0): +def read_expr(src, skip_envs=(), tolerance=0, mode=MODE_NON_MATH): r"""Read next expression from buffer :param Buffer src: a buffer of tokens :param Tuple[str] skip_envs: environments to skip parsing :param int tolerance: error tolerance level (only supports 0 or 1) + :param str mode: math or not math mode :return: parsed expression :rtype: [TexExpr, Token] """ c = next(src) if c.category in MATH_TOKEN_TO_ENV.keys(): expr = MATH_TOKEN_TO_ENV[c.category]([], position=c.position) - return read_math_env(src, expr) + return read_math_env(src, expr, tolerance=tolerance) elif c.category == TC.Escape: - name, args = read_command(src, tolerance=tolerance) + name, args = read_command(src, tolerance=tolerance, mode=mode) if name == 'item': + assert mode != MODE_MATH, 'Command \item invalid in math mode.' contents = read_item(src) expr = TexCmd(name, contents, args, position=c.position) elif name == 'begin': assert args, 'Begin command must be followed by an env name.' expr = TexNamedEnv( args[0].string, args=args[1:], position=c.position) + if expr.name in MATH_ENV_NAMES: + mode = MODE_MATH if expr.name in skip_envs: read_skip_env(src, expr) else: - read_env(src, expr, tolerance=tolerance) + read_env(src, expr, tolerance=tolerance, mode=mode) else: expr = TexCmd(name, args=args, position=c.position) return expr @@ -170,6 +177,7 @@ def unclosed_env_handler(src, expr, end): :param Buffer src: a buffer of tokens :param TexExpr expr: expression for the environment + :param int tolerance: error tolerance level (only supports 0 or 1) :param end str: Actual end token (as opposed to expected) """ clo = CharToLineOffset(str(src)) @@ -179,7 +187,7 @@ def unclosed_env_handler(src, expr, end): line, offset, expr.name, expr.end, explanation)) -def read_math_env(src, expr): +def read_math_env(src, expr, tolerance=0): r"""Read the environment from buffer. Advances the buffer until right after the end of the environment. Adds @@ -197,11 +205,13 @@ def read_math_env(src, expr): ... EOFError: [Line: 0, Offset: 7] "$" env expecting $. Reached end of file. """ - content = src.forward_until(lambda c: c.category == expr.token_end) + contents = [] + while src.hasNext() and src.peek().category != expr.token_end: + contents.append(read_expr(src, tolerance=tolerance, mode=MODE_MATH)) if not src.hasNext() or src.peek().category != expr.token_end: unclosed_env_handler(src, expr, src.peek()) next(src) - expr.append(content) + expr.append(*contents) return expr @@ -235,7 +245,7 @@ def condition(s): return s.startswith('\\end{%s}' % expr.name) return expr -def read_env(src, expr, tolerance=0): +def read_env(src, expr, tolerance=0, mode=MODE_NON_MATH): r"""Read the environment from buffer. Advances the buffer until right after the end of the environment. Adds @@ -244,6 +254,7 @@ def read_env(src, expr, tolerance=0): :param Buffer src: a buffer of tokens :param TexExpr expr: expression for the environment :param int tolerance: error tolerance level (only supports 0 or 1) + :param str mode: math or not math mode :rtype: TexExpr >>> from TexSoup.category import categorize @@ -264,10 +275,10 @@ def read_env(src, expr, tolerance=0): while src.hasNext(): if src.peek().category == TC.Escape: name, args = make_read_peek(read_command)( - src, 1, skip=1, tolerance=tolerance) + src, 1, skip=1, tolerance=tolerance, mode=mode) if name == 'end': break - contents.append(read_expr(src, tolerance=tolerance)) + contents.append(read_expr(src, tolerance=tolerance, mode=mode)) error = not src.hasNext() or not args or args[0].string != expr.name if error and tolerance == 0: unclosed_env_handler(src, expr, src.peek((0, 6))) @@ -284,7 +295,8 @@ def read_env(src, expr, tolerance=0): # TODO: handle macro-weirdness e.g., \def\blah[#1][[[[[[[[#2{"#1 . #2"} # TODO: add newcommand macro -def read_args(src, n_required=-1, n_optional=-1, args=None, tolerance=0): +def read_args(src, n_required=-1, n_optional=-1, args=None, tolerance=0, + mode=MODE_NON_MATH): r"""Read all arguments from buffer. This function assumes that the command name has already been parsed. By @@ -300,6 +312,7 @@ def read_args(src, n_required=-1, n_optional=-1, args=None, tolerance=0): :param int n_optional: Number of optional arguments. If < 0, all valid bracket groups will be captured. :param int tolerance: error tolerance level (only supports 0 or 1) + :param str mode: math or not math mode :return: parsed arguments :rtype: TexArgs @@ -325,17 +338,18 @@ def read_args(src, n_required=-1, n_optional=-1, args=None, tolerance=0): if n_required == 0 and n_optional == 0: return args - n_optional = read_arg_optional(src, args, n_optional, tolerance) - n_required = read_arg_required(src, args, n_required, tolerance) + n_optional = read_arg_optional(src, args, n_optional, tolerance, mode) + n_required = read_arg_required(src, args, n_required, tolerance, mode) if src.hasNext() and src.peek().category == TC.BracketBegin: - n_optional = read_arg_optional(src, args, n_optional, tolerance) + n_optional = read_arg_optional(src, args, n_optional, tolerance, mode) if src.hasNext() and src.peek().category == TC.GroupBegin: - n_required = read_arg_required(src, args, n_required, tolerance) + n_required = read_arg_required(src, args, n_required, tolerance, mode) return args -def read_arg_optional(src, args, n_optional=-1, tolerance=0): +def read_arg_optional( + src, args, n_optional=-1, tolerance=0, mode=MODE_NON_MATH): """Read next optional argument from buffer. If the command has remaining optional arguments, look for: @@ -349,6 +363,7 @@ def read_arg_optional(src, args, n_optional=-1, tolerance=0): :param int n_optional: Number of optional arguments. If < 0, all valid bracket groups will be captured. :param int tolerance: error tolerance level (only supports 0 or 1) + :param str mode: math or not math mode :return: number of remaining optional arguments :rtype: int """ @@ -358,12 +373,13 @@ def read_arg_optional(src, args, n_optional=-1, tolerance=0): if spacer: src.backward(1) break - args.append(read_arg(src, next(src), tolerance=tolerance)) + args.append(read_arg(src, next(src), tolerance=tolerance, mode=mode)) n_optional -= 1 return n_optional -def read_arg_required(src, args, n_required=-1, tolerance=0): +def read_arg_required( + src, args, n_required=-1, tolerance=0, mode=MODE_NON_MATH): r"""Read next required argument from buffer. If the command has remaining required arguments, look for: @@ -379,6 +395,7 @@ def read_arg_required(src, args, n_required=-1, tolerance=0): :param int n_required: Number of required arguments. If < 0, all valid brace groups will be captured. :param int tolerance: error tolerance level (only supports 0 or 1) + :param str mode: math or not math mode :return: number of remaining optional arguments :rtype: int @@ -397,7 +414,8 @@ def read_arg_required(src, args, n_required=-1, tolerance=0): spacer = read_spacer(src) if src.hasNext() and src.peek().category == TC.GroupBegin: - args.append(read_arg(src, next(src), tolerance=tolerance)) + args.append(read_arg( + src, next(src), tolerance=tolerance, mode=mode)) n_required -= 1 continue elif src.hasNext() and n_required > 0: @@ -411,7 +429,7 @@ def read_arg_required(src, args, n_required=-1, tolerance=0): return n_required -def read_arg(src, c, tolerance=0): +def read_arg(src, c, tolerance=0, mode=MODE_NON_MATH): r"""Read the argument from buffer. Advances buffer until right before the end of the argument. @@ -419,6 +437,7 @@ def read_arg(src, c, tolerance=0): :param Buffer src: a buffer of tokens :param str c: argument token (starting token) :param int tolerance: error tolerance level (only supports 0 or 1) + :param str mode: math or not math mode :return: the parsed argument :rtype: TexGroup @@ -439,7 +458,7 @@ def read_arg(src, c, tolerance=0): src.forward() return arg(*content[1:], position=c.position) else: - content.append(read_expr(src, tolerance=tolerance)) + content.append(read_expr(src, tolerance=tolerance, mode=mode)) if tolerance == 0: clo = CharToLineOffset(str(src)) @@ -478,7 +497,7 @@ def read_spacer(buf): def read_command(buf, n_required_args=-1, n_optional_args=-1, skip=0, - tolerance=0): + tolerance=0, mode=MODE_NON_MATH): r"""Parses command and all arguments. Assumes escape has just been parsed. No whitespace is allowed between escape and command name. e.g., @@ -505,7 +524,7 @@ def read_command(buf, n_required_args=-1, n_optional_args=-1, skip=0, ('item', []) >>> buf.peek() ' aaa ' - + # >>> buf = Buffer(tokenize(categorize('\\sect abcd'))) # >>> _ = next(buf) # >>> read_command(buf) @@ -519,5 +538,5 @@ def read_command(buf, n_required_args=-1, n_optional_args=-1, skip=0, if n_required_args < 0 and n_optional_args < 0: n_required_args, n_optional_args = SIGNATURES.get(name, (-1, -1)) args = read_args(buf, n_required_args, n_optional_args, - tolerance=tolerance) + tolerance=tolerance, mode=mode) return name, args diff --git a/TexSoup/tokens.py b/TexSoup/tokens.py index 749dd36..9578013 100644 --- a/TexSoup/tokens.py +++ b/TexSoup/tokens.py @@ -12,15 +12,17 @@ import string # Custom higher-level combinations of primitives -SKIP_ENVS = ('verbatim', 'equation', 'lstlisting', 'align', 'alignat', - 'equation*', 'align*', 'math', 'displaymath', 'split', 'array', - 'eqnarray', 'eqnarray*', 'multline', 'multline*', 'gather', - 'gather*', 'flalign', 'flalign*', - '$', '$$', r'\[', r'\]', r'\(', r'\)') -BRACKETS_DELIMITERS = {'(', ')', '<', '>', '[', ']', '{', '}', - r'\{', r'\}', '.' '|', r'\langle', r'\rangle', - r'\lfloor', '\rfloor', r'\lceil', r'\rceil', - r'\ulcorner', r'\urcorner', r'\lbrack', r'\rbrack'} +SKIP_ENV_NAMES = ('lstlisting', 'verbatim') +MATH_ENV_NAMES = ( + 'align', 'align*', 'alignat', 'array', 'displaymath', 'eqnarray', + 'eqnarray*', 'equation', 'equation*', 'flalign', 'flalign*', 'gather', + 'gather*', 'math', 'multline', 'multline*', 'split' +) +BRACKETS_DELIMITERS = { + '(', ')', '<', '>', '[', ']', '{', '}', r'\{', r'\}', '.' '|', r'\langle', + r'\rangle', r'\lfloor', '\rfloor', r'\lceil', r'\rceil', r'\ulcorner', + r'\urcorner', r'\lbrack', r'\rbrack' +} # TODO: looks like left-right do have to match SIZE_PREFIX = ('left', 'right', 'big', 'Big', 'bigg', 'Bigg') PUNCTUATION_COMMANDS = {command + bracket diff --git a/tests/test_api.py b/tests/test_api.py index 11e7ff7..c7f9c40 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -186,6 +186,21 @@ def test_access_position(chikin): assert clo(chikin.section.position) == (4, 0) +def test_math_env_change(): + """Tests that commands in math environments can be found / modified""" + soup = TexSoup(r'\begin{align}\infer{A}{B}\infer{C}{D}\end{align}') + assert soup.infer is not None, repr(soup.expr) + for infer in soup.find_all('infer'): + infer.args = infer.args[::-1] + assert str(soup) == r'\begin{align}\infer{B}{A}\infer{D}{C}\end{align}' + + soup = TexSoup(r'$$\infer{A}{B}\infer{C}{D}$$') + assert soup.infer is not None, repr(soup.expr) + for infer in soup.find_all('infer'): + infer.args = infer.args[::-1] + assert str(soup) == r'$$\infer{B}{A}\infer{D}{C}$$' + + ######### # TEXT # ######## diff --git a/tests/test_parser.py b/tests/test_parser.py index f1950c3..b1334eb 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -410,11 +410,11 @@ def test_non_letter_commands(): (whether valid or not). """ for punctuation in '!@#$%^&*_+-=~`<>,./?;:|': - tex = rf""" + tex = r""" \begin{{document}} - \lstinline{{\{punctuation} Word [a-z]+}} + \lstinline{{\{} Word [a-z]+}} \end{{document}} - """ + """.format(punctuation) soup = TexSoup(tex) assert str(soup) == tex