From 8f9e8626af75bd252a125a6ca4a636b045a014e1 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Sun, 8 Oct 2023 01:00:55 -0400 Subject: [PATCH] Add GCC compatible preprocessing function --- cxxheaderparser/lexer.py | 6 +- cxxheaderparser/preprocessor.py | 107 +++++++++++++++++++++++++++++++- tests/test_preprocessor.py | 11 +++- 3 files changed, 116 insertions(+), 8 deletions(-) diff --git a/cxxheaderparser/lexer.py b/cxxheaderparser/lexer.py index af7769f..341ef76 100644 --- a/cxxheaderparser/lexer.py +++ b/cxxheaderparser/lexer.py @@ -17,7 +17,7 @@ class LexError(CxxParseError): else: Protocol = object -_line_re = re.compile(r'^\#[\t ]*line (\d+) "(.*)"') +_line_re = re.compile(r'^\#[\t ]*(line)? (\d+) "(.*)"') _multicomment_re = re.compile("\n[\\s]+\\*") @@ -448,8 +448,8 @@ def t_PP_DIRECTIVE(self, t: LexToken): # handle line macros m = _line_re.match(t.value) if m: - self.filename = m.group(2) - self.line_offset = 1 + self.lex.lineno - int(m.group(1)) + self.filename = m.group(3) + self.line_offset = 1 + self.lex.lineno - int(m.group(2)) return None # ignore C++23 warning directive if t.value.startswith("#warning"): diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py index 30f0fbc..1e5719b 100644 --- a/cxxheaderparser/preprocessor.py +++ b/cxxheaderparser/preprocessor.py @@ -1,11 +1,14 @@ """ -Contains optional preprocessor support via pcpp +Contains optional preprocessor support functions """ import io import re import os +import subprocess +import sys import typing + from .options import PreprocessorFunction @@ -13,6 +16,97 @@ class PreprocessorError(Exception): pass +# +# GCC preprocessor support +# + + +def _gcc_filter(fname: str, fp: typing.TextIO) -> str: + new_output = io.StringIO() + keep = True + fname = fname.replace("\\", "\\\\") + + for line in fp: + if line.startswith("# "): + last_quote = line.rfind('"') + if last_quote != -1: + keep = line[:last_quote].endswith(fname) + + if keep: + new_output.write(line) + + 
new_output.seek(0) + return new_output.read() + + +def make_gcc_preprocessor( + *, + defines: typing.List[str] = [], + include_paths: typing.List[str] = [], + retain_all_content: bool = False, + encoding: typing.Optional[str] = None, + gcc_args: typing.List[str] = ["g++"], + print_cmd: bool = True, +) -> PreprocessorFunction: + """ + Creates a preprocessor function that uses g++ to preprocess the input text. + + gcc is a high performance and accurate preprocessor, but if an #include + directive can't be resolved or other oddity exists in your input it will + throw an error. + + :param defines: list of #define macros specified as "key value" + :param include_paths: list of directories to search for included files + :param retain_all_content: If False, only the parsed file content will be retained + :param encoding: Text encoding used for the input sent to and the output read from g++ (defaults to utf-8) + :param gcc_args: This is the path to G++ and any extra args you might want + :param print_cmd: Prints the gcc command as it's executed + + ..
code-block:: python + + pp = make_gcc_preprocessor() + options = ParserOptions(preprocessor=pp) + + parse_file(content, options=options) + + """ + + if not encoding: + encoding = "utf-8" + + def _preprocess_file(filename: str, content: str) -> str: + cmd = gcc_args + ["-w", "-E", "-C"] + + for p in include_paths: + cmd.append(f"-I{p}") + for d in defines: + cmd.append(f"-D{d.replace(' ', '=')}") + + kwargs = {"encoding": encoding} + if filename == "<str>": + cmd.append("-") + filename = "<stdin>" + kwargs["input"] = content + else: + cmd.append(filename) + + if print_cmd: + print("+", " ".join(cmd), file=sys.stderr) + + result: str = subprocess.check_output(cmd, **kwargs) # type: ignore + if not retain_all_content: + result = _gcc_filter(filename, io.StringIO(result)) + + return result + + return _preprocess_file + + +# +# PCPP preprocessor support (not installed by default) +# + + try: import pcpp from pcpp import Preprocessor, OutputDirective, Action @@ -41,7 +135,7 @@ def on_comment(self, *ignored): pcpp = None -def _filter_self(fname: str, fp: typing.TextIO) -> str: +def _pcpp_filter(fname: str, fp: typing.TextIO) -> str: # the output of pcpp includes the contents of all the included files, which # isn't what a typical user of cxxheaderparser would want, so we strip out # the line directives and any content that isn't in our original file @@ -74,6 +168,13 @@ def make_pcpp_preprocessor( Creates a preprocessor function that uses pcpp (which must be installed separately) to preprocess the input text. + If missing #include files are encountered, this preprocessor will ignore the + error. This preprocessor is pure python so it's very portable, and is a good + choice if performance isn't critical.
+ + :param defines: list of #define macros specified as "key value" + :param include_paths: list of directories to search for included files + :param retain_all_content: If False, only the parsed file content will be retained :param encoding: If specified any include files are opened with this encoding :param passthru_includes: If specified any #include directives that match the compiled regex pattern will be part of the output. @@ -127,6 +228,6 @@ def _preprocess_file(filename: str, content: str) -> str: filename = filename.replace(os.sep, "/") break - return _filter_self(filename, fp) + return _pcpp_filter(filename, fp) return _preprocess_file diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index f7d775b..e54f86e 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -26,10 +26,17 @@ ) -@pytest.fixture(params=["pcpp"]) +@pytest.fixture(params=["gcc", "pcpp"]) def make_pp(request) -> typing.Callable[..., PreprocessorFunction]: param = request.param - if param == "pcpp": + if param == "gcc": + gcc_path = shutil.which("g++") + if not gcc_path: + pytest.skip("g++ not found") + + subprocess.run([gcc_path, "--version"]) + return preprocessor.make_gcc_preprocessor + elif param == "pcpp": if preprocessor.pcpp is None: pytest.skip("pcpp not installed") return preprocessor.make_pcpp_preprocessor