#
# GCC preprocessor support
#

# Matches a gcc linemarker of the form `# <linenum> "<filename>" <flags...>`.
# Group 1 is the file name as printed between the outermost quotes. Requiring
# the line number keeps a `# ..."` line inside a comment retained by -C from
# being mistaken for a linemarker.
_gcc_linemarker_re = re.compile(r'^# \d+ "(.*)"(?: \d+)*\s*$')

# File names that mean "the content is not on disk; feed it to gcc on stdin".
# NOTE(review): "<str>" is presumably the placeholder the parser passes for
# in-memory input -- confirm against parse_string before relying on it.
_gcc_stdin_names = ("", "<str>")

# Name gcc uses in its linemarkers for input that was read from stdin
_gcc_stdin_marker = "<stdin>"


def _gcc_filter(fname: str, fp: typing.TextIO) -> str:
    """
    Strips everything that did not originate from ``fname`` out of gcc's
    preprocessed output.

    Each linemarker switches output on or off depending on whether it names
    ``fname``. Linemarkers that name ``fname`` are retained so that
    downstream consumers can still recover line numbers.

    :param fname: name of the file whose content should be kept, spelled as
                  it was passed to gcc
    :param fp: preprocessed output of gcc
    :return: the retained lines joined into a single string
    """
    # The comparison is done against the doubled-backslash spelling, presumably
    # because gcc escapes backslashes in the names it prints (Windows paths)
    fname = fname.replace("\\", "\\\\")

    # Anything that precedes the first linemarker is kept
    keep = True
    kept: typing.List[str] = []

    for line in fp:
        m = _gcc_linemarker_re.match(line)
        if m:
            # Compare the whole name, not a suffix: "xt1.h" must not be
            # mistaken for "t1.h"
            keep = m.group(1) == fname

        if keep:
            kept.append(line)

    return "".join(kept)


def make_gcc_preprocessor(
    *,
    defines: typing.List[str] = [],
    include_paths: typing.List[str] = [],
    retain_all_content: bool = False,
    encoding: typing.Optional[str] = None,
    gcc_args: typing.List[str] = ["g++"],
    print_cmd: bool = True,
) -> "PreprocessorFunction":
    """
    Creates a preprocessor function that uses g++ to preprocess the input text.

    gcc is a high performance and accurate preprocessor, but if an #include
    directive can't be resolved or other oddity exists in your input it will
    throw an error.

    :param defines: list of #define macros specified as "key value"
    :param include_paths: list of directories to search for included files
    :param retain_all_content: If False, only the parsed file content will be retained
    :param encoding: Encoding used to exchange text with the gcc process,
                     defaults to utf-8
    :param gcc_args: This is the path to G++ and any extra args you might want
    :param print_cmd: Prints the gcc command to stderr as it's executed

    :raises subprocess.CalledProcessError: (from the returned function) if the
                                           gcc process exits with an error

    .. code-block:: python

        pp = make_gcc_preprocessor()
        options = ParserOptions(preprocessor=pp)

        parse_file(content, options=options)

    """

    if not encoding:
        encoding = "utf-8"

    def _preprocess_file(filename: str, content: str) -> str:
        # The list defaults above are only ever read; a fresh command list is
        # built on every call
        cmd = [*gcc_args, "-w", "-E", "-C"]
        cmd.extend(f"-I{p}" for p in include_paths)

        # "key value" -> "key=value": only the first space separates the macro
        # name from its replacement text, which may itself contain spaces
        cmd.extend(f"-D{d.replace(' ', '=', 1)}" for d in defines)

        kwargs = {"encoding": encoding}
        if filename in _gcc_stdin_names:
            # No file on disk: feed the content via stdin, and filter on the
            # name gcc gives to stdin in its linemarkers
            cmd.append("-")
            filename = _gcc_stdin_marker
            kwargs["input"] = content
        else:
            cmd.append(filename)

        if print_cmd:
            print("+", " ".join(cmd), file=sys.stderr)

        result: str = subprocess.check_output(cmd, **kwargs)  # type: ignore
        if not retain_all_content:
            result = _gcc_filter(filename, io.StringIO(result))

        return result

    return _preprocess_file
@pytest.fixture(params=["gcc", "pcpp"])
def make_pp(request) -> typing.Callable[..., PreprocessorFunction]:
    """
    Parametrized fixture that yields a preprocessor factory for each backend.

    Tests using this fixture run once with the gcc factory and once with the
    pcpp factory. The gcc variant is skipped when no ``g++`` executable is
    found on the PATH.
    """
    backend = request.param

    if backend == "pcpp":
        return preprocessor.make_pcpp_preprocessor

    if backend == "gcc":
        compiler = shutil.which("g++")
        if not compiler:
            pytest.skip("g++ not found")

        # Runs the compiler's version command; its result is not inspected
        subprocess.run([compiler, "--version"])
        return preprocessor.make_gcc_preprocessor

    # Unreachable unless the fixture params and the branches above diverge
    assert False
+ + +def test_basic_preprocessor( + make_pp: typing.Callable[..., PreprocessorFunction] +) -> None: content = """ #define X 1 int x = X; """ - options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + + options = ParserOptions(preprocessor=make_pp()) data = parse_string(content, cleandoc=True, options=options) assert data == ParsedData( @@ -45,7 +68,10 @@ def test_basic_preprocessor() -> None: ) -def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: +def test_preprocessor_omit_content( + make_pp: typing.Callable[..., PreprocessorFunction], + tmp_path: pathlib.Path, +) -> None: """Ensure that content in other headers is omitted""" h_content = '#include "t2.h"' "\n" "int x = X;\n" h2_content = "#define X 2\n" "int omitted = 1;\n" @@ -56,7 +82,7 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: with open(tmp_path / "t2.h", "w") as fp: fp.write(h2_content) - options = ParserOptions(preprocessor=make_pcpp_preprocessor()) + options = ParserOptions(preprocessor=make_pp()) data = parse_file(tmp_path / "t1.h", options=options) assert data == ParsedData( @@ -74,7 +100,10 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None: ) -def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None: +def test_preprocessor_omit_content2( + make_pp: typing.Callable[..., PreprocessorFunction], + tmp_path: pathlib.Path, +) -> None: """ Ensure that content in other headers is omitted while handling pcpp relative path quirk @@ -91,9 +120,7 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None: with open(tmp_path2 / "t2.h", "w") as fp: fp.write(h2_content) - options = ParserOptions( - preprocessor=make_pcpp_preprocessor(include_paths=[str(tmp_path)]) - ) + options = ParserOptions(preprocessor=make_pp(include_paths=[str(tmp_path)])) # Weirdness happens here os.chdir(tmp_path) @@ -114,7 +141,9 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None: ) -def test_preprocessor_encoding(tmp_path: 
pathlib.Path) -> None: +def test_preprocessor_encoding( + make_pp: typing.Callable[..., PreprocessorFunction], tmp_path: pathlib.Path +) -> None: """Ensure we can handle alternate encodings""" h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n" @@ -126,7 +155,7 @@ def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None: with open(tmp_path / "t2.h", "wb") as fp: fp.write(h2_content) - options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252")) + options = ParserOptions(preprocessor=make_pp(encoding="cp1252")) data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252") assert data == ParsedData( @@ -155,7 +184,9 @@ def test_preprocessor_passthru_includes(tmp_path: pathlib.Path) -> None: fp.write("") options = ParserOptions( - preprocessor=make_pcpp_preprocessor(passthru_includes=re.compile(".+")) + preprocessor=preprocessor.make_pcpp_preprocessor( + passthru_includes=re.compile(".+") + ) ) data = parse_file(tmp_path / "t1.h", options=options)