From 7e84b791d1b751aece8ecf6c541a893712c1cc91 Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Mon, 21 Aug 2023 17:19:23 -0400 Subject: [PATCH] Add easy to use preprocessor support via pcpp - Fixes #60 --- .github/workflows/dist.yml | 2 +- README.md | 1 + cxxheaderparser/dump.py | 11 +++- cxxheaderparser/options.py | 8 +++ cxxheaderparser/parser.py | 6 +- cxxheaderparser/preprocessor.py | 98 +++++++++++++++++++++++++++++++++ docs/custom.rst | 9 ++- docs/requirements.txt | 3 +- mypy.ini | 3 + setup.py | 1 + 10 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 cxxheaderparser/preprocessor.py diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml index 38114b3..9db0a53 100644 --- a/.github/workflows/dist.yml +++ b/.github/workflows/dist.yml @@ -27,7 +27,7 @@ jobs: python-version: 3.8 - name: Install requirements run: | - pip --disable-pip-version-check install mypy pytest + pip --disable-pip-version-check install mypy pytest pcpp - name: Run mypy run: | mypy . diff --git a/README.md b/README.md index 2ca5bad..e764b87 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Non-goals: headers that contain macros, you should preprocess your code using the excellent pure python preprocessor [pcpp](https://github.com/ned14/pcpp) or your favorite compiler + * See `cxxheaderparser.preprocessor` for how to use * Probably won't be able to parse most IOCCC entries There are two APIs available: diff --git a/cxxheaderparser/dump.py b/cxxheaderparser/dump.py index 6a40719..d4da431 100644 --- a/cxxheaderparser/dump.py +++ b/cxxheaderparser/dump.py @@ -23,10 +23,19 @@ def dumpmain() -> None: parser.add_argument( "--mode", choices=["json", "pprint", "repr", "brepr"], default="pprint" ) + parser.add_argument( + "--pcpp", default=False, action="store_true", help="Use pcpp preprocessor" + ) args = parser.parse_args() - options = ParserOptions(verbose=args.verbose) + preprocessor = None + if args.pcpp: + from .preprocessor import make_pcpp_preprocessor + + preprocessor = make_pcpp_preprocessor() + + options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor) data = parse_file(args.header, options=options) if args.mode == "pprint": diff --git a/cxxheaderparser/options.py b/cxxheaderparser/options.py index b9ecb82..0b5898a 100644 --- a/cxxheaderparser/options.py +++ b/cxxheaderparser/options.py @@ -1,4 +1,8 @@ from dataclasses import dataclass +from typing import Callable, Optional + +#: arguments are (filename, content) +PreprocessorFunction = Callable[[str, str], str] @dataclass @@ -12,3 +16,7 @@ class ParserOptions: #: If true, converts a single void parameter to zero parameters convert_void_to_zero_params: bool = True + + #: A function that will preprocess the header before parsing. See + #: cxxheaderparser.preprocessor for available preprocessors + preprocessor: Optional[PreprocessorFunction] = None diff --git a/cxxheaderparser/parser.py b/cxxheaderparser/parser.py index ed5a99a..1be74c4 100644 --- a/cxxheaderparser/parser.py +++ b/cxxheaderparser/parser.py @@ -81,6 +81,10 @@ def __init__( ) -> None: self.visitor = visitor self.filename = filename + self.options = options if options else ParserOptions() + + if options and options.preprocessor is not None: + content = options.preprocessor(filename, content) self.lex: lexer.TokenStream = lexer.LexerTokenStream(filename, content) @@ -90,8 +94,6 @@ def __init__( self.state: State = NamespaceBlockState(None, global_ns) self.anon_id = 0 - self.options = options if options else ParserOptions() - self.verbose = True if self.options.verbose else False if self.verbose: diff --git a/cxxheaderparser/preprocessor.py b/cxxheaderparser/preprocessor.py new file mode 100644 index 0000000..2ce8fb2 --- /dev/null +++ b/cxxheaderparser/preprocessor.py @@ -0,0 +1,98 @@ +""" +Contains optional preprocessor support via pcpp +""" + +import io +from os.path import relpath +import typing +from .options import PreprocessorFunction + +from pcpp import Preprocessor, OutputDirective, Action + + +class PreprocessorError(Exception): + pass + + +class _CustomPreprocessor(Preprocessor): + def __init__(self): + Preprocessor.__init__(self) + self.errors = [] + + def on_error(self, file, line, msg): + self.errors.append(f"{file}:{line} error: {msg}") + + def on_include_not_found(self, *ignored): + raise OutputDirective(Action.IgnoreAndPassThrough) + + def on_comment(self, *ignored): + return True + + +def _filter_self(fname: str, fp: typing.TextIO) -> str: + # the output of pcpp includes the contents of all the included files, which + # isn't what a typical user of cxxheaderparser would want, so we strip out + # the line directives and any content that isn't in our original file + + # Compute the filename to match based on how pcpp does it + try: + relfname = relpath(fname) + except Exception: + relfname = fname + relfname = relfname.replace("\\", "/") + + relfname += '"\n' + + new_output = io.StringIO() + keep = True + + for line in fp: + if line.startswith("#line"): + keep = line.endswith(relfname) + + if keep: + new_output.write(line) + + new_output.seek(0) + return new_output.read() + + +def make_pcpp_preprocessor( + *, + defines: typing.List[str] = [], + include_paths: typing.List[str] = [], + retain_all_content: bool = False, +) -> PreprocessorFunction: + """ + Creates a preprocessor function that uses pcpp (which must be installed + separately) to preprocess the input text + """ + + def _preprocess_file(filename: str, content: str) -> str: + pp = _CustomPreprocessor() + if include_paths: + for p in include_paths: + pp.add_path(p) + + for define in defines: + pp.define(define) + + if not retain_all_content: + pp.line_directive = "#line" + + pp.parse(content, filename) + + if pp.errors: + raise PreprocessorError("\n".join(pp.errors)) + elif pp.return_code: + raise PreprocessorError("failed with exit code %d" % pp.return_code) + + fp = io.StringIO() + pp.write(fp) + fp.seek(0) + if retain_all_content: + return fp.read() + else: + return _filter_self(filename, fp) + + return _preprocess_file diff --git a/docs/custom.rst b/docs/custom.rst index 5f8328d..ef65238 100644 --- a/docs/custom.rst +++ b/docs/custom.rst @@ -34,4 +34,11 @@ Parser state .. automodule:: cxxheaderparser.parserstate :members: - :undoc-members: \ No newline at end of file + :undoc-members: + +Preprocessor +------------ + +.. automodule:: cxxheaderparser.preprocessor + :members: + :undoc-members: diff --git a/docs/requirements.txt b/docs/requirements.txt index b7e5c45..87090ed 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ sphinx >= 3.0 sphinx-rtd-theme -sphinx-autodoc-typehints \ No newline at end of file +sphinx-autodoc-typehints +pcpp \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 6a3cd39..3994d8f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,8 @@ [mypy] exclude = setup\.py|docs +[mypy-pcpp.*] +ignore_missing_imports = True + [mypy-cxxheaderparser._ply.*] ignore_errors = True \ No newline at end of file diff --git a/setup.py b/setup.py index 288746a..57cdb02 100644 --- a/setup.py +++ b/setup.py @@ -68,6 +68,7 @@ long_description=open("README.md").read(), long_description_content_type="text/markdown", install_requires=["dataclasses; python_version < '3.7'"], + extras_require={"pcpp": ["pcpp~=1.30"]}, license="BSD", platforms="Platform Independent", packages=find_packages(),