Skip to content

Commit

Permalink
Merge pull request #77 from robotpy/gcc-preprocessor
Browse files Browse the repository at this point in the history
Add GCC compatible preprocessing function
  • Loading branch information
virtuald authored Oct 8, 2023
2 parents 2a17b27 + 8f9e862 commit d94df61
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 37 deletions.
6 changes: 3 additions & 3 deletions cxxheaderparser/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class LexError(CxxParseError):
else:
Protocol = object

_line_re = re.compile(r'^\#[\t ]*line (\d+) "(.*)"')
_line_re = re.compile(r'^\#[\t ]*(line)? (\d+) "(.*)"')
_multicomment_re = re.compile("\n[\\s]+\\*")


Expand Down Expand Up @@ -448,8 +448,8 @@ def t_PP_DIRECTIVE(self, t: LexToken):
# handle line macros
m = _line_re.match(t.value)
if m:
self.filename = m.group(2)
self.line_offset = 1 + self.lex.lineno - int(m.group(1))
self.filename = m.group(3)
self.line_offset = 1 + self.lex.lineno - int(m.group(2))
return None
# ignore C++23 warning directive
if t.value.startswith("#warning"):
Expand Down
151 changes: 130 additions & 21 deletions cxxheaderparser/preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,141 @@
"""
Contains optional preprocessor support via pcpp
Contains optional preprocessor support functions
"""

import io
import re
import os
import subprocess
import sys
import typing
from .options import PreprocessorFunction

from pcpp import Preprocessor, OutputDirective, Action
from .options import PreprocessorFunction


class PreprocessorError(Exception):
    """
    Error raised by the preprocessor support functions in this module, for
    example when the pcpp preprocessor is requested but pcpp is not installed
    """


class _CustomPreprocessor(Preprocessor):
def __init__(
self,
encoding: typing.Optional[str],
passthru_includes: typing.Optional["re.Pattern"],
):
Preprocessor.__init__(self)
self.errors: typing.List[str] = []
self.assume_encoding = encoding
self.passthru_includes = passthru_includes
#
# GCC preprocessor support
#


def _gcc_filter(fname: str, fp: typing.TextIO) -> str:
new_output = io.StringIO()
keep = True
fname = fname.replace("\\", "\\\\")

for line in fp:
if line.startswith("# "):
last_quote = line.rfind('"')
if last_quote != -1:
keep = line[:last_quote].endswith(fname)

if keep:
new_output.write(line)

new_output.seek(0)
return new_output.read()


def make_gcc_preprocessor(
    *,
    defines: typing.List[str] = [],
    include_paths: typing.List[str] = [],
    retain_all_content: bool = False,
    encoding: typing.Optional[str] = None,
    gcc_args: typing.List[str] = ["g++"],
    print_cmd: bool = True,
) -> "PreprocessorFunction":
    """
    Creates a preprocessor function that uses g++ to preprocess the input text.

    gcc is a high performance and accurate preprocessor, but if an #include
    directive can't be resolved or other oddity exists in your input it will
    throw an error.

    :param defines: list of #define macros specified as "key value"; only the
                    first space separates the key from the value, so the value
                    itself may contain spaces
    :param include_paths: list of directories to search for included files
    :param retain_all_content: If False, only the parsed file content will be retained
    :param encoding: If specified any include files are opened with this
                     encoding, otherwise utf-8 is used
    :param gcc_args: This is the path to G++ and any extra args you might want
    :param print_cmd: Prints the gcc command to stderr as it's executed

    :returns: a function that accepts ``(filename, content)`` and returns the
              preprocessed text. It raises ``subprocess.CalledProcessError``
              if the command exits with a non-zero status.

    .. code-block:: python

        pp = make_gcc_preprocessor()
        options = ParserOptions(preprocessor=pp)

        parse_file(content, options=options)

    """

    # NOTE: the list defaults in the signature are shared between calls; they
    # are only ever read below, never mutated.
    text_encoding = encoding if encoding else "utf-8"

    def _preprocess_file(filename: str, content: str) -> str:
        # -w: suppress warnings, -E: preprocess only, -C: keep comments
        cmd = [*gcc_args, "-w", "-E", "-C"]

        cmd.extend(f"-I{path}" for path in include_paths)
        for define in defines:
            # "NAME value with spaces" -> -DNAME=value with spaces
            cmd.append(f"-D{define.replace(' ', '=', 1)}")

        kwargs: typing.Dict[str, typing.Any] = {"encoding": text_encoding}
        if filename == "<str>":
            # there is no file on disk to hand to gcc (presumably this name is
            # used for in-memory strings -- confirm with the caller), so feed
            # the content through stdin; gcc reports that input as "<stdin>"
            cmd.append("-")
            filename = "<stdin>"
            kwargs["input"] = content
        else:
            cmd.append(filename)

        if print_cmd:
            print("+", " ".join(cmd), file=sys.stderr)

        result: str = subprocess.check_output(cmd, **kwargs)
        if not retain_all_content:
            result = _gcc_filter(filename, io.StringIO(result))

        return result

    return _preprocess_file


#
# PCPP preprocessor support (not installed by default)
#

def on_error(self, file, line, msg):
self.errors.append(f"{file}:{line} error: {msg}")

def on_include_not_found(self, *ignored):
raise OutputDirective(Action.IgnoreAndPassThrough)
try:
import pcpp
from pcpp import Preprocessor, OutputDirective, Action

def on_comment(self, *ignored):
return True
class _CustomPreprocessor(Preprocessor):
def __init__(
self,
encoding: typing.Optional[str],
passthru_includes: typing.Optional["re.Pattern"],
):
Preprocessor.__init__(self)
self.errors: typing.List[str] = []
self.assume_encoding = encoding
self.passthru_includes = passthru_includes

def on_error(self, file, line, msg):
self.errors.append(f"{file}:{line} error: {msg}")

def _filter_self(fname: str, fp: typing.TextIO) -> str:
def on_include_not_found(self, *ignored):
raise OutputDirective(Action.IgnoreAndPassThrough)

def on_comment(self, *ignored):
return True

except ImportError:
pcpp = None


def _pcpp_filter(fname: str, fp: typing.TextIO) -> str:
# the output of pcpp includes the contents of all the included files, which
# isn't what a typical user of cxxheaderparser would want, so we strip out
# the line directives and any content that isn't in our original file
Expand Down Expand Up @@ -69,6 +168,13 @@ def make_pcpp_preprocessor(
Creates a preprocessor function that uses pcpp (which must be installed
separately) to preprocess the input text.
If missing #include files are encountered, this preprocessor will ignore the
error. This preprocessor is pure python so it's very portable, and is a good
choice if performance isn't critical.
:param defines: list of #define macros specified as "key value"
:param include_paths: list of directories to search for included files
:param retain_all_content: If False, only the parsed file content will be retained
:param encoding: If specified any include files are opened with this encoding
:param passthru_includes: If specified any #include directives that match the
compiled regex pattern will be part of the output.
Expand All @@ -82,6 +188,9 @@ def make_pcpp_preprocessor(
"""

if pcpp is None:
raise PreprocessorError("pcpp is not installed")

def _preprocess_file(filename: str, content: str) -> str:
pp = _CustomPreprocessor(encoding, passthru_includes)
if include_paths:
Expand Down Expand Up @@ -119,6 +228,6 @@ def _preprocess_file(filename: str, content: str) -> str:
filename = filename.replace(os.sep, "/")
break

return _filter_self(filename, fp)
return _pcpp_filter(filename, fp)

return _preprocess_file
60 changes: 47 additions & 13 deletions tests/test_preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import os
import pathlib
import pytest
import re
import shutil
import subprocess
import typing

from cxxheaderparser.options import ParserOptions
from cxxheaderparser.preprocessor import make_pcpp_preprocessor
from cxxheaderparser.options import ParserOptions, PreprocessorFunction
from cxxheaderparser import preprocessor
from cxxheaderparser.simple import (
NamespaceScope,
ParsedData,
Expand All @@ -22,12 +26,33 @@
)


def test_basic_preprocessor() -> None:
@pytest.fixture(params=["gcc", "pcpp"])
def make_pp(request) -> typing.Callable[..., PreprocessorFunction]:
    """
    Parametrized fixture that yields each available preprocessor factory in
    turn, skipping the test when the backing tool is not available
    """
    backend = request.param

    if backend == "gcc":
        compiler = shutil.which("g++")
        if not compiler:
            pytest.skip("g++ not found")

        # emits the compiler version into the captured test output
        subprocess.run([compiler, "--version"])
        return preprocessor.make_gcc_preprocessor

    if backend == "pcpp":
        if preprocessor.pcpp is None:
            pytest.skip("pcpp not installed")
        return preprocessor.make_pcpp_preprocessor

    # unreachable unless a new param is added without a matching branch
    assert False


def test_basic_preprocessor(
make_pp: typing.Callable[..., PreprocessorFunction]
) -> None:
content = """
#define X 1
int x = X;
"""
options = ParserOptions(preprocessor=make_pcpp_preprocessor())

options = ParserOptions(preprocessor=make_pp())
data = parse_string(content, cleandoc=True, options=options)

assert data == ParsedData(
Expand All @@ -45,7 +70,10 @@ def test_basic_preprocessor() -> None:
)


def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
def test_preprocessor_omit_content(
make_pp: typing.Callable[..., PreprocessorFunction],
tmp_path: pathlib.Path,
) -> None:
"""Ensure that content in other headers is omitted"""
h_content = '#include "t2.h"' "\n" "int x = X;\n"
h2_content = "#define X 2\n" "int omitted = 1;\n"
Expand All @@ -56,7 +84,7 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
with open(tmp_path / "t2.h", "w") as fp:
fp.write(h2_content)

options = ParserOptions(preprocessor=make_pcpp_preprocessor())
options = ParserOptions(preprocessor=make_pp())
data = parse_file(tmp_path / "t1.h", options=options)

assert data == ParsedData(
Expand All @@ -74,7 +102,10 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
)


def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None:
def test_preprocessor_omit_content2(
make_pp: typing.Callable[..., PreprocessorFunction],
tmp_path: pathlib.Path,
) -> None:
"""
Ensure that content in other headers is omitted while handling pcpp
relative path quirk
Expand All @@ -91,9 +122,7 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None:
with open(tmp_path2 / "t2.h", "w") as fp:
fp.write(h2_content)

options = ParserOptions(
preprocessor=make_pcpp_preprocessor(include_paths=[str(tmp_path)])
)
options = ParserOptions(preprocessor=make_pp(include_paths=[str(tmp_path)]))

# Weirdness happens here
os.chdir(tmp_path)
Expand All @@ -114,7 +143,9 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None:
)


def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
def test_preprocessor_encoding(
make_pp: typing.Callable[..., PreprocessorFunction], tmp_path: pathlib.Path
) -> None:
"""Ensure we can handle alternate encodings"""
h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n"

Expand All @@ -126,7 +157,7 @@ def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
with open(tmp_path / "t2.h", "wb") as fp:
fp.write(h2_content)

options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252"))
options = ParserOptions(preprocessor=make_pp(encoding="cp1252"))
data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252")

assert data == ParsedData(
Expand All @@ -144,6 +175,7 @@ def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
)


@pytest.mark.skipif(preprocessor.pcpp is None, reason="pcpp not installed")
def test_preprocessor_passthru_includes(tmp_path: pathlib.Path) -> None:
"""Ensure that all #include pass through"""
h_content = '#include "t2.h"\n'
Expand All @@ -155,7 +187,9 @@ def test_preprocessor_passthru_includes(tmp_path: pathlib.Path) -> None:
fp.write("")

options = ParserOptions(
preprocessor=make_pcpp_preprocessor(passthru_includes=re.compile(".+"))
preprocessor=preprocessor.make_pcpp_preprocessor(
passthru_includes=re.compile(".+")
)
)
data = parse_file(tmp_path / "t1.h", options=options)

Expand Down

0 comments on commit d94df61

Please sign in to comment.