Skip to content

Commit

Permalink
Add GCC compatible preprocessing function
Browse files Browse the repository at this point in the history
  • Loading branch information
virtuald committed Oct 6, 2023
1 parent 2a17b27 commit bca0edf
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 19 deletions.
6 changes: 3 additions & 3 deletions cxxheaderparser/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class LexError(CxxParseError):
else:
Protocol = object

_line_re = re.compile(r'^\#[\t ]*line (\d+) "(.*)"')
_line_re = re.compile(r'^\#[\t ]*(line)? (\d+) "(.*)"')
_multicomment_re = re.compile("\n[\\s]+\\*")


Expand Down Expand Up @@ -448,8 +448,8 @@ def t_PP_DIRECTIVE(self, t: LexToken):
# handle line macros
m = _line_re.match(t.value)
if m:
self.filename = m.group(2)
self.line_offset = 1 + self.lex.lineno - int(m.group(1))
self.filename = m.group(3)
self.line_offset = 1 + self.lex.lineno - int(m.group(2))
return None
# ignore C++23 warning directive
if t.value.startswith("#warning"):
Expand Down
97 changes: 94 additions & 3 deletions cxxheaderparser/preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,99 @@
"""
Contains optional preprocessor support via pcpp
Contains optional preprocessor support functions
"""

import io
import re
import os
import subprocess
import sys
import typing

from .options import PreprocessorFunction

from pcpp import Preprocessor, OutputDirective, Action

#
# GCC preprocessor support
#


def _gcc_filter(fname: str, fp: typing.TextIO) -> str:
    """
    Strips content that did not originate from ``fname`` out of preprocessed
    output.

    Every line beginning with ``"# "`` that contains a double quote is treated
    as a line marker: the text up to (but excluding) its last double quote is
    compared against ``fname``, and the result decides whether that marker and
    the lines following it are retained. Lines seen before the first marker
    are retained.

    :param fname: filename (or trailing portion of one) whose content is kept
    :param fp: text stream containing the preprocessor output
    :returns: the retained lines, concatenated in their original order
    """
    retained: typing.List[str] = []
    in_target_file = True

    for text in fp:
        if text.startswith("# "):
            closing_quote = text.rfind('"')
            # a "# " line without any quote leaves the current state untouched
            if closing_quote >= 0:
                in_target_file = text[:closing_quote].endswith(fname)

        if in_target_file:
            retained.append(text)

    return "".join(retained)


def make_gcc_preprocessor(
    *,
    defines: typing.List[str] = [],
    include_paths: typing.List[str] = [],
    retain_all_content: bool = False,
    encoding: typing.Optional[str] = None,
    gcc_args: typing.List[str] = ["g++"],
    print_cmd: bool = True,
) -> PreprocessorFunction:
    """
    Creates a preprocessor function that uses g++ to preprocess the input text.

    gcc is a high performance and accurate preprocessor, but if g++ exits with
    a non-zero status (for example because of an #include directive it cannot
    handle or some other oddity in your input) the returned function raises
    :class:`subprocess.CalledProcessError`.

    :param defines: list of #define macros specified as "key value". Only the
                    first space separates the macro name from its value, so
                    the value itself may contain spaces.
    :param include_paths: list of directories to search for included files
    :param retain_all_content: If False, only the parsed file content will be retained
    :param encoding: Encoding used to exchange text with the g++ process;
                     defaults to utf-8 if not specified
    :param gcc_args: This is the path to G++ and any extra args you might want
    :param print_cmd: Prints the gcc command to stderr as it is executed
    :returns: a function taking ``(filename, content)`` and returning the
              preprocessed text
    """

    # NOTE: the list defaults in the signature are shared between calls; that
    # is only safe because this function never mutates them (the command line
    # is always built into a fresh list below).

    if not encoding:
        encoding = "utf-8"

    def _preprocess_file(filename: str, content: str) -> str:
        # -w: suppress warnings, -E: preprocess only, -C: keep comments
        cmd = gcc_args + ["-w", "-E", "-C"]

        cmd.extend(f"-I{p}" for p in include_paths)
        # "NAME value" -> "-DNAME=value". Replace only the first space so that
        # values containing spaces are passed through intact.
        cmd.extend(f"-D{d.replace(' ', '=', 1)}" for d in defines)

        kwargs = {"encoding": encoding}
        if filename == "<str>":
            # No file on disk: feed the content to g++ on stdin instead
            cmd.append("-")
            # presumably the name g++ reports for stdin in its line markers,
            # so the filter below can match it -- TODO confirm
            filename = "<stdin>"
            kwargs["input"] = content
        else:
            # g++ reads the file itself; `content` is not used in this case
            cmd.append(filename)

        if print_cmd:
            print("+", " ".join(cmd), file=sys.stderr)

        result: str = subprocess.check_output(cmd, **kwargs)  # type: ignore
        if not retain_all_content:
            result = _gcc_filter(filename, io.StringIO(result))

        return result

    return _preprocess_file


#
# PCPP preprocessor support
#


class PreprocessorError(Exception):
pass
Expand All @@ -36,7 +120,7 @@ def on_comment(self, *ignored):
return True


def _filter_self(fname: str, fp: typing.TextIO) -> str:
def _pcpp_filter(fname: str, fp: typing.TextIO) -> str:
# the output of pcpp includes the contents of all the included files, which
# isn't what a typical user of cxxheaderparser would want, so we strip out
# the line directives and any content that isn't in our original file
Expand Down Expand Up @@ -69,6 +153,13 @@ def make_pcpp_preprocessor(
Creates a preprocessor function that uses pcpp (which must be installed
separately) to preprocess the input text.
If missing #include files are encountered, this preprocessor will ignore the
error. This preprocessor is pure python so it's very portable, and is a good
choice if performance isn't critical.
:param defines: list of #define macros specified as "key value"
:param include_paths: list of directories to search for included files
:param retain_all_content: If False, only the parsed file content will be retained
:param encoding: If specified any include files are opened with this encoding
:param passthru_includes: If specified any #include directives that match the
compiled regex pattern will be part of the output.
Expand Down Expand Up @@ -119,6 +210,6 @@ def _preprocess_file(filename: str, content: str) -> str:
filename = filename.replace(os.sep, "/")
break

return _filter_self(filename, fp)
return _pcpp_filter(filename, fp)

return _preprocess_file
54 changes: 41 additions & 13 deletions tests/test_preprocessor.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
import pathlib
import pytest
import re
import shutil
import typing

from cxxheaderparser.options import ParserOptions
from cxxheaderparser.preprocessor import make_pcpp_preprocessor
from cxxheaderparser.options import ParserOptions, PreprocessorFunction
from cxxheaderparser import preprocessor
from cxxheaderparser.simple import (
NamespaceScope,
ParsedData,
Expand All @@ -22,12 +25,29 @@
)


def test_basic_preprocessor() -> None:
@pytest.fixture(params=["gcc", "pcpp"])
def make_pp(request) -> typing.Callable[..., PreprocessorFunction]:
    """
    Parametrized fixture that provides each preprocessor factory in turn, so
    a test requesting it runs once per backend. The gcc variant is skipped
    when no g++ executable can be found.
    """
    backend = request.param

    if backend == "pcpp":
        return preprocessor.make_pcpp_preprocessor

    if backend == "gcc":
        # the gcc backend needs a g++ executable, so skip when none is found
        if not shutil.which("g++"):
            pytest.skip("g++ not found")
        return preprocessor.make_gcc_preprocessor

    # only reachable if a new param is added without a matching branch
    assert False


def test_basic_preprocessor(
make_pp: typing.Callable[..., PreprocessorFunction]
) -> None:
content = """
#define X 1
int x = X;
"""
options = ParserOptions(preprocessor=make_pcpp_preprocessor())

options = ParserOptions(preprocessor=make_pp())
data = parse_string(content, cleandoc=True, options=options)

assert data == ParsedData(
Expand All @@ -45,7 +65,10 @@ def test_basic_preprocessor() -> None:
)


def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
def test_preprocessor_omit_content(
make_pp: typing.Callable[..., PreprocessorFunction],
tmp_path: pathlib.Path,
) -> None:
"""Ensure that content in other headers is omitted"""
h_content = '#include "t2.h"' "\n" "int x = X;\n"
h2_content = "#define X 2\n" "int omitted = 1;\n"
Expand All @@ -56,7 +79,7 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
with open(tmp_path / "t2.h", "w") as fp:
fp.write(h2_content)

options = ParserOptions(preprocessor=make_pcpp_preprocessor())
options = ParserOptions(preprocessor=make_pp())
data = parse_file(tmp_path / "t1.h", options=options)

assert data == ParsedData(
Expand All @@ -74,7 +97,10 @@ def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
)


def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None:
def test_preprocessor_omit_content2(
make_pp: typing.Callable[..., PreprocessorFunction],
tmp_path: pathlib.Path,
) -> None:
"""
Ensure that content in other headers is omitted while handling pcpp
relative path quirk
Expand All @@ -91,9 +117,7 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None:
with open(tmp_path2 / "t2.h", "w") as fp:
fp.write(h2_content)

options = ParserOptions(
preprocessor=make_pcpp_preprocessor(include_paths=[str(tmp_path)])
)
options = ParserOptions(preprocessor=make_pp(include_paths=[str(tmp_path)]))

# Weirdness happens here
os.chdir(tmp_path)
Expand All @@ -114,7 +138,9 @@ def test_preprocessor_omit_content2(tmp_path: pathlib.Path) -> None:
)


def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
def test_preprocessor_encoding(
make_pp: typing.Callable[..., PreprocessorFunction], tmp_path: pathlib.Path
) -> None:
"""Ensure we can handle alternate encodings"""
h_content = b"// \xa9 2023 someone\n" b'#include "t2.h"' b"\n" b"int x = X;\n"

Expand All @@ -126,7 +152,7 @@ def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
with open(tmp_path / "t2.h", "wb") as fp:
fp.write(h2_content)

options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252"))
options = ParserOptions(preprocessor=make_pp(encoding="cp1252"))
data = parse_file(tmp_path / "t1.h", options=options, encoding="cp1252")

assert data == ParsedData(
Expand Down Expand Up @@ -155,7 +181,9 @@ def test_preprocessor_passthru_includes(tmp_path: pathlib.Path) -> None:
fp.write("")

options = ParserOptions(
preprocessor=make_pcpp_preprocessor(passthru_includes=re.compile(".+"))
preprocessor=preprocessor.make_pcpp_preprocessor(
passthru_includes=re.compile(".+")
)
)
data = parse_file(tmp_path / "t1.h", options=options)

Expand Down

0 comments on commit bca0edf

Please sign in to comment.