Restructure preprocessor support
- Remove partial support for #define and other preprocessor directives
- Allow a #pragma directive to span multiple lines
- #pragma now emits a list of tokens instead of a single string (sketched below)
virtuald committed Jul 23, 2023
1 parent b07e1f8 commit c0bb443
Showing 5 changed files with 116 additions and 58 deletions.
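
The practical effect of the pragma change, sketched with the shapes from the updated tests below (an illustrative snippet, not code from the commit):

from cxxheaderparser.simple import parse_string

data = parse_string("#pragma once\n")

# Before this commit: data.pragmas[0].content was the plain string "once".
# After this commit: content is a Value wrapping a list of Tokens.
print([tok.value for tok in data.pragmas[0].content.tokens])  # -> ["once"]
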
55 changes: 46 additions & 9 deletions cxxheaderparser/lexer.py
@@ -51,6 +51,7 @@ class LexToken(Protocol):

     #: private
     lexer: lex.Lexer
+    lexmatch: "re.Match"


 PhonyEnding: LexToken = lex.LexToken()  # type: ignore
@@ -175,7 +176,10 @@ class PlyLexer:
         # Comments
         "COMMENT_SINGLELINE",
         "COMMENT_MULTILINE",
-        "PRECOMP_MACRO",
+        "LINE_MACRO",
+        "PRAGMA_MACRO",
+        "INCLUDE_MACRO",
+        "PP_MACRO",
         # misc
         "DIVIDE",
         "NEWLINE",
@@ -434,16 +438,32 @@ def t_NAME(self, t: LexToken) -> LexToken:
             t.type = t.value
         return t

-    @TOKEN(r"\#.*")
-    def t_PRECOMP_MACRO(self, t: LexToken) -> typing.Optional[LexToken]:
-        m = _line_re.match(t.value)
-        if m:
-            self.filename = m.group(2)
+    @TOKEN(r'\#[\t ]*line (\d+) "(.*)"')
+    def t_LINE_MACRO(self, t: LexToken) -> None:
+        m = t.lexmatch
+        self.filename = m.group(2)
+        self.line_offset = 1 + self.lex.lineno - int(m.group(1))

-            self.line_offset = 1 + self.lex.lineno - int(m.group(1))
-            return None
+    @TOKEN(r"\#[\t ]*pragma")
+    def t_PRAGMA_MACRO(self, t: LexToken) -> LexToken:
+        return t
+
+    @TOKEN(r"\#[\t ]*include (.*)")
+    def t_INCLUDE_MACRO(self, t: LexToken) -> LexToken:
+        return t
+
+    @TOKEN(r"\#(.*)")
+    def t_PP_MACRO(self, t: LexToken):
+        if "define" in t.value:
+            msgtype = "#define"
         else:
-            return t
+            msgtype = "preprocessor"
+        self._error(
+            "cxxheaderparser does not support "
+            + msgtype
+            + " directives, please use a C++ preprocessor first",
+            t,
+        )

     t_DIVIDE = r"/(?!/)"
     t_ELLIPSIS = r"\.\.\."
@@ -541,6 +561,12 @@ def get_doxygen_after(self) -> typing.Optional[str]:
         "WHITESPACE",
     }

+    _discard_types_except_newline = {
+        "COMMENT_SINGLELINE",
+        "COMMENT_MULTILINE",
+        "WHITESPACE",
+    }
+
     def token(self) -> LexToken:
         tokbuf = self.tokbuf
         while True:
@@ -563,6 +589,17 @@ def token_eof_ok(self) -> typing.Optional[LexToken]:
             if not self._fill_tokbuf(tokbuf):
                 return None

+    def token_newline_eof_ok(self) -> typing.Optional[LexToken]:
+        tokbuf = self.tokbuf
+        while True:
+            while tokbuf:
+                tok = tokbuf.popleft()
+                if tok.type not in self._discard_types_except_newline:
+                    return tok
+
+            if not self._fill_tokbuf(tokbuf):
+                return None
+
     def token_if(self, *types: str) -> typing.Optional[LexToken]:
         tok = self.token_eof_ok()
         if tok is None:
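
A note on rule ordering: PLY tries function-defined token rules in the order they appear in the class, so the specific #line/#pragma/#include rules above must be defined before the catch-all rule that raises the error. A self-contained first-match-wins sketch in plain re (the rule table is a hypothetical stand-in, not the library's code):

import re

# Stand-in for the lexer's rule ordering: first match wins, so the
# catch-all "#..." rule must come last, as it does in the class above.
rules = [
    ("LINE_MACRO", re.compile(r'\#[\t ]*line (\d+) "(.*)"')),
    ("PRAGMA_MACRO", re.compile(r"\#[\t ]*pragma")),
    ("INCLUDE_MACRO", re.compile(r"\#[\t ]*include (.*)")),
    ("PP_MACRO", re.compile(r"\#(.*)")),  # rejected with a parse error
]

def classify(line: str) -> str:
    for name, rx in rules:
        if rx.match(line):
            return name
    return "NOT_A_DIRECTIVE"

assert classify('#line 12 "foo.h"') == "LINE_MACRO"
assert classify("#pragma once") == "PRAGMA_MACRO"
assert classify('#include "a.h"') == "INCLUDE_MACRO"
assert classify("#define X 1") == "PP_MACRO"  # would raise in the real lexer
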
32 changes: 21 additions & 11 deletions cxxheaderparser/parser.py
@@ -304,7 +304,8 @@ def parse(self) -> None:
             "{": self._on_empty_block_start,
             "}": self._on_block_end,
             "DBL_LBRACKET": self._consume_attribute_specifier_seq,
-            "PRECOMP_MACRO": self._process_preprocessor_token,
+            "INCLUDE_MACRO": self._process_include_macro,
+            "PRAGMA_MACRO": self._process_pragma_macro,
             ";": lambda _1, _2: None,
         }

@@ -361,20 +362,29 @@ def parse(self) -> None:
     _preprocessor_compress_re = re.compile(r"^#[\t ]+")
     _preprocessor_split_re = re.compile(r"[\t ]+")

-    def _process_preprocessor_token(
-        self, tok: LexToken, doxygen: typing.Optional[str]
-    ) -> None:
+    def _process_include_macro(self, tok: LexToken, doxygen: typing.Optional[str]):
         value = self._preprocessor_compress_re.sub("#", tok.value)
         svalue = self._preprocessor_split_re.split(value, 1)
         if len(svalue) == 2:
             self.state.location = tok.location
-            macro = svalue[0].lower().replace(" ", "")
-            if macro.startswith("#include"):
-                self.visitor.on_include(self.state, svalue[1])
-            elif macro.startswith("#define"):
-                self.visitor.on_define(self.state, svalue[1])
-            elif macro.startswith("#pragma"):
-                self.visitor.on_pragma(self.state, svalue[1])
+            self.visitor.on_include(self.state, svalue[1])
+        else:
+            raise CxxParseError("incomplete #include directive", tok)
+
+    def _process_pragma_macro(self, _: LexToken, doxygen: typing.Optional[str]):
+        # consume all tokens until the end of the line
+        # -- but if we find a paren, get the group
+        tokens: LexTokenList = []
+        while True:
+            tok = self.lex.token_newline_eof_ok()
+            if not tok or tok.type == "NEWLINE":
+                break
+            if tok.type in self._balanced_token_map:
+                tokens.extend(self._consume_balanced_tokens(tok))
+            else:
+                tokens.append(tok)
+
+        self.visitor.on_pragma(self.state, self._create_value(tokens))

     #
     # Various
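
The new _process_pragma_macro reads tokens up to the next newline, but pulls in an entire balanced group when it meets an opening bracket; that is what lets a parenthesized pragma continue across lines. A simplified standalone sketch of that loop, over a hypothetical (type, value) token stream rather than the parser's real types:

from typing import Iterator, List, Tuple

Tok = Tuple[str, str]  # hypothetical (type, value) stand-in for LexToken

def gather_pragma(toks: Iterator[Tok]) -> List[Tok]:
    """Collect pragma tokens until NEWLINE, keeping balanced (...) groups
    intact even when they continue past a line break."""
    out: List[Tok] = []
    for tok in toks:
        if tok[0] == "NEWLINE":
            break
        if tok[0] == "(":
            out.append(tok)
            depth = 1
            for inner in toks:  # newlines inside the group are skipped
                if inner[0] == "NEWLINE":
                    continue
                if inner[0] == "(":
                    depth += 1
                elif inner[0] == ")":
                    depth -= 1
                out.append(inner)
                if depth == 0:
                    break
        else:
            out.append(tok)
    return out

# e.g. a pragma split across two lines inside parens:
stream = iter([("(", "("), ("NAME", "even"), ("NEWLINE", "\n"),
               ("NAME", "more"), (")", ")"), ("NEWLINE", "\n")])
assert [v for _, v in gather_pragma(stream)] == ["(", "even", "more", ")"]
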
16 changes: 3 additions & 13 deletions cxxheaderparser/simple.py
@@ -45,6 +45,7 @@
     UsingAlias,
     UsingDecl,
     Variable,
+    Value,
 )

 from .parserstate import (
@@ -123,14 +124,9 @@ class NamespaceScope:
 Block = typing.Union[ClassScope, NamespaceScope]


-@dataclass
-class Define:
-    content: str
-
-
 @dataclass
 class Pragma:
-    content: str
+    content: Value


 @dataclass
@@ -171,9 +167,6 @@ class N::C {
     #: Global namespace
     namespace: NamespaceScope = field(default_factory=lambda: NamespaceScope())

-    #: Any ``#define`` preprocessor directives encountered
-    defines: typing.List[Define] = field(default_factory=list)
-
     #: Any ``#pragma`` directives encountered
     pragmas: typing.List[Pragma] = field(default_factory=list)

@@ -208,10 +201,7 @@ def __init__(self) -> None:

         self.data = ParsedData(self.namespace)

-    def on_define(self, state: State, content: str) -> None:
-        self.data.defines.append(Define(content))
-
-    def on_pragma(self, state: State, content: str) -> None:
+    def on_pragma(self, state: State, content: Value) -> None:
         self.data.pragmas.append(Pragma(content))

     def on_include(self, state: State, filename: str) -> None:
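
Downstream users of the simple API should note that Pragma.content changes type from str to Value. A hedged migration sketch (the join below only approximates the old string, it does not reproduce it exactly):

from cxxheaderparser.simple import parse_string

data = parse_string("#pragma pack(push, 8)\n")

for pragma in data.pragmas:
    # Before: pragma.content was already a string.
    # Now: rebuild a comparable string from the token list.
    text = " ".join(tok.value for tok in pragma.content.tokens)
    print(text)  # e.g. "pack ( push , 8 )"
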
10 changes: 2 additions & 8 deletions cxxheaderparser/visitor.py
@@ -20,6 +20,7 @@
     UsingAlias,
     UsingDecl,
     Variable,
+    Value,
 )

 from .parserstate import (
@@ -36,14 +37,7 @@ class CxxVisitor(Protocol):
     Defines the interface used by the parser to emit events
     """

-    def on_define(self, state: State, content: str) -> None:
-        """
-        .. warning:: cxxheaderparser intentionally does not have a C preprocessor
-                     implementation. If you are parsing code with macros in it,
-                     use a conforming preprocessor like ``pcpp``
-        """
-
-    def on_pragma(self, state: State, content: str) -> None:
+    def on_pragma(self, state: State, content: Value) -> None:
         """
         Called once for each ``#pragma`` directive encountered
         """
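
Custom visitors only need to update the on_pragma signature; on_define is gone entirely. A minimal sketch (other CxxVisitor callbacks omitted, and the class name here is hypothetical):

import typing

from cxxheaderparser.parserstate import State
from cxxheaderparser.types import Value

class PragmaCollector:
    """Sketch: only the changed callback is shown; a real visitor
    implements the rest of the CxxVisitor protocol too."""

    def __init__(self) -> None:
        self.pragmas: typing.List[Value] = []

    def on_pragma(self, state: State, content: Value) -> None:
        # content is now a Value (token list), not a str
        self.pragmas.append(content)
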
61 changes: 44 additions & 17 deletions tests/test_misc.py
@@ -20,50 +20,77 @@
     Pragma,
     parse_string,
     ParsedData,
-    Define,
 )

 #
 # minimal preprocessor support
 #


-def test_define() -> None:
+def test_includes() -> None:
     content = """
-        #define simple
-        #define complex(thing) stuff(thing)
-        # define spaced
+        #include <global.h>
+        #include "local.h"
+        # include "space.h"
     """
     data = parse_string(content, cleandoc=True)

     assert data == ParsedData(
-        defines=[
-            Define(content="simple"),
-            Define(content="complex(thing) stuff(thing)"),
-            Define(content="spaced"),
-        ],
+        includes=[Include("<global.h>"), Include('"local.h"'), Include('"space.h"')]
     )


-def test_includes() -> None:
+def test_pragma() -> None:
     content = """
-        #include <global.h>
-        #include "local.h"
+        #pragma once
     """
     data = parse_string(content, cleandoc=True)

-    assert data == ParsedData(includes=[Include("<global.h>"), Include('"local.h"')])
+    assert data == ParsedData(
+        pragmas=[Pragma(content=Value(tokens=[Token(value="once")]))]
+    )


-def test_pragma() -> None:
+def test_pragma_more() -> None:
     content = """
-        #pragma once
+        #pragma (some content here)
+        #pragma (even \
+                 more \
+                 content here)
     """
     data = parse_string(content, cleandoc=True)

-    assert data == ParsedData(pragmas=[Pragma(content="once")])
+    assert data == ParsedData(
+        pragmas=[
+            Pragma(
+                content=Value(
+                    tokens=[
+                        Token(value="("),
+                        Token(value="some"),
+                        Token(value="content"),
+                        Token(value="here"),
+                        Token(value=")"),
+                    ]
+                )
+            ),
+            Pragma(
+                content=Value(
+                    tokens=[
+                        Token(value="("),
+                        Token(value="even"),
+                        Token(value="more"),
+                        Token(value="content"),
+                        Token(value="here"),
+                        Token(value=")"),
+                    ]
+                )
+            ),
+        ]
+    )


 #
