Remove pp and handle continuations #56

Merged · 2 commits · Aug 19, 2023
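This change removes the lexer's catch-all preprocessor handling: `#define` support is dropped (along with `ParsedData.defines` and `CxxVisitor.on_define`), `#pragma` content is delivered as a tokenized `Value` instead of a plain string, and backslash line continuations are spliced out while filling the token buffer. A rough sketch of the net effect on the public API (my example, assuming a build with this PR applied):

```python
from cxxheaderparser.simple import parse_string

data = parse_string("#pragma pack(push, 1)\nint x;\n")

# Pragma content is now a Value holding Token objects, not a plain string
print([tok.value for tok in data.pragmas[0].content.tokens])
# expected: ['pack', '(', 'push', ',', '1', ')']
```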
65 changes: 55 additions & 10 deletions cxxheaderparser/lexer.py
@@ -51,6 +51,7 @@ class LexToken(Protocol):
 
     #: private
     lexer: lex.Lexer
+    lexmatch: "re.Match"
 
 
 PhonyEnding: LexToken = lex.LexToken()  # type: ignore
@@ -175,7 +176,10 @@ class PlyLexer:
         # Comments
         "COMMENT_SINGLELINE",
         "COMMENT_MULTILINE",
-        "PRECOMP_MACRO",
+        "LINE_DIRECTIVE",
+        "PRAGMA_DIRECTIVE",
+        "INCLUDE_DIRECTIVE",
+        "PP_DIRECTIVE",
         # misc
         "DIVIDE",
         "NEWLINE",
@@ -434,16 +438,35 @@ def t_NAME(self, t: LexToken) -> LexToken:
             t.type = t.value
         return t
 
-    @TOKEN(r"\#.*")
-    def t_PRECOMP_MACRO(self, t: LexToken) -> typing.Optional[LexToken]:
-        m = _line_re.match(t.value)
-        if m:
-            self.filename = m.group(2)
+    @TOKEN(r'\#[\t ]*line (\d+) "(.*)"')
+    def t_LINE_DIRECTIVE(self, t: LexToken) -> None:
+        m = t.lexmatch
+        self.filename = m.group(2)
+        self.line_offset = 1 + self.lex.lineno - int(m.group(1))
 
-            self.line_offset = 1 + self.lex.lineno - int(m.group(1))
-            return None
-        else:
-            return t
+    @TOKEN(r"\#[\t ]*pragma")
+    def t_PRAGMA_DIRECTIVE(self, t: LexToken) -> LexToken:
+        return t
+
+    @TOKEN(r"\#[\t ]*include (.*)")
+    def t_INCLUDE_DIRECTIVE(self, t: LexToken) -> LexToken:
+        return t
+
+    @TOKEN(r"\#(.*)")
+    def t_PP_DIRECTIVE(self, t: LexToken):
+        # ignore C++23 warning directive
+        if t.value.startswith("#warning"):
+            return
+        if "define" in t.value:
+            msgtype = "#define"
+        else:
+            msgtype = "preprocessor"
+        self._error(
+            "cxxheaderparser does not support "
+            + msgtype
+            + " directives, please use a C++ preprocessor first",
+            t,
+        )
 
     t_DIVIDE = r"/(?!/)"
     t_ELLIPSIS = r"\.\.\."
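With these rules, `#line` updates the lexer's filename/line bookkeeping, `#pragma` and `#include` become dedicated tokens for the parser, `#warning` (C++23) is silently skipped, and any other directive — notably `#define` — is rejected with a parse error. A minimal sketch of that behavior (my example, assuming this PR; error text per the `_error` call above):

```python
from cxxheaderparser.errors import CxxParseError
from cxxheaderparser.simple import parse_string

# #warning is ignored entirely, so this parses to an empty ParsedData
parse_string('#warning "not supported here"\n')

# #define now raises instead of being collected
try:
    parse_string("#define ANSWER 42\n")
except CxxParseError as e:
    print(e)  # cxxheaderparser does not support #define directives, ...
```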
@@ -541,6 +564,12 @@ def get_doxygen_after(self) -> typing.Optional[str]:
         "WHITESPACE",
     }
 
+    _discard_types_except_newline = {
+        "COMMENT_SINGLELINE",
+        "COMMENT_MULTILINE",
+        "WHITESPACE",
+    }
+
     def token(self) -> LexToken:
         tokbuf = self.tokbuf
         while True:
@@ -563,6 +592,17 @@ def token_eof_ok(self) -> typing.Optional[LexToken]:
             if not self._fill_tokbuf(tokbuf):
                 return None
 
+    def token_newline_eof_ok(self) -> typing.Optional[LexToken]:
+        tokbuf = self.tokbuf
+        while True:
+            while tokbuf:
+                tok = tokbuf.popleft()
+                if tok.type not in self._discard_types_except_newline:
+                    return tok
+
+            if not self._fill_tokbuf(tokbuf):
+                return None
+
     def token_if(self, *types: str) -> typing.Optional[LexToken]:
         tok = self.token_eof_ok()
         if tok is None:
@@ -659,7 +699,12 @@ def _fill_tokbuf(self, tokbuf: typing.Deque[LexToken]) -> bool:
             tokbuf.append(tok)
 
             if tok.type == "NEWLINE":
-                break
+                # detect/remove line continuations
+                if len(tokbuf) > 2 and tokbuf[-2].type == "\\":
+                    tokbuf.pop()
+                    tokbuf.pop()
+                else:
+                    break
 
             # detect/combine user defined literals
             if tok.type in udl_start:
32 changes: 21 additions & 11 deletions cxxheaderparser/parser.py
@@ -304,7 +304,8 @@ def parse(self) -> None:
             "{": self._on_empty_block_start,
             "}": self._on_block_end,
             "DBL_LBRACKET": self._consume_attribute_specifier_seq,
-            "PRECOMP_MACRO": self._process_preprocessor_token,
+            "INCLUDE_DIRECTIVE": self._process_include_directive,
+            "PRAGMA_DIRECTIVE": self._process_pragma_directive,
             ";": lambda _1, _2: None,
         }

@@ -361,20 +362,29 @@ def parse(self) -> None:
     _preprocessor_compress_re = re.compile(r"^#[\t ]+")
     _preprocessor_split_re = re.compile(r"[\t ]+")
 
-    def _process_preprocessor_token(
-        self, tok: LexToken, doxygen: typing.Optional[str]
-    ) -> None:
+    def _process_include_directive(self, tok: LexToken, doxygen: typing.Optional[str]):
         value = self._preprocessor_compress_re.sub("#", tok.value)
         svalue = self._preprocessor_split_re.split(value, 1)
         if len(svalue) == 2:
             self.state.location = tok.location
-            macro = svalue[0].lower().replace(" ", "")
-            if macro.startswith("#include"):
-                self.visitor.on_include(self.state, svalue[1])
-            elif macro.startswith("#define"):
-                self.visitor.on_define(self.state, svalue[1])
-            elif macro.startswith("#pragma"):
-                self.visitor.on_pragma(self.state, svalue[1])
+            self.visitor.on_include(self.state, svalue[1])
+        else:
+            raise CxxParseError("incomplete #include directive", tok)
+
+    def _process_pragma_directive(self, _: LexToken, doxygen: typing.Optional[str]):
+        # consume all tokens until the end of the line
+        # -- but if we find a paren, get the group
+        tokens: LexTokenList = []
+        while True:
+            tok = self.lex.token_newline_eof_ok()
+            if not tok or tok.type == "NEWLINE":
+                break
+            if tok.type in self._balanced_token_map:
+                tokens.extend(self._consume_balanced_tokens(tok))
+            else:
+                tokens.append(tok)
+
+        self.visitor.on_pragma(self.state, self._create_value(tokens))
 
     #
     # Various
16 changes: 3 additions & 13 deletions cxxheaderparser/simple.py
@@ -45,6 +45,7 @@
     UsingAlias,
     UsingDecl,
     Variable,
+    Value,
 )
 
 from .parserstate import (
@@ -123,14 +124,9 @@ class NamespaceScope:
 Block = typing.Union[ClassScope, NamespaceScope]
 
 
-@dataclass
-class Define:
-    content: str
-
-
 @dataclass
 class Pragma:
-    content: str
+    content: Value
 
 
 @dataclass
@@ -171,9 +167,6 @@ class N::C {
     #: Global namespace
     namespace: NamespaceScope = field(default_factory=lambda: NamespaceScope())
 
-    #: Any ``#define`` preprocessor directives encountered
-    defines: typing.List[Define] = field(default_factory=list)
-
     #: Any ``#pragma`` directives encountered
     pragmas: typing.List[Pragma] = field(default_factory=list)
 
@@ -208,10 +201,7 @@ def __init__(self) -> None:
 
         self.data = ParsedData(self.namespace)
 
-    def on_define(self, state: State, content: str) -> None:
-        self.data.defines.append(Define(content))
-
-    def on_pragma(self, state: State, content: str) -> None:
+    def on_pragma(self, state: State, content: Value) -> None:
         self.data.pragmas.append(Pragma(content))
 
     def on_include(self, state: State, filename: str) -> None:
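With `ParsedData.defines` and `on_define` gone, callers that depend on macro expansion should run a real preprocessor before parsing. A hedged sketch using `pcpp` (the preprocessor the removed docstring recommended; consult pcpp's docs for the exact API):

```python
from io import StringIO

from pcpp import Preprocessor  # third-party preprocessor
from cxxheaderparser.simple import parse_string

content = "#define VALUE 42\nint x = VALUE;\n"

pp = Preprocessor()
pp.parse(content)
out = StringIO()
pp.write(out)  # expands macros; may emit #line directives

data = parse_string(out.getvalue())  # now free of #define
```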
10 changes: 2 additions & 8 deletions cxxheaderparser/visitor.py
@@ -20,6 +20,7 @@
     UsingAlias,
     UsingDecl,
     Variable,
+    Value,
 )
 
 from .parserstate import (
@@ -36,14 +37,7 @@ class CxxVisitor(Protocol):
     Defines the interface used by the parser to emit events
     """
 
-    def on_define(self, state: State, content: str) -> None:
-        """
-        .. warning:: cxxheaderparser intentionally does not have a C preprocessor
-                     implementation. If you are parsing code with macros in it,
-                     use a conforming preprocessor like ``pcpp``
-        """
-
-    def on_pragma(self, state: State, content: str) -> None:
+    def on_pragma(self, state: State, content: Value) -> None:
         """
         Called once for each ``#pragma`` directive encountered
         """
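Downstream visitors need a matching signature update: `on_pragma` now receives a `Value` (token list) rather than a `str`. A minimal sketch of adapting a custom visitor (hypothetical `MyVisitor` fragment, assuming this PR; a real visitor implements the rest of the `CxxVisitor` protocol too):

```python
from cxxheaderparser.parserstate import State
from cxxheaderparser.types import Value


class MyVisitor:
    def on_pragma(self, state: State, content: Value) -> None:
        # Rebuild a readable string from the pragma's tokens
        text = " ".join(tok.value for tok in content.tokens)
        print(f"#pragma {text}")
```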
3 changes: 0 additions & 3 deletions tests/test_enum.py
@@ -116,17 +116,14 @@ def test_enum_w_multiline_expr() -> None:
                 Token(value="<<"),
                 Token(value="24"),
                 Token(value="|"),
-                Token(value="\\"),
                 Token(value="'A'"),
                 Token(value="<<"),
                 Token(value="16"),
                 Token(value="|"),
-                Token(value="\\"),
                 Token(value="'S'"),
                 Token(value="<<"),
                 Token(value="8"),
                 Token(value="|"),
-                Token(value="\\"),
                 Token(value="'H'"),
                 Token(value=")"),
             ]
102 changes: 85 additions & 17 deletions tests/test_misc.py
@@ -20,50 +20,77 @@
     Pragma,
     parse_string,
     ParsedData,
-    Define,
 )
 
 #
 # minimal preprocessor support
 #
 
 
-def test_define() -> None:
+def test_includes() -> None:
     content = """
-        #define simple
-        #define complex(thing) stuff(thing)
-        # define spaced
+        #include <global.h>
+        #include "local.h"
+        # include "space.h"
     """
     data = parse_string(content, cleandoc=True)
 
     assert data == ParsedData(
-        defines=[
-            Define(content="simple"),
-            Define(content="complex(thing) stuff(thing)"),
-            Define(content="spaced"),
-        ],
+        includes=[Include("<global.h>"), Include('"local.h"'), Include('"space.h"')]
     )
 
 
-def test_includes() -> None:
+def test_pragma() -> None:
     content = """
-        #include <global.h>
-        #include "local.h"
+
+        #pragma once
+
     """
     data = parse_string(content, cleandoc=True)
 
-    assert data == ParsedData(includes=[Include("<global.h>"), Include('"local.h"')])
+    assert data == ParsedData(
+        pragmas=[Pragma(content=Value(tokens=[Token(value="once")]))]
+    )
 
 
-def test_pragma() -> None:
+def test_pragma_more() -> None:
     content = """
 
-        #pragma once
+        #pragma (some content here)
+        #pragma (even \
+            more \
+            content here)
 
     """
     data = parse_string(content, cleandoc=True)
 
-    assert data == ParsedData(pragmas=[Pragma(content="once")])
+    assert data == ParsedData(
+        pragmas=[
+            Pragma(
+                content=Value(
+                    tokens=[
+                        Token(value="("),
+                        Token(value="some"),
+                        Token(value="content"),
+                        Token(value="here"),
+                        Token(value=")"),
+                    ]
+                )
+            ),
+            Pragma(
+                content=Value(
+                    tokens=[
+                        Token(value="("),
+                        Token(value="even"),
+                        Token(value="more"),
+                        Token(value="content"),
+                        Token(value="here"),
+                        Token(value=")"),
+                    ]
+                )
+            ),
+        ]
+    )
 
 
 #
@@ -267,3 +294,44 @@ def test_user_defined_literal() -> None:
             ]
         )
     )
+
+
+#
+# Line continuation
+#
+
+
+def test_line_continuation() -> None:
+    content = """
+        static int \
+            variable;
+    """
+    data = parse_string(content, cleandoc=True)
+
+    assert data == ParsedData(
+        namespace=NamespaceScope(
+            variables=[
+                Variable(
+                    name=PQName(segments=[NameSpecifier(name="variable")]),
+                    type=Type(
+                        typename=PQName(segments=[FundamentalSpecifier(name="int")])
+                    ),
+                    static=True,
+                )
+            ]
+        )
+    )
+
+
+#
+# #warning (C++23)
+#
+
+
+def test_warning_directive() -> None:
+    content = """
+        #warning "this is a warning"
+    """
+    data = parse_string(content, cleandoc=True)
+
+    assert data == ParsedData()