Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement parsing free conversion operators #95

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 60 additions & 40 deletions cxxheaderparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from . import lexer
from .errors import CxxParseError
from .lexer import LexToken, Location, PhonyEnding
from .lexer import LexToken, Location, PhonyEnding, PlyLexer
from .options import ParserOptions
from .parserstate import (
ClassBlockState,
Expand Down Expand Up @@ -39,6 +39,7 @@
NameSpecifier,
NamespaceAlias,
NamespaceDecl,
Operator,
PQNameSegment,
Parameter,
PQName,
Expand Down Expand Up @@ -697,7 +698,7 @@ def _parse_template_specialization(self) -> TemplateSpecialization:

try:
parsed_type, mods = self._parse_type(None)
if parsed_type is None:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_parse_type now return Union of types. According to typing rules, there should be if checking which type is returned (that is the reason why function should not return Union)

if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="")
Expand Down Expand Up @@ -1022,7 +1023,7 @@ def _parse_using_typealias(
"""

parsed_type, mods = self._parse_type(None)
if parsed_type is None:
if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="parsing typealias")
Expand Down Expand Up @@ -1770,7 +1771,7 @@ def _parse_parameter(
else:
# required typename + decorators
parsed_type, mods = self._parse_type(tok)
if parsed_type is None:
if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="parsing parameter")
Expand Down Expand Up @@ -1883,7 +1884,7 @@ def _parse_trailing_return_type(
)

parsed_type, mods = self._parse_type(None)
if parsed_type is None:
if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="parsing trailing return type")
Expand Down Expand Up @@ -2301,7 +2302,7 @@ def _parse_type(
self,
tok: typing.Optional[LexToken],
operator_ok: bool = False,
) -> typing.Tuple[typing.Optional[Type], ParsedTypeModifiers]:
) -> typing.Tuple[typing.Union[Type, Operator], ParsedTypeModifiers]:
"""
This parses a typename and stops parsing when it hits something
that it doesn't understand. The caller uses the results to figure
Expand All @@ -2310,7 +2311,7 @@ def _parse_type(
This only parses the base type, does not parse pointers, references,
or additional const/volatile qualifiers

The returned type will only be None if operator_ok is True and an
The returned type will only be `Operator` if operator_ok is True and an
operator is encountered.
"""

Expand All @@ -2331,8 +2332,6 @@ def _parse_type(
tok = get_token()

pqname: typing.Optional[PQName] = None
pqname_optional = False
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is no longer used


_pqname_start_tokens = self._pqname_start_tokens
_attribute_start = self._attribute_start_tokens

Expand All @@ -2343,13 +2342,23 @@ def _parse_type(
if pqname is not None:
# found second set of names, done here
break

if operator_ok and tok_type == "operator":
# special case: conversion operators such as operator bool
pqname_optional = True
break
pqname, _ = self._parse_pqname(
tok, compound_ok=True, fn_ok=False, fund_ok=True
mods = ParsedTypeModifiers(vars, both, meths)
po = self._parse_member_operator()
return po, mods
Comment on lines +2365 to +2367
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we are parsing the old cases + some additional parsing that was in _parse_operator_conversion. I extracted that to a new function to make code a bit simpler.


pqname, op = self._parse_pqname(
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We must run _parse_pqname because we do not know if this is operator or another function. But if we already ran this function, we cannot return (or I do not found such functions) processed tokens,

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I changed argument fn_ok to True to not raise error when operator is detected.

tok, compound_ok=True, fn_ok=True, fund_ok=True
)

if op is not None:
# special case: conversion operator, but also a free operator
mods = ParsedTypeModifiers(vars, both, meths)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only parsed when we detected operator as a free operator.

po = self._parse_free_operator(pqname, op, mods, const, volatile)
return po, mods

elif tok_type in self._parse_type_ptr_ref_paren:
if pqname is None:
raise self._parse_error(tok)
Expand All @@ -2374,20 +2383,40 @@ def _parse_type(

tok = get_token()

if pqname is None:
if not pqname_optional:
raise self._parse_error(tok)
parsed_type = None
else:
# Construct a type from the parsed name
parsed_type = Type(pqname, const, volatile)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be simplified now, because pqname cannot be None anymore.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, mypy find that this pqname cannot be None anymore is not true.
pqname is initialized by None before the loop (after loop it always not None, but there is no harm to check this).

# Construct a type from the parsed name
parsed_type = Type(pqname, const, volatile)

self.lex.return_token(tok)

# Always return the modifiers
mods = ParsedTypeModifiers(vars, both, meths)
return parsed_type, mods

def _parse_member_operator(self) -> Operator:
"""This function parses operator from class body."""
ctype, cmods = self._parse_type(None)
if not isinstance(ctype, Type):
raise self._parse_error(None)
pqname = PQName([NameSpecifier("operator")])
return Operator(pqname, "conversion", ctype, cmods)

def _parse_free_operator(
self,
pqname: PQName,
op: str,
mods: ParsedTypeModifiers,
const: bool,
volatile: bool,
) -> Operator:
"""This function parses operator implemented outside class body."""
last_seg = pqname.segments[-1]
assert last_seg.name.startswith("operator")
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit strange to me: why there is not used more complex type in NameSpecifier and operator name is concatenated? Anyway, we do not care here about this name. We take the real operator name from op variable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The operator name gets concatenated in _parse_pqname_name... I think it would be better for that function to not concatenate and let the eventual user of the operator make that call.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I changed _parse_pqname_name to not concatenate this. This caused even more large change for something that seems like it should be rather simple, because I changed return types for several functions from str|None to list[LexToken].

last_seg.name = "operator"

type_name = PQName([NameSpecifier(p) for p in op.split(PlyLexer.t_DBL_COLON)])
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the part where I am the most hesitant, because we split here operator by ::.
It seems good enough (the code is parsed after all), but if there is a better way, I think it should be changed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also don't like this.

t = Type(type_name, const, volatile)
return Operator(pqname, "conversion", t, mods)

def _parse_decl(
self,
parsed_type: Type,
Expand Down Expand Up @@ -2538,41 +2567,32 @@ def _parse_decl(

def _parse_operator_conversion(
self,
operator: Operator,
mods: ParsedTypeModifiers,
location: Location,
doxygen: typing.Optional[str],
template: TemplateDeclTypeVar,
is_typedef: bool,
is_friend: bool,
) -> None:
tok = self._next_token_must_be("operator")

Comment on lines -2548 to -2549
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The token tok has already been processed in _parse_type.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, perhaps a more appropriate name for this function would be _parse_type_or_operator ?

if is_typedef:
raise self._parse_error(tok, "operator not permitted in typedef")

# next piece must be the conversion type
ctype, cmods = self._parse_type(None)
if ctype is None:
raise self._parse_error(None)
raise self._parse_error(None, "operator not permitted in typedef")

cmods.validate(var_ok=False, meth_ok=False, msg="parsing conversion operator")
operator.cmods.validate(
var_ok=False, meth_ok=False, msg="parsing conversion operator"
)

# Check for any cv decorations for the type
rtype = self._parse_cv_ptr(ctype)
rtype = self._parse_cv_ptr(operator.ctype)

# then this must be a method
self._next_token_must_be("(")

# make our own pqname/op here
segments: typing.List[PQNameSegment] = [NameSpecifier("operator")]
pqname = PQName(segments)
op = "conversion"

if self._parse_function(
mods,
rtype,
pqname,
op,
operator.pqname,
operator.operator_name,
template,
doxygen,
location,
Expand Down Expand Up @@ -2612,7 +2632,7 @@ def _parse_declarations(

# Check to see if this might be a class/enum declaration
if (
parsed_type is not None
isinstance(parsed_type, Type)
and parsed_type.typename.classkey
and self._maybe_parse_class_enum_decl(
parsed_type, mods, doxygen, template, is_typedef, is_friend, location
Expand All @@ -2635,10 +2655,10 @@ def _parse_declarations(

mods.validate(var_ok=var_ok, meth_ok=meth_ok, msg=msg)

if parsed_type is None:
if isinstance(parsed_type, Operator):
# this means an operator was encountered, deal with the special case
self._parse_operator_conversion(
mods, location, doxygen, template, is_typedef, is_friend
parsed_type, mods, location, doxygen, template, is_typedef, is_friend
)
return

Expand Down
17 changes: 17 additions & 0 deletions cxxheaderparser/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

from .tokfmt import tokfmt, Token

if typing.TYPE_CHECKING:
from .parserstate import ParsedTypeModifiers


@dataclass
class Value:
Expand Down Expand Up @@ -298,6 +301,20 @@ def format_decl(self, name: str):
return f"{c}{v}{self.typename.format()} {name}"


@dataclass
class Operator:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if this isn't exposed to users, it probably doesn't belong in types.py

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok,
I had thought about it before, but since this module contains a lot of data classes, I decided to put it there.
I removed it from types.py and moved to parser.py, because only there it is used (I personally prefer to keep many small files, but I see that convention in this project is to keep small number of modules).

"""An internal structure for parsing operator."""

pqname: PQName
"""Possibly qualified name for operator."""
operator_name: str
"""Conversion operator have always `conversion` str in this attribute."""
ctype: Type
"""Return type for this operator."""
cmods: "ParsedTypeModifiers"
"""Return type modifiers for this operator."""

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only for internal usage to store data between _parse_type and _parse_operator_conversion in _parse_declarations method.


@dataclass
class Array:
"""
Expand Down
127 changes: 127 additions & 0 deletions tests/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,3 +746,130 @@ def test_free_operator() -> None:
]
)
)


def test_free_conversion_operator() -> None:
virtuald marked this conversation as resolved.
Show resolved Hide resolved
content = """
Foo::operator Type1() { return SomeMethod(); }
const Foo::operator Type2() const { return SomeMethod(); }
volatile Foo::operator Type3() const { return SomeMethod(); }

Foo::operator Foo::Type4() { return SomeMethod(); }
const Foo::operator Foo::Type5() const { return SomeMethod(); }
volatile Foo::operator Foo::Type6() const { return SomeMethod(); }
"""
data = parse_string(content, cleandoc=True)

assert data == ParsedData(
namespace=NamespaceScope(
method_impls=[
Method(
return_type=Type(
typename=PQName(segments=[NameSpecifier(name="Type1")])
),
name=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="operator"),
]
),
parameters=[],
has_body=True,
operator="conversion",
),
Method(
return_type=Type(
typename=PQName(segments=[NameSpecifier(name="Type2")]),
const=True,
),
name=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="operator"),
]
),
parameters=[],
has_body=True,
operator="conversion",
const=True,
),
Method(
return_type=Type(
typename=PQName(segments=[NameSpecifier(name="Type3")]),
volatile=True,
),
name=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="operator"),
]
),
parameters=[],
has_body=True,
operator="conversion",
const=True,
),
Method(
return_type=Type(
typename=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="Type4"),
]
)
),
name=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="operator"),
]
),
parameters=[],
has_body=True,
operator="conversion",
),
Method(
return_type=Type(
typename=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="Type5"),
]
),
const=True,
),
name=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="operator"),
]
),
parameters=[],
has_body=True,
operator="conversion",
const=True,
),
Method(
return_type=Type(
typename=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="Type6"),
]
),
volatile=True,
),
name=PQName(
segments=[
NameSpecifier(name="Foo"),
NameSpecifier(name="operator"),
]
),
parameters=[],
has_body=True,
operator="conversion",
const=True,
),
]
)
)
Loading