diff --git a/src/hawkmoth/parser.py b/src/hawkmoth/parser.py index 2bd8e547..419ed2a3 100644 --- a/src/hawkmoth/parser.py +++ b/src/hawkmoth/parser.py @@ -38,6 +38,7 @@ from clang.cindex import StorageClass, AccessSpecifier, ExceptionSpecificationKind from clang.cindex import Index, TranslationUnit, TranslationUnitLoadError from clang.cindex import Diagnostic +from clang.cindex import SourceLocation, SourceRange from hawkmoth import docstring @@ -185,6 +186,31 @@ def _comment_extract(tu): return top_level_comments, comments +# Workaround for clang sometimes failing to tokenize cursor extents with macro +# expansions in them. A simple 'bool x' variable or struct member cursor will +# fail to tokenize, because bool is a macro. +def _cursor_get_tokens(cursor): + # Try get_tokens() first + tokens = [t for t in cursor.get_tokens()] + if tokens: + yield from tokens + return + + # Fall back to recreating the extent and getting the tokens from the + # translation unit. Notably the repr for both extent and cursor.extent will + # be the same, but under the hood there's something wrong. + tu = cursor.translation_unit + + start = cursor.extent.start + start = SourceLocation.from_position(tu, start.file, start.line, start.column) + + end = cursor.extent.end + end = SourceLocation.from_position(tu, end.file, end.line, end.column) + + extent = SourceRange.from_locations(start, end) + + yield from tu.get_tokens(extent=extent) + def _get_meta(comment, cursor=None): meta = {'line': comment.extent.start.line} if cursor: @@ -200,7 +226,7 @@ def _get_macro_args(cursor): if cursor.kind != CursorKind.MACRO_DEFINITION: return None - tokens = cursor.get_tokens() + tokens = _cursor_get_tokens(cursor) # Use the first two tokens to make sure this starts with 'IDENTIFIER(' one = next(tokens) @@ -246,7 +272,7 @@ def _get_function_quals(cursor): Returns: List of (prefix) function qualifiers. 
""" - tokens = [t.spelling for t in cursor.get_tokens()] + tokens = [t.spelling for t in _cursor_get_tokens(cursor)] quals = [] if 'static' in tokens: @@ -262,7 +288,7 @@ def _get_method_quals(cursor): Returns: List of prefix method qualifiers and list of suffix method qualifiers. """ - tokens = [t.spelling for t in cursor.get_tokens()] + tokens = [t.spelling for t in _cursor_get_tokens(cursor)] pre_quals = [] pos_quals = [] @@ -338,7 +364,7 @@ def _get_template_line(cursor): # We can do it by looking at the tokens directly. This is slightly # complicated due to variadic template type parameters. def typetype(cursor): - tokens = list(cursor.get_tokens()) + tokens = list(_cursor_get_tokens(cursor)) if tokens[-2].spelling == '...': return f'{tokens[-3].spelling}...' else: @@ -373,7 +399,7 @@ def _specifiers_fixup(cursor, basetype): Returns: List of C++ specifiers for the cursor. """ - tokens = [t.spelling for t in cursor.get_tokens()] + tokens = [t.spelling for t in _cursor_get_tokens(cursor)] type_elem = [] if 'mutable' in tokens: @@ -397,7 +423,7 @@ def _get_scopedenum_type(cursor): ``None`` otherwise. """ if cursor.kind == CursorKind.ENUM_DECL and cursor.is_scoped_enum(): - if list(cursor.get_tokens())[3].spelling == ':': + if list(_cursor_get_tokens(cursor))[3].spelling == ':': return f': {cursor.enum_type.spelling}' return None @@ -660,7 +686,7 @@ def _recursive_parse(domain, comments, errors, cursor, nest): elif cursor.kind == CursorKind.ENUM_CONSTANT_DECL: # Show enumerator value if it's explicitly set in source - if '=' in [t.spelling for t in cursor.get_tokens()]: + if '=' in [t.spelling for t in _cursor_get_tokens(cursor)]: value = cursor.enum_value else: value = None @@ -721,7 +747,7 @@ def _parse_undocumented_block(domain, comments, errors, cursor, nest): # For some reason, the Python bindings don't return the cursor kind # LINKAGE_SPEC as one would expect, so we need to do it the hard way. 
if cursor.kind == CursorKind.UNEXPOSED_DECL: - tokens = cursor.get_tokens() + tokens = _cursor_get_tokens(cursor) ntoken = next(tokens, None) if ntoken and ntoken.spelling == 'extern': ntoken = next(tokens, None)