# Source code for sphinx_immaterial.apidoc.cpp.api_parser
"""Converts a preprocessed C++ source file into a JSON API description.
This program expects as input preprocessed C++ source code obtained using the
`-E` (preprocessed), `-C` (preserve comments), and `-dD` (preserve macro
definitions) GCC/Clang options. It also accepts a list of compiler options to
be passed to Clang, although these are mostly irrelevant due to the prior
preprocessing.
It uses a combination of the libclang Python bindings and the C++ parser in the
Sphinx Python package to parse declarations and definitions and produce a JSON
representation of the API.
The JSON representation describes the API as a set of "entities", each with a
unique id, and relations between them.
Libclang can handle arbitrary C++ syntax, but provides very restricted access to
the resultant AST. To workaround those limitations, in some cases extracted
declarations or portions of declarations are converted back to source
representation and re-parsed using the C++ parser provided by the Sphinx C++
domain. The C++ parser in Sphinx is extremely limited and as it does not rely
on a symbol table does not always correctly handle template arguments, but
provides full access to its AST.
This program performs a number of transformations on the declarations:
- Exported entities are filtered using various criteria
- `std::enable_if_t` uses are converted to C++20 requires clauses.
- Internal return types are elided (replaced with auto).
"""
import argparse
import dataclasses
import functools
import json
import os
import pathlib
import re
import time
import typing
from typing import (
cast,
Dict,
Any,
List,
Tuple,
Optional,
Sequence,
Union,
Pattern,
Literal,
Callable,
TypedDict,
)
from textwrap import dedent
import ctypes
import clang.cindex
from clang.cindex import (
Cursor,
CursorKind,
Token,
TokenKind,
TranslationUnit,
SourceLocation,
SourceRange,
)
import docutils.nodes
import pydantic.dataclasses
import sphinx.domains.cpp
import sphinx.util.logging
from typing_extensions import NotRequired
from . import ast_fixes # pylint: disable=unused-import
logger = sphinx.util.logging.getLogger(__name__)
# Pattern that can never match: "a" followed by the start-of-string anchor.
_UNMATCHABLE_REGEXP = re.compile("a^")
def _combine_regexp_list(items: Sequence[Union[str, Pattern[str]]]) -> re.Pattern:
if not items:
return _UNMATCHABLE_REGEXP
def get_parenthesized_source(x: Union[str, Pattern[str]]):
if isinstance(x, re.Pattern):
x = x.pattern
return f"(?:{x})"
return re.compile("|".join(get_parenthesized_source(x) for x in items))
def _make_replacement_pattern(
strings: List[str], prefix: str, suffix: str
) -> re.Pattern:
if not strings:
return _UNMATCHABLE_REGEXP
return re.compile(
"|".join(rf"(?:{prefix}{re.escape(before)}{suffix})" for before in strings)
)
# Matches a type template parameter defaulted to `std::enable_if_t<...>`;
# group 1 captures the enable_if condition.
TEMPLATE_PARAMETER_ENABLE_IF_TYPE_PATTERN = re.compile(
    r"\s*(?:typename|class)\s*=\s*std\s*::\s*enable_if_t\s*<(.*)>\s*"
)
# Matches a non-type `std::enable_if_t<...>* = nullptr` (or `= 0`) parameter;
# group 1 captures the enable_if condition.
TEMPLATE_PARAMETER_ENABLE_IF_NON_TYPE_PATTERN = re.compile(
    r"\s*std\s*::\s*enable_if_t\s*<(.*)>\s*\*\s*=\s*(nullptr|0)\s*"
)
# Matches doc-comment commands such as `\ingroup foo` or `@relates Bar`;
# group 1 is the command name, group 2 its argument.
SPECIAL_GROUP_COMMAND_PATTERN = re.compile(
    r"^(?:\\|@)(ingroup|relates|membergroup|id)\s+(.+[^\s])\s*$", re.MULTILINE
)
# [docs]
@pydantic.dataclasses.dataclass
class Config:
    """Specifies a C++ API parsing configuration.

    Based on this configuration, a description of the API is generated.
    """

    input_path: str = "__input.cpp"
    """Path to the input file to parse.
    This may either be a path to an existing file, or `.input_content` may
    specify its content, in which case the filesystem is not accessed.
    If `.input_content` is specified and merely contains :cpp:`#include`
    directives, then the actual path does not matter and may be left as the
    default value.
    """

    input_content: Optional[bytes] = None
    """Specifies the content of `.input_path`.
    If unspecified, the content is read from filesystem.
    """

    compiler_flags: List[str] = dataclasses.field(default_factory=list)
    """List of compiler flags to pass to Clang."""

    verbose: bool = False
    """Parse in verbose mode."""

    include_directory_map: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps actual include directories to a displayed directory name.
    The keys should be prefixes of paths specified in error messages/source
    locations identified by clang.
    The values should be the corresponding prefix to use in the documented
    :cpp:`#include` paths.
    """

    allow_paths: List[Pattern] = dataclasses.field(
        default_factory=lambda: [re.compile("")]
    )
    """List of regular expressions matching *allowed* paths.
    Only entities defined in files that match `.allow_paths`, and don't match
    `.disallow_paths`, are documented. By default all entities are documented,
    but this default is not normally usable, because it will include entities
    defined in the standard library and third-party libraries.

    .. important::

        When building on Windows, all path separators are normalized to :python:`"/"`.
        Therefore, in the specified regular expressions, always use :python:`"/"` to
        match a path separator.
    """

    disallow_paths: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching *disallowed* paths.
    Entities defined in files matching any of these patterns are not documented.

    .. important::

        When building on Windows, all path separators are normalized to :python:`"/"`.
        Therefore, in the specified regular expressions, always use :python:`"/"` to
        match a path separator.
    """

    disallow_namespaces: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching *disallowed* namespaces.
    Entities defined in namespaces matching any of the specified patterns are
    not documented.
    """

    allow_symbols: List[Pattern] = dataclasses.field(
        default_factory=lambda: [re.compile("")]
    )
    """List of regular expressions matching *allowed* symbols.
    Only symbols matching one of the `.allow_symbols` patterns, and not matching
    `.disallow_symbols`, are documented. By default, all symbols are allowed.
    """

    disallow_symbols: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching *disallowed* symbols.
    Symbols matching any of these patterns are undocumented.
    """

    allow_macros: List[Pattern] = dataclasses.field(
        default_factory=lambda: [re.compile("")]
    )
    """List of regular expressions matching *allowed* macros.
    Only macros names matching `.allow_macros`, and not matching
    `.disallow_macros`, are documented.
    """

    disallow_macros: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching *disallowed* macro names.
    Macros matching any of these patterns are undocumented.
    """

    ignore_diagnostics: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching diagnostics to ignore.
    Diagnostics matching any of these patterns are ignored.
    """

    template_parameter_enable_if_patterns: List[Pattern] = dataclasses.field(
        default_factory=lambda: [
            TEMPLATE_PARAMETER_ENABLE_IF_TYPE_PATTERN,
            TEMPLATE_PARAMETER_ENABLE_IF_NON_TYPE_PATTERN,
        ]
    )
    """Patterns used to recognize `std::enable_if_t` template parameters."""

    type_replacements: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps type spellings to the replacement spelling used in the output."""

    hide_types: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching *hidden* types.
    Matching return types are replaced with :cpp:`auto`, and matching
    initializers are elided.
    """

    ignore_template_parameters: List[Pattern] = dataclasses.field(default_factory=list)
    """List of regular expressions matching *ignored* template parameters.
    Template parameters with a declaration matching any of these patterns are
    excluded from the generated documentation.
    """

    hide_initializers: List[Pattern] = dataclasses.field(
        default_factory=lambda: [re.compile(r"^=\s*(?:(true|false)\s*$|\[)")]
    )
    """List of regular expressions matching initializers to elide.
    Any matching initializer expression is elided from the generated
    documentation.
    """

    # Derived from `allow_paths`.
    allow_path_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `disallow_paths`.
    disallow_path_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `allow_symbols`.
    allow_symbols_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `disallow_symbols`.
    disallow_symbols_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `allow_macros`.
    allow_macros_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `disallow_macros`.
    disallow_macros_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `ignore_diagnostics`.
    ignore_diagnostics_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `hide_types`.
    hide_types_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `type_replacements`.
    type_replacements_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `ignore_template_parameters`.
    ignore_template_parameters_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `hide_initializers`.
    hide_initializers_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `include_directory_map`.
    include_directory_map_pattern: Pattern = dataclasses.field(init=False)
    # Derived from `disallow_namespaces`.
    disallow_namespaces_pattern: Pattern = dataclasses.field(init=False)

    def __post_init__(self):
        # Compile each user-supplied pattern list into a single combined
        # pattern used for matching throughout this module.
        self.allow_path_pattern = _combine_regexp_list(self.allow_paths)  # type: ignore[misc]
        self.disallow_path_pattern = _combine_regexp_list(self.disallow_paths)  # type: ignore[misc]
        self.disallow_namespaces_pattern = _combine_regexp_list(
            self.disallow_namespaces
        )
        self.allow_symbols_pattern = _combine_regexp_list(self.allow_symbols)  # type: ignore[misc]
        self.disallow_symbols_pattern = _combine_regexp_list(self.disallow_symbols)  # type: ignore[misc]
        self.allow_macros_pattern = _combine_regexp_list(self.allow_macros)  # type: ignore[misc]
        self.disallow_macros_pattern = _combine_regexp_list(self.disallow_macros)  # type: ignore[misc]
        self.ignore_diagnostics_pattern = _combine_regexp_list(self.ignore_diagnostics)  # type: ignore[misc]
        self.hide_types_pattern = _combine_regexp_list(self.hide_types)  # type: ignore[misc]
        self.type_replacements_pattern = _make_replacement_pattern(  # type: ignore[misc]
            list(self.type_replacements.keys()), prefix=r"\b", suffix=r"\b"
        )
        self.ignore_template_parameters_pattern = _combine_regexp_list(  # type: ignore[misc]
            self.ignore_template_parameters
        )
        self.hide_initializers_pattern = _combine_regexp_list(self.hide_initializers)  # type: ignore[misc]
        if os.name == "nt":
            # Normalize Windows path separators so patterns can always use "/".
            self.normalized_include_directory_map = {  # type: ignore[misc]
                key.replace("\\", "/"): value
                for key, value in self.include_directory_map.items()
            }
        else:
            self.normalized_include_directory_map = self.include_directory_map  # type: ignore[misc]
        self.include_directory_map_pattern = _make_replacement_pattern(  # type: ignore[misc]
            list(self.normalized_include_directory_map.keys()), prefix="^", suffix=""
        )
        self.cached_mapped_include_directories = {}  # type: ignore[misc]

    # `include_directory_map` with separators normalized (see `__post_init__`).
    normalized_include_directory_map: Dict[str, str] = dataclasses.field(init=False)
    # Memoization cache for `map_include_path`.
    cached_mapped_include_directories: Dict[str, str] = dataclasses.field(init=False)

    def map_include_path(self, path: str) -> str:
        """Applies `include_directory_map` to `path`, caching the result."""
        mapped = self.cached_mapped_include_directories.get(path)
        if mapped is not None:
            return mapped
        if os.name == "nt":
            path = path.replace("\\", "/")
        if path.startswith("./"):
            path = path[2:]
        new_mapped = self.include_directory_map_pattern.sub(
            lambda m: self.normalized_include_directory_map[m.group(0)], path
        )
        self.cached_mapped_include_directories[path] = new_mapped
        return new_mapped
# Unique identifier of a documented entity (derived from the libclang USR).
EntityId = str
# All entity kinds that may appear in the JSON output.
# NOTE(review): "destructor" appears in `FunctionEntityKind` but not here, and
# "macro" (see `MacroEntity`) is also missing — confirm whether intentional.
EntityKind = Literal[
    "class",
    "conversion_function",
    "function",
    "method",
    "constructor",
    "var",
    "alias",
    "enum",
]
# Subset of entity kinds that are function-like.
FunctionEntityKind = Literal[
    "conversion_function", "function", "method", "constructor", "destructor"
]
# C++ class-key used to introduce a class type.
ClassKeyword = Literal["class", "struct"]
class JsonLocation(TypedDict):
    """Source location in the JSON API description."""

    file: str
    line: int
    col: int
class JsonDocComment(TypedDict):
    """Documentation comment text plus its source location."""

    text: str
    location: JsonLocation
# JSON classification of a template parameter.
TemplateParameterKind = Literal["type", "template", "non_type"]
class TemplateParameter(TypedDict):
    """JSON representation of a single template parameter."""

    declaration: str
    name: str
    kind: TemplateParameterKind
    # True if the parameter is a parameter pack (`...`).
    pack: bool
class CppApiEntityBase(TypedDict, total=False):
    """Fields common to every entity in the JSON API description."""

    id: EntityId
    parent: NotRequired[EntityId]
    scope: NotRequired[str]
    doc: NotRequired[Optional[JsonDocComment]]
    document_with: NotRequired[EntityId]
    siblings: NotRequired[List[EntityId]]
    name: str
    template_parameters: NotRequired[Optional[List[TemplateParameter]]]
    location: JsonLocation
    special_id: NotRequired[Optional[str]]
    page_name: str
    # C++20 requires-clause expressions (also synthesized from enable_if_t).
    requires: Optional[List[str]]
    # Primary template this entity specializes (`True` if not resolvable).
    specializes: Union[None, EntityId, Literal[True]]
    related_members: Dict[str, List[EntityId]]
    related_nonmembers: Dict[str, List[EntityId]]
    # Values of `\membergroup`/`\ingroup`/`\relates` doc commands
    # (see SPECIAL_GROUP_COMMAND_PATTERN).
    special_membergroup: str
    special_ingroup: str
    special_relates: str
    document_prefix: str
    # Arguments of `// NONITPICK:` comments attached to the declaration.
    nonitpick: List[str]
class FunctionEntity(CppApiEntityBase):
    """JSON entity for a function-like declaration."""

    kind: FunctionEntityKind
    # Number of declared parameters (PARM_DECL children).
    arity: int
    # Placeholder identifier substituted for the real name in `declaration`.
    name_substitute: str
    friend: bool
    declaration: str
class BaseClass(TypedDict):
    """A documented base class of a class entity."""

    type: str
    # Access specifier spelling: "public", "protected", or "private".
    access: str
class ClassEntity(CppApiEntityBase):
    """JSON entity for a class/struct declaration."""

    kind: Literal["class"]
    keyword: ClassKeyword
    prefix: List[str]
    bases: List[BaseClass]
class VarEntity(CppApiEntityBase):
    """JSON entity for a variable declaration."""

    kind: Literal["var"]
    declaration: str
    name_substitute: str
    # Initializer source (prefixed with "= "), or None if absent/elided.
    initializer: Optional[str]
class TypeAliasEntity(CppApiEntityBase):
    """JSON entity for a type alias (using/typedef) declaration."""

    kind: Literal["alias"]
    # The aliased type, or None if hidden via `Config.hide_types`.
    underlying_type: Optional[str]
class MacroEntity(CppApiEntityBase):
    """JSON entity for a preprocessor macro definition."""

    kind: Literal["macro"]
    # Parameter names for function-like macros; None otherwise.
    parameters: Optional[List[str]]
class EnumeratorEntity(TypedDict):
    """JSON representation of a single enumerator."""

    kind: Literal["enumerator"]
    id: EntityId
    name: str
    decl: str
    doc: Optional[JsonDocComment]
    location: JsonLocation
class EnumEntity(CppApiEntityBase):
    """JSON entity for an enum declaration."""

    kind: Literal["enum"]
    # "class"/"struct" for scoped enums, None for unscoped enums.
    keyword: Optional[ClassKeyword]
    enumerators: List[EnumeratorEntity]
# Union of all entity variants, discriminated by the `kind` field.
CppApiEntity = Union[
    ClassEntity, FunctionEntity, VarEntity, TypeAliasEntity, MacroEntity, EnumEntity
]
def json_location_to_string(location: Optional[JsonLocation]) -> Optional[str]:
    """Formats `location` as ``file:line:col``, or returns None if absent."""
    if location is None:
        return None
    return f"{location['file']}:{location['line']}:{location['col']}"
def get_entity_id(cursor: Cursor) -> EntityId:
    """Returns a unique identifier for the entity declared by `cursor`."""
    # USR workarounds from:
    # https://github.com/foonathan/cppast/blob/e558e2d58f519e3a83af770d460672b1d4ba2886/src/libclang/parse_functions.cpp#L13
    usr = cursor.get_usr()
    if cursor.kind in (CursorKind.FUNCTION_TEMPLATE, CursorKind.CONVERSION_FUNCTION):
        # Combine return type with USR to prevent collisions
        return f"{usr} {cursor.result_type.spelling}"
    if cursor.kind == CursorKind.CLASS_TEMPLATE_PARTIAL_SPECIALIZATION:
        # libclang issue: templ<T()> vs templ<T() &>
        # but identical USR
        # same workaround: combine display name with usr
        # (and hope this prevents all collisions...)
        return f"{usr} {cursor.displayname}"
    return usr
def _substitute_internal_type_names(config: Config, decl: str) -> str:
    """Applies the configured `type_replacements` map to `decl`."""

    def _replace(match: "re.Match") -> str:
        return config.type_replacements[match.group(0)]

    return config.type_replacements_pattern.sub(_replace, decl)
def get_previous_line_location(tu, location: SourceLocation):
    """Returns a location at column 1 of the line preceding `location` in `tu`."""
    # Removed unused locals `f`/`line` from the original implementation.
    return SourceLocation.from_position(tu, location.file, location.line - 1, 1)
def get_presumed_location(location: SourceLocation) -> typing.Tuple[str, int, int]:
    """Returns the presumed ``(file, line, column)`` for `location`.

    Calls the `clang_getPresumedLocation` C API directly via ctypes; the
    presumed location honors the `#line` markers present in the preprocessed
    input (see module docstring).
    """
    f, l, c = clang.cindex._CXString(), ctypes.c_uint(), ctypes.c_uint()
    clang.cindex.conf.lib.clang_getPresumedLocation(
        location, ctypes.byref(f), ctypes.byref(l), ctypes.byref(c)
    )
    return (clang.cindex._CXString.from_result(f), int(l.value), int(c.value))
def _get_template_cursor_kind(cursor: Cursor) -> CursorKind:
    """Returns the kind of the declaration that the template `cursor` templates
    (via the `clang_getTemplateCursorKind` C API)."""
    return CursorKind.from_id(clang.cindex.conf.lib.clang_getTemplateCursorKind(cursor))
def _get_specialized_cursor_template(cursor: Cursor) -> typing.Optional[Cursor]:
    """Returns the template that `cursor` specializes, if any
    (via the `clang_getSpecializedCursorTemplate` C API)."""
    return clang.cindex.conf.lib.clang_getSpecializedCursorTemplate(cursor)
def _is_doc_comment(token: Token):
    """Returns True if `token` is a `///`-style documentation comment."""
    return token.spelling.startswith("///")
def _get_full_nested_name(cursor: typing.Optional[Cursor]) -> str:
    """Returns the fully qualified scope prefix of `cursor`, e.g. ``a::b::``.

    Namespaces contribute their plain spelling; other ancestors contribute
    their display name (which includes template arguments).
    """
    if cursor is None:
        return ""
    parts = []
    node = cursor
    while node.kind != CursorKind.TRANSLATION_UNIT:
        if node.kind == CursorKind.NAMESPACE:
            component = node.spelling
        else:
            component = node.displayname
        parts.append(component + "::")
        node = node.semantic_parent
    return "".join(reversed(parts))
# Cursor kinds that declare a class type; their members are also scanned.
CLASS_KINDS = (
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.CLASS_TEMPLATE_PARTIAL_SPECIALIZATION,
)
def _get_all_decls(config: Config, cursor: Cursor, allow_file):
    """Recursively yields documentable declaration cursors under `cursor`.

    :param allow_file: Optional predicate on presumed file names; `None` means
        the enclosing scope was already accepted.
    """
    NAMESPACE = CursorKind.NAMESPACE
    for child in cursor.get_children():
        location = child.location
        if location.file is None:
            # No associated file (e.g. compiler built-ins); skip.
            continue
        kind = child.kind
        if kind == NAMESPACE:
            # Recurse into allowed namespaces; the namespace itself is not
            # yielded as an entity.
            if (
                not allow_file or allow_file(get_presumed_location(location)[0])
            ) and not config.disallow_namespaces_pattern.match(child.spelling):
                yield from _get_all_decls(config, child, None)
            continue
        # `ALLOWED_KINDS` is defined elsewhere in this module.
        if kind not in ALLOWED_KINDS:
            continue
        if allow_file and not allow_file(get_presumed_location(location)[0]):
            continue
        if child.kind == CursorKind.MACRO_DEFINITION:
            # NOTE(review): this branch is redundant with the fall-through
            # `yield child` below, since macros never match `CLASS_KINDS`.
            yield child
            continue
        yield child
        if kind in CLASS_KINDS:
            # Also yield the members of class types.
            yield from _get_all_decls(config, child, None)
def split_doc_comment_into_lines(cmt: str) -> List[str]:
    """Strip the raw string of an object's comment into lines.

    :param cmt: the comment to parse.
    :returns: A list of the lines without the surrounding C++ comment syntax.
    """
    # split into a list of lines & account for CRLF and LF line endings
    body = [line.rstrip("\r") for line in cmt.splitlines()]
    if not body:
        # Guard against an empty comment, which previously raised IndexError
        # on the `body[0]` access below.
        return [""]
    # strip all the comment syntax out
    if body[0].startswith("//"):
        body = [line.lstrip("/").lstrip("!").lstrip("<") for line in body]
    elif body[0].startswith("/*"):
        body[0] = body[0].lstrip("/").lstrip("*").lstrip("!")
        multi_lined_asterisk = True  # works also for single-line comment blocks
        if len(body) > 1:
            # Only strip leading '*' when *every* continuation line has one.
            line_has_asterisk = [line.startswith("*") for line in body[1:]]
            multi_lined_asterisk = line_has_asterisk.count(True) == len(body) - 1
        body = [
            (line.lstrip("*").lstrip("<") if multi_lined_asterisk else line)
            for line in body
        ]
        # rstrip("*/") strips trailing '*' and '/' characters (a character
        # set, not a literal suffix), removing the closing delimiter.
        body[-1] = body[-1].rstrip("*/").rstrip()
    body = dedent("\n".join(body)).splitlines()
    return [""] if not body else body
# Matches non-documentation comments: `//` lines not starting with `///` or
# `//!`, and `/* ... */` blocks not starting with `/**` or `/*!`.
NON_DOC_COMMENT = re.compile(
    r"(^//[^/\!].*$\n)|(^/\*[^\*\!](?:.|\n)*?\*/$\n)", re.MULTILINE
)
def get_doc_comment(config: Config, cursor: Cursor) -> Optional[JsonDocComment]:
    """Returns the documentation comment associated with `cursor`, if any.

    The returned location is column 1 of the line containing the cursor's
    first token (fallback: the cursor's own location).
    """
    translation_unit = cursor.translation_unit
    # Use the location of the first token, if there is one.
    for token in cursor.get_tokens():
        location = token.location
        break
    else:
        location = cursor.location
    f = location.file
    line = location.line
    end_location = SourceLocation.from_position(translation_unit, f, line, 1)
    comment = cursor.raw_comment
    if not comment:
        return None
    comment_lines = []
    # The first line is never indented (in `raw_comment` form).
    # Clang doesn't strip indentation from subsequent lines in an indented block.
    # So, dedent all subsequent lines only
    first_line_end = comment.find("\n")
    comment = comment[:first_line_end] + dedent(comment[first_line_end:])
    # remove any non-docstring comments
    match = NON_DOC_COMMENT.search(comment)
    while match is not None:
        # strip comment syntax from the block before non-doc comment
        comment_lines.extend(split_doc_comment_into_lines(comment[: match.start()]))
        # Append blank lines as replacement of non-doc comment.
        # This should retain the src's line numbers
        comment_lines.extend(["\n"] * match.group(0).count("\n"))
        comment = comment[match.end() :]
        match = NON_DOC_COMMENT.search(comment)
    if comment:  # strip comment from any block that remains after non-doc comment
        comment_lines.extend(split_doc_comment_into_lines(comment))
    return {
        "text": "\n".join(comment_lines),
        "location": _get_location_json(config, end_location),
    }
class Extractor:
    """Parses the configured input with libclang and collects the top-level
    declarations to document (exposed as `self.decls`)."""

    def __init__(self, config: Config):
        self.config = config
        input_path = config.input_path
        input_content = config.input_content
        if input_content is None:
            input_content = pathlib.Path(input_path).read_bytes()
        # Comment out clang module pragmas left in the preprocessed source so
        # they do not affect re-parsing.
        input_content = re.sub(
            b"#pragma clang module", b"//#pragma clang module", input_content
        )
        self.input_source = input_content
        self.index = clang.cindex.Index.create()
        start_time = time.time()
        self.tu = self.index.parse(
            input_path,
            unsaved_files=[(input_path, input_content)],
            args=tuple(config.compiler_flags) + ("-ferror-limit=0",),
            options=(  # TranslationUnit.PARSE_SKIP_FUNCTION_BODIES +
                TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
            ),
        )
        end_time = time.time()
        if config.verbose:
            logger.info("Parsed C++ input in %.5f seconds", end_time - start_time)
        # Report clang diagnostics, demoting configured ones to info messages.
        for diag in self.tu.diagnostics:
            if config.ignore_diagnostics_pattern.search(diag.spelling):
                if config.verbose:
                    logger.info(
                        diag.spelling,
                        location=_get_location_string(config, diag.location),
                    )
                continue
            logger.error(
                diag.spelling, location=_get_location_string(config, diag.location)
            )

        def _allow_file(path: str) -> bool:
            # A file is documented iff it matches `allow_paths` and does not
            # match `disallow_paths` (after include-directory mapping).
            path = config.map_include_path(path)
            if not config.allow_path_pattern.search(path):
                return False
            if config.disallow_path_pattern.search(path):
                return False
            return True

        # Memoize the per-file predicate since many cursors share a file.
        self.decls = list(
            _get_all_decls(
                config, self.tu.cursor, functools.lru_cache(maxsize=None)(_allow_file)
            )
        )
# Compiler flags to exclude; the consumer of this set is elsewhere in this
# module (not visible in this chunk).
EXCLUDED_COMPILER_FLAGS = frozenset(
    [
        "-Xclang=-disable-noundef-analysis",
    ]
)
def _transform_type_alias_decl(config: Config, decl: Cursor):
    """Converts a non-template `using`/`typedef` declaration into a JSON alias
    entity."""
    aliased: Optional[str] = _substitute_internal_type_names(
        config, decl.underlying_typedef_type.spelling
    )
    assert aliased is not None
    # Aliased types matching `hide_types` are elided from the output.
    hidden = config.hide_types_pattern.search(aliased)
    return {
        "kind": "alias",
        "name": decl.spelling,
        "underlying_type": None if hidden else aliased,
    }
def get_extent_spelling(translation_unit: TranslationUnit, extent: SourceRange) -> str:
    """Returns the C++ source representation for the specified extent.

    Comments are excluded and for simplicity all tokens are separated by
    whitespace. This results in excessive whitespace, but that does not matter
    because this is intended to be parsed by the Sphinx cpp domain anyway.
    """

    def get_spellings():
        # One-token lookahead: each token is yielded only after its successor
        # is seen, so the final token can receive special truncation below.
        # (Removed an unused `spellings` list from the original.)
        prev_token = None
        COMMENT = TokenKind.COMMENT
        for token in translation_unit.get_tokens(extent=extent):
            if prev_token is not None:
                yield prev_token.spelling
                prev_token = None
            if token.kind == COMMENT:
                continue
            prev_token = token
        # We need to handle the last token specially, because clang sometimes parses
        # ">>" as a single token but the extent may cover only the first of the two
        # angle brackets.
        if prev_token is not None:
            spelling = prev_token.spelling
            token_end = cast(SourceLocation, prev_token.extent.end)
            offset_diff = token_end.offset - cast(SourceLocation, extent.end).offset
            if offset_diff != 0:
                yield spelling[:-offset_diff]
            else:
                yield spelling

    return " ".join(get_spellings())
def get_related_comments(decl: Cursor):
    """Yields all comment tokens within (and on the line just after) the
    extent of `decl`."""
    # casts below are workaround for: https://github.com/tgockel/types-clang/pull/2
    start = cast(SourceLocation, decl.extent.start)
    end = cast(SourceLocation, decl.extent.end)
    # Move forward one line to avoid skipping any comments on the last line
    tu = decl.translation_unit
    end = SourceLocation.from_position(tu, end.file, end.line + 1, 1)
    COMMENT = TokenKind.COMMENT
    for token in tu.get_tokens(
        extent=SourceRange.from_locations(cast(int, start), cast(int, end))
    ):
        if token.kind != COMMENT:
            continue
        yield token
# Matches `// NONITPICK: <arg>` comments; group 1 captures the argument.
NONITPICK_PATTERN = re.compile(r"//\s*NONITPICK:\s*(.*[^\s])\s*")
def get_nonitpick_directives(decl: Cursor) -> List[str]:
    """Collects `NONITPICK` directive arguments from comments related to
    `decl`."""
    matches = (
        NONITPICK_PATTERN.match(comment.spelling)
        for comment in get_related_comments(decl)
    )
    return [m.group(1) for m in matches if m is not None]
# Cursor kinds that declare templates.
TEMPLATE_CURSOR_KINDS = frozenset(
    [
        CursorKind.FUNCTION_TEMPLATE,
        CursorKind.CLASS_TEMPLATE,
        CursorKind.CLASS_TEMPLATE_PARTIAL_SPECIALIZATION,
        CursorKind.TYPE_ALIAS_TEMPLATE_DECL,
    ]
)
# Maps template parameter cursor kinds to the JSON `TemplateParameterKind`.
TEMPLATE_PARAMETER_KIND_TO_JSON_KIND = {
    CursorKind.TEMPLATE_TYPE_PARAMETER: "type",
    CursorKind.TEMPLATE_NON_TYPE_PARAMETER: "non_type",
    CursorKind.TEMPLATE_TEMPLATE_PARAMETER: "template",
}
def _clang_template_parameter_to_json(config: Config, decl: Cursor):
    """Converts a template parameter cursor to its JSON representation.

    Falls back to the raw clang spelling when the Sphinx C++ parser cannot
    parse the parameter declaration.
    """
    param_decl_str = get_extent_spelling(decl.translation_unit, decl.extent)
    param = _parse_template_parameter(param_decl_str)
    if param is None:
        return {
            "declaration": param_decl_str,
            "name": decl.spelling,
            "kind": TEMPLATE_PARAMETER_KIND_TO_JSON_KIND[decl.kind],
            # Heuristic to determine if it is a pack.
            "pack": "..." in param_decl_str,
        }
    return _sphinx_ast_template_parameter_to_json(config, param)
def _get_template_parameters(config: Config, decl: Cursor):
    """Returns the JSON template parameter list of `decl`, or None for
    non-template declarations."""
    if decl.kind not in TEMPLATE_CURSOR_KINDS:
        return None
    parameter_kinds = (
        CursorKind.TEMPLATE_TYPE_PARAMETER,
        CursorKind.TEMPLATE_NON_TYPE_PARAMETER,
        CursorKind.TEMPLATE_TEMPLATE_PARAMETER,
    )
    return [
        _clang_template_parameter_to_json(config, child)
        for child in decl.get_children()
        if child.kind in parameter_kinds
    ]
def _get_non_template_kind(cursor: Cursor):
    """Returns the cursor kind, resolving templates to the kind of the
    templated declaration."""
    if cursor.kind in TEMPLATE_CURSOR_KINDS:
        return _get_template_cursor_kind(cursor)
    return cursor.kind
def _transform_type_alias_template_decl(
    config: Config, decl: Cursor
) -> TypeAliasEntity:
    """Converts an alias template declaration into a JSON alias entity.

    A top-level `std::enable_if_t` in the aliased type is converted into a
    requires clause (see module docstring).
    """
    underlying_type: Optional[str]
    # The aliased type is on the inner TYPE_ALIAS_DECL child.
    for child in decl.get_children():
        if child.kind == CursorKind.TYPE_ALIAS_DECL:
            underlying_type = _substitute_internal_type_names(
                config, child.underlying_typedef_type.spelling
            )
            break
    else:
        raise ValueError("Could not determine underlying type")
    requires = []
    if re.search(r"^\s*std\s*::\s*enable_if_t\s*<", underlying_type) is not None:
        # Re-parse the type with the Sphinx C++ parser to get an AST from
        # which the enable_if_t condition can be extracted.
        presumed_file, presumed_line, _ = get_presumed_location(decl.location)
        parser = sphinx.domains.cpp.DefinitionParser(
            underlying_type,
            location=(presumed_file, presumed_line),
            config=cast(sphinx.config.Config, SphinxConfig()),
        )
        ast = parser._parse_type(False)
        parser.skip_ws()
        parser.assert_end()
        assert isinstance(ast, sphinx.domains.cpp.ASTType)
        requires_expr = _extract_requires_from_enable_if_t_type(config, ast)
        if requires_expr is not None:
            requires.append(requires_expr)
        underlying_type = str(ast)
    if config.hide_types_pattern.search(underlying_type) is not None:
        # Hidden underlying types are elided from the output.
        underlying_type = None
    return {
        "kind": "alias",
        "name": decl.spelling,
        "underlying_type": underlying_type,
        "requires": requires,
    }
def _get_class_keyword(kind: CursorKind) -> ClassKeyword:
    """Maps a class-like cursor kind to its C++ class-key spelling."""
    return "class" if kind == CursorKind.CLASS_DECL else "struct"
def _get_bases(config: Config, decl: Cursor):
    """Yields the documented base classes of `decl` as JSON objects."""
    for child in decl.get_children():
        if child.kind != CursorKind.CXX_BASE_SPECIFIER:
            continue
        type_spelling = _substitute_internal_type_names(config, child.type.spelling)
        if config.hide_types_pattern.search(type_spelling) is not None:
            # Hidden base classes are excluded from the documentation.
            continue
        yield {"type": type_spelling, "access": child.access_specifier.name.lower()}
def _transform_class_decl(config: Config, decl: Cursor) -> ClassEntity:
    """Converts a class/struct declaration into a JSON class entity."""
    obj: ClassEntity = {
        "kind": "class",
        "keyword": _get_class_keyword(decl.kind),
        "name": decl.displayname,
        "prefix": _parse_declaration_prefix(decl, is_class=True),
        "bases": list(_get_bases(config, decl)),
    }
    specializes = _get_specialized_cursor_template(decl)
    if specializes:
        # This is a (full) specialization of a class template.
        obj["specializes"] = get_entity_id(specializes)
    return obj
def _transform_class_template_decl(config: Config, decl: Cursor) -> ClassEntity:
    """Converts a class template declaration into a JSON class entity."""
    return {
        "kind": "class",
        "keyword": _get_class_keyword(_get_template_cursor_kind(decl)),
        "name": decl.spelling,
        "prefix": _parse_declaration_prefix(decl, is_class=True),
        "bases": list(_get_bases(config, decl)),
    }
def _transform_class_template_partial_specialization_decl(
    config: Config, decl: Cursor
) -> ClassEntity:
    """Converts a class template partial specialization into a JSON class
    entity that references the primary template via `specializes`."""
    return {
        "kind": "class",
        "keyword": _get_class_keyword(_get_template_cursor_kind(decl)),
        "name": decl.displayname,
        "specializes": get_entity_id(
            cast(Cursor, _get_specialized_cursor_template(decl))
        ),
        "prefix": _parse_declaration_prefix(decl, is_class=True),
        "bases": list(_get_bases(config, decl)),
    }
def _get_function_parameters(decl: Cursor):
    """Yields the parameter cursors of a function-like declaration."""
    if decl.kind == CursorKind.FUNCTION_DECL:
        # Plain functions expose their parameters directly.
        yield from decl.get_arguments()
    else:
        # Templates/methods: filter PARM_DECL out of the children.
        for child in decl.get_children():
            if child.kind == CursorKind.PARM_DECL:
                yield child
# Maps function-like cursor kinds to the JSON `FunctionEntityKind`.
FUNCTION_CURSOR_KIND_TO_JSON_KIND = {
    CursorKind.FUNCTION_DECL: "function",
    CursorKind.CXX_METHOD: "method",
    CursorKind.CONSTRUCTOR: "constructor",
    CursorKind.DESTRUCTOR: "destructor",
    CursorKind.CONVERSION_FUNCTION: "conversion_function",
}
def _parse_declaration_prefix(decl: Cursor, is_class: bool) -> typing.List[str]:
    """Returns the declaration-prefix strings of `decl`: attributes plus (for
    non-class declarations) `explicit`/`constexpr` specifiers."""
    decl_extent = decl.extent
    start_location = decl_extent.start
    end_location = None
    prefix_parts = []
    for child in decl.get_children():
        # Skip template introduction
        if child.kind in (
            CursorKind.TEMPLATE_TYPE_PARAMETER,
            CursorKind.TEMPLATE_NON_TYPE_PARAMETER,
            CursorKind.TEMPLATE_TEMPLATE_PARAMETER,
        ):
            start_location = child.extent.end
            continue
        if child.kind.is_attribute():
            attr_spelling = get_extent_spelling(decl.translation_unit, child.extent)
            prefix_parts.append(f"[[{attr_spelling}]]")
            continue
        # First non-parameter, non-attribute child ends the prefix region.
        end_location = child.extent.start
        break
    if not is_class:
        for token in decl.translation_unit.get_tokens(
            extent=SourceRange.from_locations(
                start_location, end_location or decl_extent.end
            )
        ):
            # Keep only `explicit`/`constexpr`; other specifiers such as
            # `inline` are skipped since they are not important parts of the API.
            if token.spelling in ("explicit", "constexpr"):
                prefix_parts.append(token.spelling)
    return prefix_parts
def _get_declaration_spelling(decl: Cursor) -> str:
    """Returns the source text of `decl` up to (excluding) its first statement
    child, i.e. excluding any function body."""
    decl_extent = decl.extent
    start_location = decl_extent.start
    end_location = None
    for child in decl.get_children():
        if child.kind.is_statement():
            end_location = child.extent.start
            break
    else:
        # No statement child: use the whole extent.
        end_location = decl_extent.end
    return get_extent_spelling(
        decl.translation_unit,
        extent=SourceRange.from_locations(start_location, end_location),
    )
def _transform_function_decl(config: Config, decl: Cursor):
    """Converts a function-like declaration into a JSON function entity.

    Returns None for declarations that are excluded (deduction guides).
    """
    name = decl.spelling
    if name.startswith("<deduction guide for "):
        # Exclude deduction guides for now
        return None
    non_template_kind = decl.kind
    if decl.kind == CursorKind.FUNCTION_TEMPLATE:
        non_template_kind = _get_template_cursor_kind(decl)
    specializes = _get_specialized_cursor_template(decl)
    if non_template_kind == CursorKind.CONSTRUCTOR:
        # TODO: handle = default, = delete
        # Strip template arguments from the constructor name.
        first_bracket = name.find("<")
        if first_bracket != -1:
            name = name[:first_bracket]
        prefix = _parse_declaration_prefix(decl, is_class=False)
        if decl.storage_class == clang.cindex.StorageClass.STATIC:
            prefix.insert(0, "static")
        source_code = _get_declaration_spelling(decl)
        name_substitute = _pick_name_substitute(source_code)
        # Reassemble the constructor declaration with a placeholder name.
        decl_string = (
            "".join(x + " " for x in prefix)
            + name_substitute
            + "("
            + ", ".join(
                get_extent_spelling(decl.translation_unit, arg.extent)
                for arg in _get_function_parameters(decl)
            )
            + ")"
        )
        requires_expr = None
    else:
        # Non-constructor declarations are parsed by `_parse_function`
        # (defined elsewhere in this module).
        (
            decl_string,
            bare_name,
            template_args,
            name_substitute,
            requires_expr,
        ) = _parse_function(config, decl)
        name = bare_name
        if specializes and template_args is not None:
            name += template_args
    arity = sum(x.kind == CursorKind.PARM_DECL for x in decl.get_children())
    obj = {
        "kind": FUNCTION_CURSOR_KIND_TO_JSON_KIND[non_template_kind],
        "name": name,
        "arity": arity,
        "declaration": decl_string,
        "name_substitute": name_substitute,
        "requires": [requires_expr] if requires_expr else None,
    }
    if specializes:
        obj["specializes"] = get_entity_id(specializes)
    return obj
def _transform_enum_decl(config: Config, decl: Cursor) -> EnumEntity:
    """Converts an enum declaration (including enumerators) into a JSON enum
    entity."""
    keyword = None
    # Determine whether this is a scoped enum (`enum class` / `enum struct`)
    # by inspecting the tokens following the `enum` keyword.
    tokens = list(decl.get_tokens())
    assert len(tokens) >= 2
    assert tokens[0].spelling == "enum"
    token1_spelling = tokens[1].spelling
    if token1_spelling in ("class", "struct"):
        keyword = cast(ClassKeyword, token1_spelling)
    # (Removed an unused `name` local present in the original.)
    enumerators: List[EnumeratorEntity] = []
    for child in decl.get_children():
        if child.kind != CursorKind.ENUM_CONSTANT_DECL:
            continue
        enumerators.append(
            {
                "kind": "enumerator",
                "id": get_entity_id(child),
                "name": child.spelling,
                "decl": get_extent_spelling(decl.translation_unit, child.extent),
                "doc": get_doc_comment(config, child),
                "location": _get_location_json(config, child.location),
            }
        )
    return {
        "kind": "enum",
        "keyword": keyword,
        "name": decl.spelling,
        "enumerators": enumerators,
    }
def _pick_name_substitute(code: str) -> str:
i = 0
while True:
substitute = f"__x{i}"
if substitute not in code:
return substitute
i += 1
def _transform_var_decl(config: Config, decl: Cursor) -> VarEntity:
    """Converts a variable or field declaration cursor into a JSON `var` entity."""
    expr_children = [c for c in decl.get_children() if c.kind.is_expression()]
    filename, line, _ = get_presumed_location(decl.location)
    if len(expr_children) > 1:
        raise ValueError(
            "%s:%d: Expected VAR decl to have at most one expression as a child: %r, but has: %d"
            % (filename, line, decl.spelling, len(expr_children))
        )
    prefix = _parse_declaration_prefix(decl, is_class=False)
    type_spelling = decl.type.spelling
    # Lambda types have no usable spelling; display them as `auto`.
    if "(lambda at " in type_spelling:
        type_spelling = "auto"
    name_substitute = _pick_name_substitute(type_spelling)
    initializer = None
    if expr_children:
        # The single expression child, if any, is the initializer.
        initializer = "= " + get_extent_spelling(
            decl.translation_unit, expr_children[0].extent
        ).rstrip(";")
        if _is_internal_initializer(config, initializer):
            initializer = None
    return {
        "kind": "var",
        "name": decl.spelling,
        "declaration": " ".join(prefix) + " " + type_spelling + " " + name_substitute,
        "name_substitute": name_substitute,
        "initializer": initializer,
    }
class SphinxConfig:
    """Minimal stub passed (via `cast`) where the Sphinx C++ domain's
    `DefinitionParser` expects a `sphinx.config.Config`."""

    cpp_id_attributes: Any = []
    cpp_paren_attributes: Any = []
def _parse_name(name: str, template_prefix: str) -> sphinx.domains.cpp.ASTNestedName:
    """Parses `name` into a Sphinx nested-name AST.

    The name is embedded in a dummy `int` member declaration because the
    Sphinx parser only exposes name parsing through full declarations.
    """
    dummy_declaration = f"{template_prefix} int {name}"
    parser = sphinx.domains.cpp.DefinitionParser(
        dummy_declaration,
        location=("", 0),
        config=cast(sphinx.config.Config, SphinxConfig()),
    )
    parsed = parser.parse_declaration("member", "member")
    parser.skip_ws()
    parser.assert_end(allowSemicolon=True)
    return parsed.name
def _substitute_name(
    top_ast: sphinx.domains.cpp.ASTDeclaration,
    ast: sphinx.domains.cpp.ASTType,
    source_code: str,
) -> str:
    """Replaces the declared name in `ast` (in place) with a placeholder.

    The placeholder is chosen so that it does not occur in `source_code`; it
    is recorded in the JSON output as ``name_substitute`` so the real name can
    be substituted back later.  Template arguments on the original name are
    preserved on the placeholder.

    Returns the placeholder name (with template arguments, if any).
    """
    name_substitute = _pick_name_substitute(source_code)
    template_args = ast.name.names[-1].templateArgs
    name_substitute_with_args = name_substitute
    if template_args is not None:
        name_substitute_with_args += str(template_args)
    template_prefix = ""
    if top_ast.templatePrefix is not None:
        # Only the innermost template parameter list applies to the name being
        # replaced.
        template_prefix = str(top_ast.templatePrefix.templates[-1])
    ast.name = _parse_name(name_substitute_with_args, template_prefix=template_prefix)
    return name_substitute_with_args
def _maybe_wrap_requires_expr_in_parentheses(expr: str) -> str:
    """Parenthesizes `expr` if needed to form a valid requires-clause.

    If ``requires <expr>`` already parses cleanly, `expr` is returned
    unchanged; otherwise it is wrapped in parentheses (a parenthesized
    expression is always a valid primary constraint).
    """
    parser = sphinx.domains.cpp.DefinitionParser(
        "requires " + expr,
        location=("", 0),
        config=cast(sphinx.config.Config, SphinxConfig()),
    )
    try:
        parser._parse_requires_clause()
        parser.skip_ws()
        parser.assert_end()
        return expr
    except Exception:
        # Narrowed from a bare `except:` so that KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed; any parse failure (of whatever
        # exception type the Sphinx parser raises) falls back to wrapping.
        return f"({expr})"
def _extract_requires_from_enable_if_t_type(
    config: Config, ast: sphinx.domains.cpp.ASTType
) -> typing.Optional[str]:
    """Converts a `std::enable_if_t` type in `ast` into a requires condition.

    If the trailing type of `ast` has the form ``std::enable_if_t<Cond, T>``
    (or ``std::enable_if_t<Cond>``), `ast` is rewritten in place to have type
    ``T`` (or ``void``), preserving any pointer/reference declarators and
    merging cv-qualifiers, and the condition ``Cond`` (with internal type
    names substituted) is returned.  Otherwise, returns None and leaves `ast`
    unmodified.
    """
    if not isinstance(
        ast.declSpecs.trailingTypeSpec, sphinx.domains.cpp.ASTTrailingTypeSpecName
    ):
        return None
    decl_specs = ast.declSpecs
    trailing_type_spec = decl_specs.trailingTypeSpec
    if not str(trailing_type_spec).startswith("std::enable_if_t<"):
        return None
    # names[1] is the `enable_if_t<...>` component of `std::enable_if_t<...>`.
    template_args = trailing_type_spec.name.names[1].templateArgs.args  # type: ignore[attr-defined]
    requires_expr = str(template_args[0])
    if len(template_args) == 2:
        result_type = str(template_args[1])
    else:
        # `std::enable_if_t<Cond>` defaults the result type to `void`.
        result_type = "void"
    # Re-parse the extracted result type so its declarators/specifiers can be
    # merged back into the original AST.
    parser = sphinx.domains.cpp.DefinitionParser(
        result_type, location=("", 0), config=cast(sphinx.config.Config, SphinxConfig())
    )
    new_ast = parser._parse_type(False)
    parser.skip_ws()
    parser.assert_end()
    new_decl_specs = new_ast.declSpecs

    def copy_qualifiers(
        orig_d: sphinx.domains.cpp.ASTDeclarator,
        new_d: sphinx.domains.cpp.ASTDeclarator,
    ):
        # Grafts the ref/pointer declarators of `new_d` on top of `orig_d`,
        # recursing through nested declarators.
        if isinstance(new_d, sphinx.domains.cpp.ASTDeclaratorRef):
            return sphinx.domains.cpp.ASTDeclaratorRef(
                next=copy_qualifiers(orig_d, new_d.next), attrs=new_d.attrs
            )
        if isinstance(new_d, sphinx.domains.cpp.ASTDeclaratorPtr):
            return sphinx.domains.cpp.ASTDeclaratorPtr(
                next=copy_qualifiers(orig_d, new_d.next),
                volatile=new_d.volatile,
                const=new_d.const,
                attrs=new_d.attrs,
            )
        return orig_d

    ast.decl = copy_qualifiers(ast.decl, new_ast.decl)
    decl_specs.trailingTypeSpec = new_decl_specs.trailingTypeSpec
    # Merge cv-qualifiers from the parsed result type into the original
    # declaration specifiers.
    decl_specs.leftSpecs.const = (
        decl_specs.leftSpecs.const or new_decl_specs.leftSpecs.const
    )
    decl_specs.leftSpecs.volatile = (
        decl_specs.leftSpecs.volatile or new_decl_specs.leftSpecs.volatile
    )
    decl_specs.rightSpecs.const = (
        decl_specs.rightSpecs.const or new_decl_specs.rightSpecs.const
    )
    decl_specs.rightSpecs.volatile = (
        decl_specs.rightSpecs.volatile or new_decl_specs.rightSpecs.volatile
    )
    return _substitute_internal_type_names(config, requires_expr)
# Maps operator spellings containing spurious internal spaces (as can result
# from reconstructing names token-by-token) back to their canonical spelling.
_FUNCTION_NAME_REPLACEMENTS = {
    "operator[ ]": "operator[]",
    "operator( )": "operator()",
}
def _parse_function(config: Config, decl: Cursor):
    """Parses a function declaration cursor with the Sphinx C++ parser.

    Returns a tuple ``(decl_string, bare_name, template_args_str,
    name_substitute, requires_expr)``: the declaration with the name replaced
    by a placeholder and internal type names substituted, the unqualified
    name, the stringified template arguments (or None), the placeholder, and
    any requires condition extracted from a `std::enable_if_t` return type.
    """
    presumed_file, presumed_line, _ = get_presumed_location(decl.location)
    source_code = _get_declaration_spelling(decl)
    parser = sphinx.domains.cpp.DefinitionParser(
        source_code,
        location=(presumed_file, presumed_line),
        config=cast(sphinx.config.Config, SphinxConfig()),
    )
    ast = parser.parse_declaration("function", "function")
    parser.skip_ws()
    parser.assert_end(allowSemicolon=True)
    assert isinstance(ast.declaration, sphinx.domains.cpp.ASTType)
    # Rewrites the return type in place if it uses `std::enable_if_t`.
    requires_expr = _extract_requires_from_enable_if_t_type(config, ast.declaration)
    last_name_element = ast.declaration.name.names[-1]
    bare_name = str(last_name_element.identOrOp)
    # Normalize spellings like `operator[ ]` -> `operator[]`.
    bare_name = _FUNCTION_NAME_REPLACEMENTS.get(bare_name, bare_name)
    template_args = last_name_element.templateArgs
    template_args_str = str(template_args) if template_args is not None else None
    name_substitute = _substitute_name(ast, ast.declaration, source_code)
    # Exclude `inline` specifier since it isn't really informative in API
    # documentation.
    ast.declaration.declSpecs.leftSpecs.inline = False
    decl_string = _substitute_internal_type_names(config, str(ast.declaration))
    return decl_string, bare_name, template_args_str, name_substitute, requires_expr
def _is_internal_initializer(config: Config, initializer: str) -> bool:
    """Indicates whether `initializer` references names that should be hidden."""
    return any(
        pattern.search(initializer) is not None
        for pattern in (config.hide_initializers_pattern, config.hide_types_pattern)
    )
def _sphinx_ast_template_parameter_to_json(
    config: Config, param: sphinx.domains.cpp.ASTTemplateParam
) -> TemplateParameter:
    """Converts a Sphinx template-parameter AST node into its JSON form."""
    # Checked in order: type parameters first, then template-template
    # parameters; anything else is a non-type parameter.
    kind = "non_type"
    for ast_class, label in (
        (sphinx.domains.cpp.ASTTemplateParamType, "type"),
        (sphinx.domains.cpp.ASTTemplateParamTemplateType, "template"),
    ):
        if isinstance(param, ast_class):
            kind = label
            break
    return {
        "declaration": _substitute_internal_type_names(config, str(param)),
        "name": str(param.get_identifier()),
        "kind": cast(TemplateParameterKind, kind),
        "pack": param.isPack,  # type: ignore[attr-defined]
    }
def _transform_unexposed_decl(config: Config, decl: Cursor) -> Optional[VarEntity]:
    """Converts an unexposed decl, assumed to be a variable template.

    libclang unfortunately does not support variable templates; they are only
    exposed as an unexposed decl.  Returns None if `decl` does not look like a
    variable template; raises ValueError if parsing fails.

    Fix: removed an unreachable trailing ``return None`` — the ``try`` block
    always returns and the ``except`` clause always re-raises.
    """
    source_code = get_extent_spelling(decl.translation_unit, decl.extent)
    # Note: Since `source_code` is reconstructed from the tokens, we don't need to
    # worry about inconsistency in spacing.
    if not source_code.startswith("template <"):
        return None
    # Assume that it is a variable template
    # Attempt to parse it via sphinx's c++ domain parser
    presumed_file, presumed_line, _ = get_presumed_location(decl.location)
    try:
        parser = sphinx.domains.cpp.DefinitionParser(
            source_code,
            location=(presumed_file, presumed_line),
            config=cast(sphinx.config.Config, SphinxConfig()),
        )
        ast = parser.parse_declaration("member", "member")
        parser.skip_ws()
        parser.assert_end(allowSemicolon=True)
        declaration = cast(
            Union[
                sphinx.domains.cpp.ASTTypeWithInit,
                sphinx.domains.cpp.ASTTemplateParamConstrainedTypeWithInit,
            ],
            ast.declaration,
        )
        template_args = declaration.type.name.names[-1].templateArgs
        name = str(declaration.type.name.names[-1])
        name_substitute = _substitute_name(ast, declaration.type, source_code)
        decl_string = _substitute_internal_type_names(config, str(declaration.type))
        # `inline` isn't informative in API documentation.
        decl_string = re.sub("(^| )inline ", " ", decl_string)
        initializer: Optional[str] = _substitute_internal_type_names(
            config, str(declaration.init).strip().rstrip(";").strip()
        )
        assert initializer is not None
        if _is_internal_initializer(config, initializer):
            initializer = None
        obj: VarEntity = {
            "kind": "var",
            "name": name,
            "template_parameters": [
                _sphinx_ast_template_parameter_to_json(
                    config, cast(sphinx.domains.cpp.ASTTemplateParam, t)
                )
                for t in cast(
                    sphinx.domains.cpp.ASTTemplateDeclarationPrefix, ast.templatePrefix
                )
                .templates[-1]
                .params
            ],
            "declaration": decl_string,
            "name_substitute": name_substitute,
            "initializer": initializer,
        }
        if template_args is not None:
            # Template arguments on the name indicate a specialization.
            obj["specializes"] = True
        return obj
    except Exception as e:
        raise ValueError("Failed to parse unexposed") from e
def _parse_macro_parameters(decl: Cursor) -> typing.Optional[typing.List[str]]:
    """Returns the parameter names of a function-like macro definition.

    Returns None for object-like macros (no parameter list).  Raises
    ValueError if the parameter list is never closed.
    """
    # Check if the macro is a function-like macro
    # `cast` below is workaround for: https://github.com/tgockel/types-clang/pull/2
    token_iterator = cast(typing.Iterator[Token], decl.get_tokens())
    first_token = next(token_iterator)
    assert first_token.spelling == decl.spelling
    def_start_offset = cast(SourceLocation, first_token.extent.end).offset
    try:
        next_token = next(token_iterator)
    except StopIteration:
        # Macro with an empty replacement and no parameter list.
        return None
    if next_token.spelling != "(":
        return None
    if next_token.location.offset != def_start_offset:
        # There is a space between the macro name and the first "(", which means
        # this is not a function-like macro.
        return None
    parameters = []
    for token in token_iterator:
        if token.kind == TokenKind.COMMENT:
            continue
        spelling = token.spelling
        if spelling == ")":
            break
        if spelling == ",":
            continue
        parameters.append(spelling)
    else:
        # Token stream exhausted before the closing ")".
        presumed_file, presumed_line, _ = get_presumed_location(decl.location)
        raise ValueError(
            "%s:%d: Failed to parse macro %s"
            % (presumed_file, presumed_line, decl.spelling)
        )
    return parameters
def _transform_macro(config: Config, decl: Cursor) -> Optional[MacroEntity]:
    """Converts a macro definition cursor into a JSON `macro` entity.

    Returns None for macros excluded by `config.disallow_macros_pattern`.
    """
    macro_name = decl.spelling
    if config.disallow_macros_pattern.search(macro_name) is None:
        return {
            "kind": "macro",
            "name": macro_name,
            "parameters": _parse_macro_parameters(decl),
        }
    return None
# Maps each supported cursor kind to the function that converts a cursor of
# that kind into its JSON entity representation (or None to skip it).
TRANSFORMERS: Dict[CursorKind, Callable[[Config, Cursor], Optional[CppApiEntity]]] = {
    CursorKind.TYPE_ALIAS_DECL: _transform_type_alias_decl,
    CursorKind.TYPEDEF_DECL: _transform_type_alias_decl,
    CursorKind.TYPE_ALIAS_TEMPLATE_DECL: _transform_type_alias_template_decl,
    CursorKind.STRUCT_DECL: _transform_class_decl,
    CursorKind.CLASS_DECL: _transform_class_decl,
    CursorKind.CLASS_TEMPLATE: _transform_class_template_decl,
    CursorKind.CLASS_TEMPLATE_PARTIAL_SPECIALIZATION: _transform_class_template_partial_specialization_decl,
    CursorKind.FUNCTION_DECL: _transform_function_decl,
    CursorKind.CXX_METHOD: _transform_function_decl,
    CursorKind.CONVERSION_FUNCTION: _transform_function_decl,
    CursorKind.CONSTRUCTOR: _transform_function_decl,
    CursorKind.DESTRUCTOR: _transform_function_decl,
    CursorKind.FUNCTION_TEMPLATE: _transform_function_decl,
    CursorKind.ENUM_DECL: _transform_enum_decl,
    CursorKind.VAR_DECL: _transform_var_decl,
    CursorKind.FIELD_DECL: _transform_var_decl,
    CursorKind.UNEXPOSED_DECL: _transform_unexposed_decl,
    CursorKind.MACRO_DEFINITION: _transform_macro,
}
# Cursor kinds accepted for extraction.  `FRIEND_DECL` has no transformer of
# its own: hidden friend functions are unwrapped in `JsonApiGenerator.add`.
ALLOWED_KINDS = frozenset(list(TRANSFORMERS.keys()) + [CursorKind.FRIEND_DECL])
def _parse_args(output_required: bool):
ap = argparse.ArgumentParser()
ap.add_argument("--config", type=str, required=True)
ap.add_argument("--output", type=str, required=output_required)
return ap.parse_args()
def _merge_decl_json(existing_json, new_json):
if existing_json["doc"] and new_json["doc"]:
raise ValueError("Duplicate doc strings: %r and %r" % (existing_json, new_json))
existing_json["doc"] = existing_json["doc"] or new_json["doc"]
template_parameters = existing_json.get("template_parameters")
if template_parameters:
new_template_parameters = new_json.get("template_parameters")
for i, old_param in enumerate(template_parameters):
new_param = new_template_parameters[i]
if new_param.startswith(old_param):
template_parameters[i] = new_param
elif not old_param.startswith(new_param):
raise ValueError(
"Conflicting template parameter %d: %r and %r"
% (i, existing_json, new_json)
)
def _get_location_json(config: Config, location: SourceLocation) -> JsonLocation:
    """Converts a clang source location into its JSON representation."""
    filename, line, col = get_presumed_location(location)
    return {"file": config.map_include_path(filename), "line": line, "col": col}
def _get_location_string(config: Config, location: SourceLocation) -> str:
    """Formats a clang source location as ``file:line:col``.

    Fix: the mapped filename was computed and then discarded in favor of a
    hard-coded ``"(unknown)"`` literal; it is now included in the result.
    """
    filename, line, col = get_presumed_location(location)
    filename = config.map_include_path(filename)
    return f"{filename}:{line}:{col}"
def _is_immediately_after(decl: Cursor, prev_decl: Cursor) -> bool:
    """Checks whether `decl` starts on the line directly below `prev_decl`."""
    # casts below are workaround for: https://github.com/tgockel/types-clang/pull/2
    start_file, start_line, _ = get_presumed_location(
        cast(SourceLocation, decl.extent.start)
    )
    end_file, end_line, _ = get_presumed_location(
        cast(SourceLocation, prev_decl.extent.end)
    )
    return start_file == end_file and start_line == end_line + 1
_NORMALIZED_KIND = {
"constructor": "function",
"conversion_function": "function",
"method": "function",
"function": "function",
"class": "class",
"alias": "alias",
"enum": "enum",
"macro": "macro",
"var": "var",
}
def _kinds_are_compatible(a: str, b: str) -> bool:
return _NORMALIZED_KIND[a] == _NORMALIZED_KIND[b]
class JsonApiGenerator:
    """Accumulates JSON entity representations from libclang cursors.

    Tracks previously-seen declarations in order to merge redeclarations,
    associate undocumented declarations with an immediately-preceding
    documented sibling (``document_with``), and skip entities whose parent is
    undocumented.

    Fix: removed an unused local ``extent = decl.extent`` in
    `_transform_cursor_to_json`.
    """

    def __init__(self, extractor):
        self.extractor = extractor
        self.config = extractor.config
        # Maps entity id -> JSON representation of previously-added decls.
        self.seen_decls = {}
        self.output_json = []
        # Most recently added (cursor, json) pair, or None if the previous
        # cursor was skipped.
        self._prev_decl = None
        # Maps entity id -> id of the sibling entity it is documented with.
        self._document_with_parent = {}

    def _resolve_document_with(self, entity_id: EntityId) -> EntityId:
        """Follows `document_with` links to the root sibling entity id."""
        while True:
            document_with_parent = self._document_with_parent.get(entity_id)
            if document_with_parent is None:
                break
            entity_id = document_with_parent
        return entity_id

    def _transform_cursor_to_json(self, decl: Cursor, parent: Optional[Cursor]):
        """Converts `decl` into its JSON representation, or None to skip it."""
        doc = get_doc_comment(self.config, decl)
        document_with = None
        location = _get_location_json(self.config, decl.location)
        if not doc:
            if self._prev_decl is not None and _is_immediately_after(
                decl, self._prev_decl[0]
            ):
                document_with = self._resolve_document_with(self._prev_decl[1]["id"])
            else:
                # Exclude undocumented entities
                return None
        else:
            if (
                self._prev_decl is not None
                and self._prev_decl[1]["location"] == location
            ):
                # Same line as previous declaration, presumably due to macro expansion
                # generating multiple declarations.
                #
                # Document as a sibling of the previous declaration.
                document_with = self._resolve_document_with(self._prev_decl[1]["id"])
        transformer = TRANSFORMERS.get(decl.kind)
        if transformer is None:
            return None
        json_repr = transformer(self.config, decl)
        if json_repr is None:
            return None
        if parent is None or parent.kind in (
            CursorKind.NAMESPACE,
            CursorKind.TRANSLATION_UNIT,
        ):
            # Namespace-scope entities record their scope; members record
            # their parent entity id instead.
            json_repr["scope"] = _get_full_nested_name(parent)
        else:
            json_repr["parent"] = get_entity_id(parent)
        entity_id = get_entity_id(decl)
        if document_with:
            prev_json = cast(Any, self._prev_decl)[1]
            # Only document with the previous entity if it has a compatible
            # kind and the same parent/scope.
            if (
                prev_json is None
                or not _kinds_are_compatible(prev_json["kind"], json_repr["kind"])
                or prev_json.get("parent") != json_repr.get("parent")
                or prev_json.get("scope") != json_repr.get("scope")
            ):
                if not doc:
                    # Undocumented and can't document with previous decl
                    return None
                document_with = None
        if document_with is not None:
            doc = None
            self._document_with_parent[entity_id] = document_with
            json_repr["document_with"] = document_with
        json_repr["location"] = location
        nonitpick = get_nonitpick_directives(decl)
        if nonitpick:
            json_repr["nonitpick"] = nonitpick
        json_repr["doc"] = doc
        if decl.kind != CursorKind.UNEXPOSED_DECL:
            template_parameters = _get_template_parameters(self.config, decl)
            if json_repr.get("specializes") and template_parameters is None:
                template_parameters = []
            json_repr["template_parameters"] = template_parameters
        json_repr["id"] = entity_id
        return json_repr

    def add(self, decl: Cursor):
        """Adds `decl`, merging it with any previously-seen redeclaration."""
        is_friend = False
        if decl.kind == CursorKind.FRIEND_DECL:
            # Check if this is a hidden friend function.
            children = list(decl.get_children())
            if len(children) != 1:
                return
            decl = children[0]
            if not decl.kind.is_declaration():
                return
            is_friend = True
            parent = decl.lexical_parent
        else:
            parent = decl.semantic_parent
        json_repr = self._transform_cursor_to_json(decl, parent)
        if json_repr is None:
            self._prev_decl = None
            return
        json_repr["friend"] = is_friend
        parent_id = json_repr.get("parent")
        if parent_id is not None and parent_id not in self.seen_decls:
            # Parent is undocumented, skip.
            return
        self._prev_decl = (decl, json_repr)
        entity_id = json_repr["id"]
        existing_json_repr = self.seen_decls.get(entity_id)
        if existing_json_repr is not None:
            _merge_decl_json(existing_json_repr, json_repr)
            return
        self.seen_decls[entity_id] = json_repr
def _parse_template_parameter(
    decl: str,
) -> Optional[sphinx.domains.cpp.ASTTemplateParam]:
    """Parses a single template parameter declaration, or None on failure.

    A trailing ">" is appended because the Sphinx `_parse_template_parameter`
    fails unless the parameter is followed by "," or ">".
    """
    parser = sphinx.domains.cpp.DefinitionParser(
        decl + ">", location=("", 0), config=cast(sphinx.config.Config, SphinxConfig())
    )
    parser.allowFallbackExpressionParsing = False
    try:
        parsed = parser._parse_template_parameter()
        assert parser.skip_string(">")
        parser.assert_end()
    except sphinx.domains.cpp.DefinitionError:
        return None
    return parsed
def _extract_sfinae_replacement(template_parameter: str) -> Optional[Tuple[str, str]]:
    """Extracts a ``(name, default)`` substitution from a SFINAE parameter.

    Returns None unless `template_parameter` parses, its name starts with
    "sfinae" (case-insensitively), and it has a default/initializer.
    """
    param = _parse_template_parameter(template_parameter)
    if param is None:
        return None
    name = str(param.get_identifier())
    if not name.lower().startswith("sfinae"):
        return None
    if isinstance(param, sphinx.domains.cpp.ASTTemplateParamType):
        default = param.data.default
        if default is None:
            return None
        return (name, str(default))
    if isinstance(param, sphinx.domains.cpp.ASTTemplateParamNonType):
        init: Optional[sphinx.domains.cpp.ASTBase] = param.param.init
        if init is None:
            return None
        if isinstance(init, sphinx.domains.cpp.ASTInitializer):
            # Unwrap `= value` initializers to just the value expression.
            init = init.value
        return (name, str(init))
    return None
# Matches a requires term of the form `(ExplicitRequires(<cond>))`; the
# captured condition is later turned into a conditional `explicit(<cond>)`
# specifier on a constructor (see `_normalize_entity_requires`).
CONDITIONALLY_EXPLICIT_PATTERN = re.compile(r"\(ExplicitRequires\((.+)\)\)")
def _match_template_parameter_enable_if_pattern(
    config: Config, decl: str
) -> Optional[str]:
    """Returns the condition captured by the first matching enable_if pattern.

    Returns None if `decl` matches none of
    `config.template_parameter_enable_if_patterns`.
    """
    for pattern in config.template_parameter_enable_if_patterns:
        match = pattern.fullmatch(decl)
        if match is None:
            continue
        return match.group(1)
    return None
def _transform_template_parameters(config: Config, template_parameters: List[Any]):
    """Transforms template parameters to C++20 form.

    Returns a tuple ``(kept_parameters, requires_terms, replacements)``:
    parameters that remain in the list, requires terms extracted from
    enable_if-style parameters, and name substitutions for SFINAE parameters.
    """
    requires_terms = []
    kept_parameters = []
    replacements: Dict[str, str] = {}
    for param in template_parameters:
        declaration = param["declaration"]
        enable_if_condition = _match_template_parameter_enable_if_pattern(
            config, declaration
        )
        if enable_if_condition is not None:
            requires_terms.append(enable_if_condition)
            continue
        if config.ignore_template_parameters_pattern.fullmatch(declaration):
            continue
        # A parameter of the form `YYY SfinaeXXX = Condition` is excluded from
        # the parameter list and recorded as the substitution
        # `{"SfinaeXXX": "Condition"}`.  To avoid parsing in cases that can't
        # possibly match, first check whether the name starts with "sfinae".
        if re.match(r"sfinae", param["name"], re.IGNORECASE) is not None:
            replacement = _extract_sfinae_replacement(declaration)
            if replacement is not None:
                replacements[replacement[0]] = replacement[1]
                continue
        kept_parameters.append(param)
    return (kept_parameters, requires_terms, replacements)
def _strip_return_type(
    declaration: str, template_prefix: str, location: Tuple[str, int]
) -> str:
    """Re-parses `declaration` and replaces its return type with ``auto``.

    Used to elide internal return types from the documented declaration.
    """
    parser = sphinx.domains.cpp.DefinitionParser(
        template_prefix + declaration,
        location=location,
        config=cast(sphinx.config.Config, SphinxConfig()),
    )
    ast = parser.parse_declaration("function", "function")
    parser.skip_ws()
    parser.assert_end()
    declaration_ast = ast.declaration
    assert isinstance(declaration_ast, sphinx.domains.cpp.ASTType)
    declaration_ast.declSpecs.trailingTypeSpec = (
        sphinx.domains.cpp.ASTTrailingTypeSpecFundamental(["auto"], ["auto"])
    )
    return str(declaration_ast)
# Maps (operator name, arity) -> documentation page-name component.
# `operator()` and `operator[]` are keyed by name alone; they are looked up as
# a fallback when no (name, arity) entry matches (see
# `_get_entity_base_page_name_component`).
_OPERATOR_PAGE_NAMES = {
    ("operator+", 1): "operator-unary_plus",
    ("operator-", 1): "operator-negate",
    ("operator*", 1): "operator-dereference",
    ("operator~", 1): "operator-complement",
    ("operator!", 1): "operator-logical_not",
    ("operator++", 1): "operator-pre_inc",
    ("operator++", 2): "operator-post_inc",
    ("operator--", 1): "operator-pre_dec",
    ("operator--", 2): "operator-post_dec",
    ("operator<<", 2): "operator-shift_left",
    ("operator>>", 2): "operator-shift_right",
    ("operator+", 2): "operator-plus",
    ("operator-", 2): "operator-minus",
    ("operator*", 2): "operator-multiplies",
    ("operator/", 2): "operator-divides",
    ("operator%", 2): "operator-modulus",
    ("operator<", 2): "operator-less",
    ("operator<=", 2): "operator-less_equal",
    ("operator>=", 2): "operator-greater_equal",
    ("operator==", 2): "operator-equal_to",
    ("operator!=", 2): "operator-not_equal_to",
    ("operator=", 2): "operator-assign",
    ("operator<<=", 2): "operator-shift_left_assign",
    ("operator>>=", 2): "operator-shift_right_assign",
    ("operator*=", 2): "operator-multiplies_assign",
    ("operator/=", 2): "operator-divides_assign",
    ("operator%=", 2): "operator-modulus_assign",
    ("operator+=", 2): "operator-plus_assign",
    ("operator-=", 2): "operator-minus_assign",
    ("operator&=", 2): "operator-bitwise_and_assign",
    ("operator|=", 2): "operator-bitwise_or_assign",
    ("operator&&", 2): "operator-logical_and",
    ("operator||", 2): "operator-logical_or",
    ("operator|", 2): "operator-bitwise_or",
    ("operator&", 2): "operator-bitwise_and",
    ("operator^", 2): "operator-bitwise_xor",
    ("operator,", 2): "operator-comma",
    ("operator->", 1): "operator-arrow",
    ("operator&", 1): "operator-address_of",
    "operator()": "operator-call",
    "operator[]": "operator-subscript",
}
# Default documentation group for entities that are members of a class
# (entities with a "parent").  A "function" member corresponds to a friend
# function (methods use the "method" kind).
DEFAULT_MEMBER_GROUP_FOR_MEMBER_ENTITY_TYPE = {
    "constructor": "Constructors",
    "destructor": "Constructors",
    "class": "Types",
    "method": "Methods",
    "enum": "Types",
    "alias": "Types",
    "conversion_function": "Conversion operators",
    "var": "Data members",
    "function": "Friend functions",
}
# Default documentation group for namespace-scope (non-member) entities.
DEFAULT_MEMBER_GROUP_FOR_NON_MEMBER_ENTITY_TYPE = {
    "alias": "Related Types",
    "enum": "Related Types",
    "class": "Related Types",
    "function": "Related Functions",
    "var": "Related Constants",
    "macro": "Related Macros",
}
def _get_default_member_group(entity: CppApiEntity) -> str:
    """Returns the default documentation group name for `entity`."""
    group_map = (
        DEFAULT_MEMBER_GROUP_FOR_MEMBER_ENTITY_TYPE
        if entity.get("parent")
        else DEFAULT_MEMBER_GROUP_FOR_NON_MEMBER_ENTITY_TYPE
    )
    return group_map[entity["kind"]]
def _normalize_doc_text(text: str) -> str:
    """Converts doxygen-style commands in `text` to reST field syntax."""
    # Drop `\brief` / `@brief` / `\details` prefixes; the text itself suffices.
    text = re.sub(r"^((?:\\|@)(?:brief|details)\s+)", "", text, flags=re.MULTILINE)
    # `\param[in] name` -> `:param name[in]:` (likewise `\tparam`).
    text = re.sub(
        r"^(?:\\|@)(t?param)(\[(?:in|out|in,\sout)\])?\s+([a-zA-Z_][^ ]*)",
        ":\\1 \\3\\2:",
        text,
        flags=re.MULTILINE,
    )
    # `\error `Code`` -> `:error Code:`.
    text = re.sub(
        r"^(?:\\|@)(error)\s+`([^`]+)`", ":\\1 \\2:", text, flags=re.MULTILINE
    )
    # `\returns`, `\pre`, `\post`, `\checks`, `\invariant`, `\requires` ->
    # reST fields of the same name.
    text = re.sub(
        r"^(?:\\|@)(returns?|pre|post|[ds]?checks|invariant|requires)(?: |\n )",
        ":\\1: ",
        text,
        flags=re.MULTILINE,
    )
    # `\retval value` -> `:retval value:`.
    text = re.sub(r"^(?:\\|@)(retval)\s+(\S+)", ":\\1 \\2:", text, flags=re.MULTILINE)
    # Special group commands are consumed separately (see `_parse_entity_doc`).
    text = SPECIAL_GROUP_COMMAND_PATTERN.sub("", text)
    return text
# Entity kinds documented with a function-style schema.
FUNCTION_ENTITY_KINDS = frozenset(
    {"function", "method", "constructor", "destructor", "conversion_function"}
)


def _is_function(entity: CppApiEntity) -> bool:
    """Indicates whether `entity` is a function-like entity."""
    return entity["kind"] in FUNCTION_ENTITY_KINDS
def _get_path_component_from_special_id(special_id: str) -> str:
special_id = re.sub("[^a-zA-Z0-9_]+", "-", special_id)
return special_id.strip("-")
def _apply_identifier_replacements(
requires_term: str, replacements: Dict[str, str]
) -> str:
for orig_identifier, replacement in replacements.items():
requires_term = re.sub(
r"\b" + re.escape(orig_identifier) + r"\b", replacement, requires_term
)
return requires_term
def _make_explicit_conditional(decl: str, explicit: str) -> str:
new_str = re.sub(r"\bexplicit\b", f"explicit({explicit})", decl, 1)
if new_str == decl:
raise ValueError(
"Failed to insert explicit condition %r into: %s"
% (
explicit,
decl,
)
)
return new_str
def _is_uniform_binary_expr(
    expr: sphinx.domains.cpp.ASTBase, allowed_ops: Tuple[str, ...]
) -> bool:
    """Checks that `expr` is a binary expression using only `allowed_ops`."""
    if isinstance(expr, sphinx.domains.cpp.ASTBinOpExpr):
        return all(op in allowed_ops for op in expr.ops)
    return False
def _is_logical_and_expr(expr: sphinx.domains.cpp.ASTBase) -> bool:
    """Indicates whether `expr` is a binary expression using only `&&`/`and`."""
    return _is_uniform_binary_expr(expr, ("&&", "and"))
def _is_primary_expr(expr: sphinx.domains.cpp.ASTBase) -> bool:
    """Indicates whether `expr` is a primary expression (literal, id, or
    `this`) that needs no parentheses when used as a requires term."""
    return isinstance(
        expr,
        (
            sphinx.domains.cpp.ASTLiteral,
            sphinx.domains.cpp.ASTIdExpression,
            sphinx.domains.cpp.ASTThisLiteral,
        ),
    )
def _normalize_requires_terms(terms: List[str]) -> List[str]:
    """Normalizes requires terms by flattening conjunctions.

    The terms are joined into a single ``&&`` conjunction, re-parsed as a
    requires clause, and split back apart on top-level conjunctions;
    redundant parentheses around primary expressions are dropped.
    """
    if not terms:
        return terms
    expr = " && ".join(f"({term})" for term in terms)
    parser = sphinx.domains.cpp.DefinitionParser(
        "requires " + expr,
        location=("", 0),
        config=cast(sphinx.config.Config, SphinxConfig()),
    )
    # If we allow fallback parsing, we end up with incorrect parsing and a
    # spurious warning.
    parser.allowFallbackExpressionParsing = False
    ast = parser._parse_requires_clause()
    parser.skip_ws()
    parser.assert_end()
    assert ast is not None
    new_terms = []

    def process(
        expr: Union[sphinx.domains.cpp.ASTType, sphinx.domains.cpp.ASTExpression]
    ):
        # Appends the normalized term(s) for `expr` to `new_terms`.
        # First unwrap parentheses and degenerate single-operand binary or
        # postfix wrapper nodes.
        while True:
            if isinstance(expr, sphinx.domains.cpp.ASTParenExpr):
                expr = expr.expr
                continue
            if isinstance(expr, sphinx.domains.cpp.ASTBinOpExpr) and not expr.ops:
                expr = expr.exprs[0]
                continue
            if (
                isinstance(expr, sphinx.domains.cpp.ASTPostfixExpr)
                and not expr.postFixes
            ):
                expr = expr.prefix
                continue
            break
        if _is_logical_and_expr(expr):
            # Flatten a conjunction into separate terms.
            for sub_expr in cast(sphinx.domains.cpp.ASTBinOpExpr, expr).exprs:
                process(sub_expr)
            return
        if _is_primary_expr(expr):
            # Primary expressions need no parentheses.
            new_terms.append(str(expr))
            return
        new_terms.append(f"({expr})")

    process(ast.expr)
    return new_terms
def _format_template_arguments(entity: CppApiEntity) -> str:
    """Formats the template argument list (e.g. ``<T, N...>``) for `entity`."""
    if entity.get("specializes"):
        # Template arguments already included in `entity["name"]`.
        return ""
    params = entity.get("template_parameters")
    if not params:
        return ""
    args = []
    for param in params:
        if not param["name"]:
            # Unnamed parameters contribute no argument.
            continue
        args.append(param["name"] + ("..." if param["pack"] else ""))
    return "<" + ", ".join(args) + ">"
def _get_entity_base_page_name_component(entity: CppApiEntity) -> str:
    """Computes the base page-name component for `entity`.

    Class specializations drop their template arguments; conversion functions
    and operators map to symbolic names from `_OPERATOR_PAGE_NAMES`.
    """
    name = entity["name"]
    kind = entity["kind"]
    if kind == "class" and entity.get("specializes"):
        # Strip any template arguments
        return re.sub("([^<]*).*", r"\1", name)
    if kind == "conversion_function":
        return "operator-cast"
    if kind in ("function", "method") and re.match(r"operator\b", name):
        arity = cast(FunctionEntity, entity)["arity"]
        if kind == "method":
            # Count the implicit object parameter toward the arity.
            arity += 1
        page_name = _OPERATOR_PAGE_NAMES.get((name, arity))
        if page_name is None:
            # `operator()` / `operator[]` are keyed without arity.
            page_name = _OPERATOR_PAGE_NAMES[name]
        return page_name
    return name
def _get_entity_page_name_component(entity: CppApiEntity) -> str:
    """Computes the page-name component, including any special-id suffix."""
    component = _get_entity_base_page_name_component(entity)
    special_id = entity.get("special_id")
    if special_id is None:
        return component
    return component + "-" + _get_path_component_from_special_id(special_id)
def _ensure_unique_page_names(
    entities_with_page_names: List[EntityId],
    entities: Dict[EntityId, CppApiEntity],
    warning,
) -> None:
    """Assigns numeric `special_id`s to disambiguate colliding page names.

    Entities are grouped by (parent, scope, base page name, special id); each
    group with more than one member gets sequential ids starting at "1", and
    `warning` is invoked listing the colliding definition locations.
    """
    groups: Dict[
        Tuple[Optional[str], Optional[str], str, Optional[str]], List[EntityId]
    ] = {}
    for entity_id in entities_with_page_names:
        entity = entities[entity_id]
        key = (
            entity.get("parent"),
            entity.get("scope"),
            _get_entity_base_page_name_component(entity),
            entity.get("special_id"),
        )
        groups.setdefault(key, []).append(entity_id)
    for (_, _, base_name, _), entity_ids in groups.items():
        if len(entity_ids) < 2:
            continue
        colliding = [entities[entity_id] for entity_id in entity_ids]
        warning(
            "Disambiguating %d overloads of %s using numerical ids. Definitions at %s",
            len(entity_ids),
            base_name,
            ", ".join(
                "%s:%d" % (entity["location"]["file"], entity["location"]["line"])
                for entity in colliding
            ),
        )
        for i, entity in enumerate(colliding):
            entity["special_id"] = str(i + 1)
class JsonDiagnostic(TypedDict):
    """Error or warning emitted while organizing the API entities."""

    message: str
    location: Optional[JsonLocation]  # None when no source location applies
class JsonNitpickExclusion(TypedDict):
    """Identifies a reference `target` excluded from nitpick checking at the
    given source location (collected from nonitpick directives)."""

    file: str
    line: int
    target: str
class JsonApiData(TypedDict):
    """Top-level JSON API description produced by this program."""

    errors: List[JsonDiagnostic]
    warnings: List[JsonDiagnostic]
    nonitpick: List[JsonNitpickExclusion]
    groups: Dict[str, List[EntityId]]  # group name -> member entity ids
    entities: Dict[str, CppApiEntity]  # entity id -> entity
def organize_entities(
config: Config, entities: Dict[EntityId, CppApiEntity]
) -> JsonApiData:
errors: List[JsonDiagnostic] = []
warnings: List[JsonDiagnostic] = []
def error(msg: str, *args, location: Optional[JsonLocation] = None):
errors.append({"message": msg % args, "location": location})
def warning(msg: str, *args, location: Optional[JsonLocation] = None):
warnings.append({"message": msg % args, "location": location})
def _handle_document_with(entity: CppApiEntity) -> bool:
document_with = entity.get("document_with")
if document_with is None:
return False
sibling_entity: Optional[CppApiEntity] = entities.get(document_with)
if sibling_entity is None:
return False
sibling_entity.setdefault("siblings", []).append(entity["id"])
return True
def _normalize_entity_requires(entity: CppApiEntity):
template_parameters = entity.get("template_parameters")
if template_parameters:
(
template_parameters,
requires,
replacements,
) = _transform_template_parameters(config, template_parameters)
if entity.get("specializes") is None and not template_parameters:
entity["template_parameters"] = None
else:
entity["template_parameters"] = template_parameters
else:
requires = []
replacements = None
explicit = None
existing_requires = entity.get("requires")
if existing_requires:
requires = existing_requires + requires
if _is_function(entity):
func_entity = cast(FunctionEntity, entity)
declaration = func_entity["declaration"]
if replacements:
declaration = _apply_identifier_replacements(declaration, replacements)
if (
func_entity["kind"] != "constructor"
and config.hide_types_pattern.search(
declaration[: declaration.index(func_entity["name_substitute"])]
)
is not None
):
declaration = _strip_return_type(
declaration,
"template <> " if template_parameters is not None else "",
location=(entity["location"]["file"], entity["location"]["line"]),
)
func_entity["declaration"] = declaration
else:
if replacements:
for key in cast(
Tuple[Literal["declaration", "underlying_type"], ...],
("declaration", "underlying_type"),
):
x = cast(Optional[str], entity.get(key, None))
if x is not None:
entity[key] = _apply_identifier_replacements(x, replacements) # type: ignore[typeddict-item]
if replacements:
requires = [
_apply_identifier_replacements(x, replacements) for x in requires
]
requires = _normalize_requires_terms(requires)
new_requires = []
explicit = None
for term in requires:
m = CONDITIONALLY_EXPLICIT_PATTERN.fullmatch(term)
if m is not None:
if explicit is not None:
raise ValueError(
"cannot have more than one conditionally-explicit term"
)
explicit = m.group(1)
else:
new_requires.append(term)
requires = new_requires
if explicit:
if entity["kind"] != "constructor":
raise ValueError(
"conditionally-explicit terms only valid on constructors"
)
entity["declaration"] = _make_explicit_conditional(
entity["declaration"], explicit
)
requires = [x for x in requires if config.hide_types_pattern.search(x) is None]
entity["requires"] = requires
def get_entity_page_name(entity: CppApiEntity) -> str:
components = []
cur_entity = entity
while True:
components.append(_get_entity_page_name_component(cur_entity))
parent_id = cur_entity.get("parent")
if parent_id is None:
break
parent_entity = entities.get(parent_id)
assert parent_entity is not None
cur_entity = parent_entity
components.reverse()
page_name = (cur_entity["scope"] + "::".join(components)).replace("::", ".")
return page_name
def _parse_entity_doc(entity: CppApiEntity):
doc = entity["doc"]
if doc is None:
if _handle_document_with(entity):
return True
return False
doc_text = doc["text"]
for m in SPECIAL_GROUP_COMMAND_PATTERN.finditer(doc_text):
entity[cast(Literal["special_id"], "special_" + m.group(1))] = m.group(
2
).strip()
return True
def get_entity_scope(entity: CppApiEntity) -> str:
    """Return the fully-qualified scope prefix of `entity`.

    Walks the parent chain, formatting each ancestor's name; template
    arguments are appended unless the ancestor is a specialization
    (whose name already carries them).  When there is at least one
    ancestor the result ends with "::".
    """
    ancestors: List[str] = []
    node = entity
    while True:
        parent_id = node.get("parent")
        if parent_id is None:
            break
        parent = entities.get(parent_id)
        assert parent is not None
        node = parent
        display_name = node["name"]
        if not node.get("specializes"):
            display_name += _format_template_arguments(node)
        ancestors.append(display_name)
    ancestors.reverse()
    if ancestors:
        # Trailing empty component produces a "::" suffix after joining.
        ancestors.append("")
    return node.get("scope", "") + "::".join(ancestors)
def get_entity_object_name(entity: CppApiEntity) -> str:
    """Return the fully-qualified name, with an optional "[special-id]" suffix."""
    qualified = get_entity_scope(entity) + entity["name"]
    suffix = entity.get("special_id")
    return f"{qualified}[{suffix}]" if suffix else qualified
# Maps (parent_id, scope, base_name) -> entity id for unspecialized variable
# templates; used below to resolve `specializes` back-references.
unspecialized_names: Dict[
    Tuple[Optional[EntityId], Optional[str], str], EntityId
] = {}
# Maps fully-qualified object name -> entity id; used to resolve
# entity-name references (e.g. from \relates commands).
names: Dict[str, EntityId] = {}
def resolve_entity_name(
    scope: str, relative_entity_name: str
) -> Optional[EntityId]:
    """Resolve `relative_entity_name` against `scope`, C++ lookup style.

    A name starting with "::" is treated as absolute.  Otherwise the
    name is tried in `scope` first, then in each successively shorter
    enclosing scope.  Returns the entity id, or None if no match.
    """
    if relative_entity_name.startswith("::"):
        return names.get(relative_entity_name[2:])
    prefix_len = len(scope)
    while True:
        candidate = names.get(scope[:prefix_len] + relative_entity_name)
        if candidate is not None:
            return candidate
        if prefix_len == 0:
            return None
        # Drop the innermost scope component (components end with "::").
        sep = scope.rfind("::", 0, prefix_len - 2)
        prefix_len = 0 if sep == -1 else sep + 2
# Specialization entities whose `specializes` field is the placeholder True
# and still needs resolving to the unspecialized entity's id.
must_resolve_specializes: List[CppApiEntity] = []
# Accumulated nitpick exclusions, emitted as "nonitpick" in the output JSON.
all_nonitpick: List[JsonNitpickExclusion] = []
def _handle_nitpick(entity: CppApiEntity, targets: List[str]) -> None:
    """Record a nitpick exclusion for each of `targets` at the entity's location.

    If the entity is documented with another entity, the exclusions are
    recorded against that entity's location instead.
    """
    alias_id = entity.get("document_with")
    source = entities[alias_id] if alias_id else entity
    loc: JsonLocation = source["location"]
    all_nonitpick.extend(
        {"file": loc["file"], "line": loc["line"], "target": target}
        for target in targets
    )
# Pass 1: index unspecialized variable templates, parse doc comments,
# normalize requires clauses, and collect nitpick exclusions.
entities_with_page_names: List[EntityId] = []
for entity in entities.values():
    specializes = entity.get("specializes")
    if (
        entity["kind"] == "var"
        and entity.get("template_parameters") is not None
        and specializes is None
    ):
        # Unspecialized variable template: register it so that its
        # specializations can be linked back to it in pass 2.
        key = (entity.get("parent"), entity.get("scope"), entity["name"])
        entity_id = entity["id"]
        if unspecialized_names.setdefault(key, entity_id) != entity_id:
            raise ValueError("Duplicate unspecialized entity name: %r" % (key,))
    if specializes is True:
        # `True` is a placeholder meaning "specializes something"; the
        # actual target entity id is resolved in pass 2.
        must_resolve_specializes.append(entity)
    if not _parse_entity_doc(entity):
        continue
    _normalize_entity_requires(entity)
    nonitpick = entity.get("nonitpick")
    if nonitpick:
        _handle_nitpick(entity, nonitpick)
    if not entity["doc"]:
        # Undocumented entities do not get their own page.
        continue
    entities_with_page_names.append(entity["id"])

# Pass 2: resolve each specialization to its unspecialized entity,
# keyed by the name with the template argument list ("<...>") stripped.
for entity in must_resolve_specializes:
    name = cast(str, entity["name"])
    name = name[: name.index("<")]
    unspecialized_key = (entity.get("parent"), entity.get("scope"), name)
    unspecialized_id = unspecialized_names.get(unspecialized_key)
    if unspecialized_id is not None:
        entity["specializes"] = unspecialized_id

_ensure_unique_page_names(entities_with_page_names, entities, warning)

# Pass 3: assign page/object names and normalize doc text for every
# documented entity.
for entity_id in entities_with_page_names:
    entity = entities[entity_id]
    names[get_entity_object_name(entity)] = entity_id
    entity["page_name"] = get_entity_page_name(entity)
    doc = entity["doc"]
    assert doc is not None
    doc["text"] = _normalize_doc_text(doc["text"])

# Pass 4: organize documented entities into groups (\ingroup) or attach
# them as members/nonmembers of another entity (\relates or the lexical
# parent).
groups: Dict[str, List[EntityId]] = {}
for entity in entities.values():
    entity_id = entity["id"]
    doc = entity["doc"]
    if doc is None:
        continue
    ingroup = entity.get("special_ingroup")
    relates_name = entity.get("special_relates")
    member_group = entity.get("special_membergroup")
    if (ingroup is not None) and (relates_name is not None):
        error(
            "Cannot specify both \\ingroup and \\relates for %r",
            entity,
            location=doc["location"],
        )
        continue
    if ingroup is not None:
        # Normalize the group name to a docutils id and file the entity
        # under it; \membergroup is meaningless in combination.
        ingroup = docutils.nodes.make_id(ingroup)
        groups.setdefault(ingroup, []).append(entity_id)
        if member_group is not None:
            error(
                "Cannot specify both \\ingroup and \\membergroup for %r",
                entity,
                location=doc["location"],
            )
        continue
    parent_id = entity.get("parent")
    if relates_name is not None:
        # \relates: attach as a *nonmember* of the named entity; the
        # reference is resolved relative to this entity's scope.
        scope = get_entity_scope(entity)
        relates_id = resolve_entity_name(scope, relates_name)
        if relates_id is None:
            error(
                "Cannot resolve entity reference %r from scope %r",
                relates_name,
                scope,
                location=doc["location"],
            )
            continue
        parent_id = None
    else:
        if parent_id is None:
            warning(
                "No group or relates specified for entity %s%s",
                entity.get("scope"),
                entity["name"],
                location=doc["location"],
            )
            continue
        # No explicit target: attach as a member of the lexical parent.
        relates_id = parent_id
    if member_group is None:
        member_group = _get_default_member_group(entity)
    assert relates_id is not None
    # `parent_id` being set distinguishes members from related nonmembers.
    entities[relates_id].setdefault(
        cast(
            Literal["related_members", "related_nonmembers"],
            "related_members" if parent_id is not None else "related_nonmembers",
        ),
        cast(Dict[str, List[EntityId]], {}),
    ).setdefault(member_group, []).append(entity_id)
return {
    "entities": entities,
    "groups": groups,
    "errors": errors,
    "warnings": warnings,
    "nonitpick": all_nonitpick,
}
def _get_output_json(extractor: Extractor) -> JsonApiData:
    """Convert the extractor's collected declarations into JSON API data."""
    gen = JsonApiGenerator(extractor)
    decls = extractor.decls
    if extractor.config.verbose:
        logger.info("Found %d C++ declarations", len(decls))
    for declaration in decls:
        gen.add(declaration)
    return organize_entities(extractor.config, gen.seen_decls)
def generate_output(config: Config) -> JsonApiData:
    """Run the full extraction pipeline for `config` and return the API JSON data."""
    return _get_output_json(Extractor(config))
def _load_config(config_path: str) -> Config:
    """Load a `Config` by executing the Python file at `config_path`.

    The file must define a module-level variable named ``config`` that
    holds a `Config` instance.

    SECURITY NOTE: the configuration file is executed with `exec`, so it
    can run arbitrary code — only use trusted config files.
    """
    source = pathlib.Path(config_path).read_text(encoding="utf-8")
    namespace: dict = {}
    exec(source, namespace)  # pylint: disable=exec-used
    loaded = namespace["config"]
    assert isinstance(loaded, Config)
    return loaded
def main():
    """Command-line entry point: load the config, build the API JSON, write it."""
    args = _parse_args(output_required=True)
    api_data = generate_output(_load_config(args.config))
    if args.output is not None:
        with open(args.output, "w", encoding="utf-8") as out_file:
            json.dump(api_data, out_file)
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
# Last update: Mar 28, 2024