Skip to content

C domain, fix performance regression since 3.0.0 #12162

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 23, 2024
4 changes: 4 additions & 0 deletions sphinx/domains/c/_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def is_anon(self) -> bool:
def __str__(self) -> str:
return self.identifier

@property
def name(self) -> str:
return self.identifier

def get_display_string(self) -> str:
return "[anonymous]" if self.is_anon() else self.identifier

Expand Down
225 changes: 91 additions & 134 deletions sphinx/domains/c/_symbol.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable
from typing import TYPE_CHECKING, Any, Callable, ValuesView

from sphinx.domains.c._ast import (
ASTDeclaration,
Expand All @@ -11,7 +11,7 @@
from sphinx.util import logging

if TYPE_CHECKING:
from collections.abc import Iterator
from collections.abc import Iterator, Sequence

from typing_extensions import Self

Expand All @@ -32,7 +32,7 @@ def __str__(self) -> str:


class SymbolLookupResult:
def __init__(self, symbols: Iterator[Symbol], parentSymbol: Symbol,
def __init__(self, symbols: Sequence[Symbol], parentSymbol: Symbol,
ident: ASTIdentifier) -> None:
self.symbols = symbols
self.parentSymbol = parentSymbol
Expand Down Expand Up @@ -102,18 +102,46 @@ def __init__(
self.isRedeclaration = False
self._assert_invariants()

# Remember to modify Symbol.remove if modifications to the parent change.
self._children: list[Symbol] = []
self._anonChildren: list[Symbol] = []
# note: _children includes _anonChildren
# These properties store the same children for different access
# patterns. Symbol._add_child and Symbol._remove_child should be
# used for modifying them.
self._childrenByName: dict[str, Symbol] = {}
self._childrenByDocname: dict[str, dict[str, Symbol]] = {}
self._anonChildren: set[Symbol] = set()

if self.parent:
self.parent._children.append(self)
self.parent._add_child(self)
if self.declaration:
self.declaration.symbol = self

# Do symbol addition after self._children has been initialised.
self._add_function_params()

@property
def _children(self) -> ValuesView[Symbol]:
return self._childrenByName.values()

def _add_child(self, child: Symbol) -> None:
name = child.ident.name
if name in self._childrenByName:
# Duplicate so don't add - will be reported in _add_symbols()
return
self._childrenByName[name] = child
if child.docname not in self._childrenByDocname:
self._childrenByDocname[child.docname] = {}
self._childrenByDocname[child.docname][name] = child
if child.ident.is_anon():
self._anonChildren.add(child)

def _remove_child(self, child: Symbol) -> None:
name = child.ident.name
self._childrenByName.pop(name)
siblings = self._childrenByDocname.get(child.docname, {})
if name in siblings:
del siblings[name]
if child.ident.is_anon() and child in self._anonChildren:
self._anonChildren.remove(child)

def _fill_empty(self, declaration: ASTDeclaration, docname: str, line: int) -> None:
self._assert_invariants()
assert self.declaration is None
Expand Down Expand Up @@ -154,25 +182,28 @@ def _add_function_params(self) -> None:
Symbol.debug_indent -= 1

def remove(self) -> None:
if self.parent is None:
return
assert self in self.parent._children
self.parent._children.remove(self)
self.parent = None
if self.parent:
self.parent._remove_child(self)
self.parent = None

def clear_doc(self, docname: str) -> None:
for sChild in self._children:
sChild.clear_doc(docname)
if sChild.declaration and sChild.docname == docname:
sChild.declaration = None
sChild.docname = None
sChild.line = None
if sChild.siblingAbove is not None:
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
if sChild.siblingBelow is not None:
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
sChild.siblingAbove = None
sChild.siblingBelow = None
if docname not in self._childrenByDocname:
for child in self._children:
child.clear_doc(docname)
return

children: dict[str, Symbol] = self._childrenByDocname.pop(docname)
for sChild in children.values():
sChild.declaration = None
sChild.docname = None
sChild.line = None
if sChild.siblingAbove is not None:
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
if sChild.siblingBelow is not None:
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
sChild.siblingAbove = None
sChild.siblingBelow = None
self._remove_child(sChild)

def get_all_symbols(self) -> Iterator[Symbol]:
yield self
Expand All @@ -183,14 +214,6 @@ def get_all_symbols(self) -> Iterator[Symbol]:
def children(self) -> Iterator[Symbol]:
yield from self._children

@property
def children_recurse_anon(self) -> Iterator[Symbol]:
for c in self._children:
yield c
if not c.ident.is_anon():
continue
yield from c.children_recurse_anon

def get_lookup_key(self) -> LookupKey:
# The pickle files for the environment and for each document are distinct.
# The environment has all the symbols, but the documents has xrefs that
Expand Down Expand Up @@ -221,68 +244,6 @@ def get_full_nested_name(self) -> ASTNestedName:
names = [s.ident for s in symbols]
return ASTNestedName(names, rooted=False)

def _find_first_named_symbol(self, ident: ASTIdentifier,
matchSelf: bool, recurseInAnon: bool) -> Symbol | None:
# TODO: further simplification from C++ to C
if Symbol.debug_lookup:
Symbol.debug_print("_find_first_named_symbol ->")
res = self._find_named_symbols(ident, matchSelf, recurseInAnon,
searchInSiblings=False)
try:
return next(res)
except StopIteration:
return None

def _find_named_symbols(self, ident: ASTIdentifier,
matchSelf: bool, recurseInAnon: bool,
searchInSiblings: bool) -> Iterator[Symbol]:
# TODO: further simplification from C++ to C
if Symbol.debug_lookup:
Symbol.debug_indent += 1
Symbol.debug_print("_find_named_symbols:")
Symbol.debug_indent += 1
Symbol.debug_print("self:")
logger.debug(self.to_string(Symbol.debug_indent + 1, addEndNewline=False))
Symbol.debug_print("ident: ", ident)
Symbol.debug_print("matchSelf: ", matchSelf)
Symbol.debug_print("recurseInAnon: ", recurseInAnon)
Symbol.debug_print("searchInSiblings: ", searchInSiblings)

def candidates() -> Iterator[Symbol]:
s = self
if Symbol.debug_lookup:
Symbol.debug_print("searching in self:")
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
while True:
if matchSelf:
yield s
if recurseInAnon:
yield from s.children_recurse_anon
else:
yield from s._children

if s.siblingAbove is None:
break
s = s.siblingAbove
if Symbol.debug_lookup:
Symbol.debug_print("searching in sibling:")
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))

for s in candidates():
if Symbol.debug_lookup:
Symbol.debug_print("candidate:")
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
if s.ident == ident:
if Symbol.debug_lookup:
Symbol.debug_indent += 1
Symbol.debug_print("matches")
Symbol.debug_indent -= 3
yield s
if Symbol.debug_lookup:
Symbol.debug_indent += 2
if Symbol.debug_lookup:
Symbol.debug_indent -= 2

def _symbol_lookup(
self,
nestedName: ASTNestedName,
Expand Down Expand Up @@ -311,16 +272,14 @@ def _symbol_lookup(
# find the right starting point for lookup
parentSymbol = self
if nestedName.rooted:
while parentSymbol.parent:
while parentSymbol.parent is not None:
parentSymbol = parentSymbol.parent

if ancestorLookupType is not None:
# walk up until we find the first identifier
firstName = names[0]
while parentSymbol.parent:
if parentSymbol.find_identifier(firstName,
matchSelf=matchSelf,
recurseInAnon=recurseInAnon,
searchInSiblings=searchInSiblings):
if firstName.name in parentSymbol._childrenByName:
break
parentSymbol = parentSymbol.parent

Expand All @@ -330,18 +289,15 @@ def _symbol_lookup(

# and now the actual lookup
for ident in names[:-1]:
symbol = parentSymbol._find_first_named_symbol(
ident, matchSelf=matchSelf, recurseInAnon=recurseInAnon)
if symbol is None:
name = ident.name
if name in parentSymbol._childrenByName:
symbol = parentSymbol._childrenByName[name]
else:
symbol = onMissingQualifiedSymbol(parentSymbol, ident)
if symbol is None:
if Symbol.debug_lookup:
Symbol.debug_indent -= 2
return None
# We have now matched part of a nested name, and need to match more
# so even if we should matchSelf before, we definitely shouldn't
# even more. (see also issue #2666)
matchSelf = False
parentSymbol = symbol

if Symbol.debug_lookup:
Expand All @@ -350,15 +306,19 @@ def _symbol_lookup(

# handle the last name
ident = names[-1]
name = ident.name
symbol = parentSymbol._childrenByName.get(name)
if not symbol and recurseInAnon:
for child in parentSymbol._anonChildren:
if name in child._childrenByName:
symbol = child._childrenByName[name]
break

symbols = parentSymbol._find_named_symbols(
ident, matchSelf=matchSelf,
recurseInAnon=recurseInAnon,
searchInSiblings=searchInSiblings)
if Symbol.debug_lookup:
symbols = list(symbols) # type: ignore[assignment]
Symbol.debug_indent -= 2
return SymbolLookupResult(symbols, parentSymbol, ident)

result = [symbol] if symbol else []
return SymbolLookupResult(result, parentSymbol, ident)

def _add_symbols(
self,
Expand Down Expand Up @@ -532,17 +492,17 @@ def merge_with(self, other: Symbol, docnames: list[str],
if Symbol.debug_lookup:
Symbol.debug_indent += 1
Symbol.debug_print("merge_with:")

assert other is not None
for otherChild in other._children:
ourChild = self._find_first_named_symbol(
ident=otherChild.ident, matchSelf=False,
recurseInAnon=False)
if ourChild is None:
otherName = otherChild.ident.name
if otherName not in self._childrenByName:
# TODO: hmm, should we prune by docnames?
self._children.append(otherChild)
otherChild.parent = self
self._add_child(otherChild)
otherChild._assert_invariants()
continue
ourChild = self._childrenByName[otherName]
if otherChild.declaration and otherChild.docname in docnames:
if not ourChild.declaration:
ourChild._fill_empty(otherChild.declaration,
Expand All @@ -560,6 +520,7 @@ def merge_with(self, other: Symbol, docnames: list[str],
# just ignore it, right?
pass
ourChild.merge_with(otherChild, docnames, env)

if Symbol.debug_lookup:
Symbol.debug_indent -= 1

Expand Down Expand Up @@ -608,10 +569,13 @@ def find_identifier(self, ident: ASTIdentifier,
Symbol.debug_indent -= 2
if matchSelf and current.ident == ident:
return current
children = current.children_recurse_anon if recurseInAnon else current._children
for s in children:
if s.ident == ident:
return s
name = ident.name
if name in current._childrenByName:
return current._childrenByName[name]
if recurseInAnon:
for child in current._anonChildren:
if name in child._childrenByName:
return child._childrenByName[name]
if not searchInSiblings:
break
current = current.siblingAbove
Expand All @@ -623,24 +587,17 @@ def direct_lookup(self, key: LookupKey) -> Symbol | None:
Symbol.debug_print("direct_lookup:")
Symbol.debug_indent += 1
s = self
for name, id_ in key.data:
res = None
for cand in s._children:
if cand.ident == name:
res = cand
break
s = res
for ident, id_ in key.data:
s = s._childrenByName.get(ident.name)
if Symbol.debug_lookup:
Symbol.debug_print("name: ", name)
Symbol.debug_print("name: ", ident.name)
Symbol.debug_print("id: ", id_)
if s is not None:
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
else:
Symbol.debug_print("not found")
if s is None:
if Symbol.debug_lookup:
Symbol.debug_indent -= 2
return None
break
if Symbol.debug_lookup:
Symbol.debug_indent -= 2
return s
Expand Down Expand Up @@ -680,7 +637,7 @@ def to_string(self, indent: int, *, addEndNewline: bool = True) -> str:
res.append('::')
else:
if self.ident:
res.append(str(self.ident))
res.append(self.ident.name)
else:
res.append(str(self.declaration))
if self.declaration:
Expand Down