Skip to content

Commit eeb497f

Browse files
committed
refactor: Improve stats code and performance
1 parent 721ce7d commit eeb497f

File tree

3 files changed

+141
-167
lines changed

3 files changed

+141
-167
lines changed

src/griffe/cli.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from griffe.git import get_latest_tag, get_repo_root
3434
from griffe.loader import GriffeLoader, load, load_git
3535
from griffe.logger import get_logger
36-
from griffe.stats import _format_stats
3736

3837
if TYPE_CHECKING:
3938
from griffe.extensions.base import Extensions, ExtensionType
@@ -411,7 +410,9 @@ def dump(
411410
elapsed = datetime.now(tz=timezone.utc) - started
412411

413412
if stats:
414-
logger.info(_format_stats({"time_spent_serializing": elapsed.microseconds, **loader.stats()}))
413+
loader_stats = loader.stats()
414+
loader_stats.time_spent_serializing = elapsed.microseconds
415+
logger.info(loader_stats.as_text())
415416

416417
return 0 if len(data_packages) == len(packages) else 1
417418

src/griffe/loader.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from griffe.importer import dynamic_import
3333
from griffe.logger import get_logger
3434
from griffe.merger import merge_stubs
35-
from griffe.stats import stats
35+
from griffe.stats import Stats
3636

3737
if TYPE_CHECKING:
3838
from griffe.enumerations import Parser
@@ -506,13 +506,16 @@ def resolve_module_aliases(
506506

507507
return resolved, unresolved
508508

509-
def stats(self) -> dict:
509+
def stats(self) -> Stats:
510510
"""Compute some statistics.
511511
512512
Returns:
513513
Some statistics.
514514
"""
515-
return {**stats(self), **self._time_stats}
515+
stats = Stats(self)
516+
stats.time_spent_visiting = self._time_stats["time_spent_visiting"]
517+
stats.time_spent_inspecting = self._time_stats["time_spent_inspecting"]
518+
return stats
516519

517520
def _load_package(self, package: Package | NamespacePackage, *, submodules: bool = True) -> Module:
518521
top_module = self._load_module(package.name, package.path, submodules=submodules)

src/griffe/stats.py

+132-162
Original file line numberDiff line numberDiff line change
@@ -3,172 +3,142 @@
33
from __future__ import annotations
44

55
from collections import defaultdict
6-
from typing import TYPE_CHECKING, Iterable, Union, cast
6+
from pathlib import Path
7+
from typing import TYPE_CHECKING
78

8-
from griffe.dataclasses import Class, Module
9-
from griffe.exceptions import BuiltinModuleError
9+
from griffe.enumerations import Kind
1010

1111
if TYPE_CHECKING:
1212
from griffe.dataclasses import Alias, Object
1313
from griffe.loader import GriffeLoader
1414

1515

16-
def _direct(objects: Iterable[Object | Alias]) -> list[Object | Alias]:
17-
return [obj for obj in objects if not obj.is_alias]
18-
19-
20-
def _n_modules(module: Module) -> int:
21-
submodules = _direct(module.modules.values())
22-
return len(submodules) + sum(_n_modules(cast(Module, mod)) for mod in submodules)
23-
24-
25-
def _n_classes(module_or_class: Module | Class) -> int:
26-
submodules = _direct(module_or_class.modules.values())
27-
subclasses = _direct(module_or_class.classes.values())
28-
mods_or_classes = [mc for mc in (*submodules, *subclasses) if not mc.is_alias]
29-
return len(subclasses) + sum(
30-
_n_classes(cast(Union[Module, Class], mod_or_class)) for mod_or_class in mods_or_classes
31-
)
32-
33-
34-
def _n_functions(module_or_class: Module | Class) -> int:
35-
submodules = _direct(module_or_class.modules.values())
36-
subclasses = _direct(module_or_class.classes.values())
37-
functions = _direct(module_or_class.functions.values())
38-
mods_or_classes = [*submodules, *subclasses]
39-
return len(functions) + sum(
40-
_n_functions(cast(Union[Module, Class], mod_or_class)) for mod_or_class in mods_or_classes
41-
)
42-
43-
44-
def _n_attributes(module_or_class: Module | Class) -> int:
45-
submodules = _direct(module_or_class.modules.values())
46-
subclasses = _direct(module_or_class.classes.values())
47-
attributes = _direct(module_or_class.attributes.values())
48-
mods_or_classes = [*submodules, *subclasses]
49-
return len(attributes) + sum(
50-
_n_attributes(cast(Union[Module, Class], mod_or_class)) for mod_or_class in mods_or_classes
51-
)
52-
53-
54-
def _merge_exts(exts1: dict[str, int], exts2: dict[str, int]) -> dict[str, int]:
55-
for ext, value in exts2.items():
56-
exts1[ext] += value
57-
return exts1
58-
59-
60-
def _sum_extensions(exts: dict[str, int], module: Module) -> None:
61-
current_exts = defaultdict(int)
62-
try:
63-
suffix = module.filepath.suffix # type: ignore[union-attr]
64-
except BuiltinModuleError:
65-
current_exts[""] = 1
66-
except AttributeError:
67-
suffix = ""
68-
else:
69-
if suffix:
70-
current_exts[suffix] = 1
71-
for submodule in _direct(module.modules.values()):
72-
_sum_extensions(current_exts, cast(Module, submodule))
73-
_merge_exts(exts, current_exts)
74-
75-
76-
def stats(loader: GriffeLoader) -> dict:
77-
"""Return some loading statistics.
78-
79-
Parameters:
80-
loader: The loader to compute stats from.
81-
82-
Returns:
83-
Some statistics.
84-
"""
85-
modules_by_extension = defaultdict(
86-
int,
87-
{
88-
"": 0,
89-
".py": 0,
90-
".pyi": 0,
91-
".pyc": 0,
92-
".pyo": 0,
93-
".pyd": 0,
94-
".so": 0,
95-
},
96-
)
97-
top_modules = loader.modules_collection.members.values()
98-
for module in top_modules:
99-
_sum_extensions(modules_by_extension, module)
100-
n_lines = sum(len(lines) for lines in loader.lines_collection.values())
101-
return {
102-
"packages": len(top_modules),
103-
"modules": len(top_modules) + sum(_n_modules(mod) for mod in top_modules),
104-
"classes": sum(_n_classes(mod) for mod in top_modules),
105-
"functions": sum(_n_functions(mod) for mod in top_modules),
106-
"attributes": sum(_n_attributes(mod) for mod in top_modules),
107-
"modules_by_extension": modules_by_extension,
108-
"lines": n_lines,
109-
}
110-
111-
112-
def _format_stats(stats: dict) -> str:
113-
lines = []
114-
packages = stats["packages"]
115-
modules = stats["modules"]
116-
classes = stats["classes"]
117-
functions = stats["functions"]
118-
attributes = stats["attributes"]
119-
objects = sum((modules, classes, functions, attributes))
120-
lines.append("Statistics")
121-
lines.append("---------------------")
122-
lines.append("Number of loaded objects")
123-
lines.append(f" Modules: {modules}")
124-
lines.append(f" Classes: {classes}")
125-
lines.append(f" Functions: {functions}")
126-
lines.append(f" Attributes: {attributes}")
127-
lines.append(f" Total: {objects} across {packages} packages")
128-
per_ext = stats["modules_by_extension"]
129-
builtin = per_ext[""]
130-
regular = per_ext[".py"]
131-
stubs = per_ext[".pyi"]
132-
compiled = modules - builtin - regular - stubs
133-
lines.append("")
134-
lines.append(f"Total number of lines: {stats['lines']}")
135-
lines.append("")
136-
lines.append("Modules")
137-
lines.append(f" Builtin: {builtin}")
138-
lines.append(f" Compiled: {compiled}")
139-
lines.append(f" Regular: {regular}")
140-
lines.append(f" Stubs: {stubs}")
141-
lines.append(" Per extension:")
142-
for ext, number in sorted(per_ext.items()):
143-
if ext:
144-
lines.append(f" {ext}: {number}")
145-
visit_time = stats["time_spent_visiting"] / 1000
146-
inspect_time = stats["time_spent_inspecting"] / 1000
147-
total_time = visit_time + inspect_time
148-
visit_percent = visit_time / total_time * 100
149-
inspect_percent = inspect_time / total_time * 100
150-
try:
151-
visit_time_per_module = visit_time / regular
152-
except ZeroDivisionError:
153-
visit_time_per_module = 0
154-
inspected_modules = builtin + compiled
155-
try:
156-
inspect_time_per_module = visit_time / inspected_modules
157-
except ZeroDivisionError:
158-
inspect_time_per_module = 0
159-
lines.append("")
160-
lines.append(
161-
f"Time spent visiting modules ({regular}): "
162-
f"{visit_time}ms, {visit_time_per_module:.02f}ms/module ({visit_percent:.02f}%)",
163-
)
164-
lines.append(
165-
f"Time spent inspecting modules ({inspected_modules}): "
166-
f"{inspect_time}ms, {inspect_time_per_module:.02f}ms/module ({inspect_percent:.02f}%)",
167-
)
168-
serialize_time = stats["time_spent_serializing"] / 1000
169-
serialize_time_per_module = serialize_time / modules
170-
lines.append(f"Time spent serializing: {serialize_time}ms, {serialize_time_per_module:.02f}ms/module")
171-
return "\n".join(lines)
172-
173-
174-
__all__ = ["stats"]
16+
class Stats:
    """Load statistics for a Griffe loader."""

    def __init__(self, loader: GriffeLoader) -> None:
        """Initialize the stats object.

        Counts are computed eagerly, by walking every top-level module of
        the loader's modules collection. Timings start at zero and are
        expected to be assigned by the caller afterwards.

        Parameters:
            loader: The loader to compute stats for.
        """
        # Kept so that `as_text` can read `loader.force_inspection`.
        self.loader = loader

        # Pre-seed the well-known suffixes so they are always reported,
        # even when no module with that suffix was loaded.
        modules_by_extension = defaultdict(
            int,
            {
                "": 0,
                ".py": 0,
                ".pyi": 0,
                ".pyc": 0,
                ".pyo": 0,
                ".pyd": 0,
                ".so": 0,
            },
        )
        top_modules = loader.modules_collection.members.values()

        # Number of non-alias objects found, per kind.
        self.by_kind = {
            Kind.MODULE: 0,
            Kind.CLASS: 0,
            Kind.FUNCTION: 0,
            Kind.ATTRIBUTE: 0,
        }
        # Number of top-level modules in the collection.
        self.packages = len(top_modules)
        # Number of modules per file suffix ("" is used when a module has no filepath).
        self.modules_by_extension = modules_by_extension
        # Total number of lines held by the loader's lines collection.
        self.lines = sum(len(lines) for lines in loader.lines_collection.values())
        # Timings: `as_text` divides them by 1000 and labels the result "ms",
        # i.e. they are treated as microseconds.
        self.time_spent_visiting = 0
        self.time_spent_inspecting = 0
        self.time_spent_serializing = 0

        for module in top_modules:
            self._itercount(module)

    @staticmethod
    def _ratio(numerator: float, denominator: float) -> float:
        """Divide two numbers, returning 0 when the denominator is zero.

        Parameters:
            numerator: The value to divide.
            denominator: The value to divide by.

        Returns:
            The quotient, or 0 if the denominator is zero.
        """
        if not denominator:
            return 0
        return numerator / denominator

    def _itercount(self, root: Object | Alias) -> None:
        """Recursively count an object and its members.

        Aliases are ignored, and so are their targets' members.

        Parameters:
            root: The object to start counting from.
        """
        if root.is_alias:
            return
        self.by_kind[root.kind] += 1
        if root.is_module:
            if isinstance(root.filepath, Path):
                self.modules_by_extension[root.filepath.suffix] += 1
            elif root.filepath is None:
                self.modules_by_extension[""] += 1
            # Any other filepath value is not attributed to an extension
            # (presumably namespace packages exposing several paths - TODO confirm).
        for member in root.members.values():
            self._itercount(member)

    def as_text(self) -> str:
        """Format the statistics as text.

        Ratios whose denominator is zero (no time recorded, no module
        loaded) are reported as 0 instead of raising `ZeroDivisionError`.

        Returns:
            Text stats.
        """
        lines = []
        packages = self.packages
        modules = self.by_kind[Kind.MODULE]
        classes = self.by_kind[Kind.CLASS]
        functions = self.by_kind[Kind.FUNCTION]
        attributes = self.by_kind[Kind.ATTRIBUTE]
        objects = sum((modules, classes, functions, attributes))
        lines.append("Statistics")
        lines.append("---------------------")
        lines.append("Number of loaded objects")
        lines.append(f" Modules: {modules}")
        lines.append(f" Classes: {classes}")
        lines.append(f" Functions: {functions}")
        lines.append(f" Attributes: {attributes}")
        lines.append(f" Total: {objects} across {packages} packages")

        per_ext = self.modules_by_extension
        builtin = per_ext[""]
        regular = per_ext[".py"]
        stubs = per_ext[".pyi"]
        # Every module that is neither builtin, regular nor a stub is reported as compiled.
        compiled = modules - builtin - regular - stubs
        lines.append("")
        lines.append(f"Total number of lines: {self.lines}")
        lines.append("")
        lines.append("Modules")
        lines.append(f" Builtin: {builtin}")
        lines.append(f" Compiled: {compiled}")
        lines.append(f" Regular: {regular}")
        lines.append(f" Stubs: {stubs}")
        lines.append(" Per extension:")
        for ext, number in sorted(per_ext.items()):
            if ext:
                lines.append(f" {ext}: {number}")

        visit_time = self.time_spent_visiting / 1000
        inspect_time = self.time_spent_inspecting / 1000
        total_time = visit_time + inspect_time
        # Timings default to 0, so the total can legitimately be zero here.
        visit_percent = self._ratio(visit_time, total_time) * 100
        inspect_percent = self._ratio(inspect_time, total_time) * 100

        # With forced inspection, regular modules are counted as inspected, not visited.
        force_inspection = self.loader.force_inspection
        visited_modules = 0 if force_inspection else regular
        visit_time_per_module = self._ratio(visit_time, visited_modules)

        inspected_modules = builtin + compiled + (regular if force_inspection else 0)
        inspect_time_per_module = self._ratio(inspect_time, inspected_modules)

        lines.append("")
        lines.append(
            f"Time spent visiting modules ({visited_modules}): "
            f"{visit_time}ms, {visit_time_per_module:.02f}ms/module ({visit_percent:.02f}%)",
        )
        lines.append(
            f"Time spent inspecting modules ({inspected_modules}): "
            f"{inspect_time}ms, {inspect_time_per_module:.02f}ms/module ({inspect_percent:.02f}%)",
        )

        serialize_time = self.time_spent_serializing / 1000
        # No module loaded means there is nothing to average over.
        serialize_time_per_module = self._ratio(serialize_time, modules)
        lines.append(f"Time spent serializing: {serialize_time}ms, {serialize_time_per_module:.02f}ms/module")

        return "\n".join(lines)
139+
140+
141+
stats = Stats
142+
"""Deprecated. Use `Stats` instead."""
143+
144+
__all__ = ["Stats"]

0 commit comments

Comments
 (0)