|
| 1 | +#!/usr/bin/env python3 |
"""
Pre-commit hook: verify that every lint test name used in a result tuple is
mentioned in the docstring of the lint function that reports it.

Only the function named after the file is checked (for example
``module_tests()`` in ``module_tests.py``); nested function definitions inside
it are not searched.

Applies to nf-core component lint files where results are accumulated as:

    component.passed.append(("category", "test_name", "message", path))
    component.warned.append(("category", "test_name", "message", path))
    component.failed.append(("category", "test_name", "message", path))

Usage (called by pre-commit with the changed files as arguments):

    python scripts/check_lint_docstrings.py nf_core/modules/lint/module_tests.py ...
"""
| 16 | + |
| 17 | +import ast |
| 18 | +import sys |
| 19 | +from pathlib import Path |
| 20 | + |
| 21 | + |
| 22 | +def collect_test_names(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[int]]: |
| 23 | + """Return {test_name: [line_numbers]} for all result-tuple appends in the function.""" |
| 24 | + results: dict[str, list[int]] = {} |
| 25 | + # Walk only the direct body — do not descend into nested function definitions. |
| 26 | + nodes_to_visit: list[ast.AST] = list(func_node.body) |
| 27 | + while nodes_to_visit: |
| 28 | + node = nodes_to_visit.pop() |
| 29 | + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): |
| 30 | + continue # skip nested scopes |
| 31 | + if ( |
| 32 | + isinstance(node, ast.Call) |
| 33 | + and isinstance(node.func, ast.Attribute) |
| 34 | + and node.func.attr == "append" |
| 35 | + and isinstance(node.func.value, ast.Attribute) |
| 36 | + and node.func.value.attr in ("passed", "warned", "failed") |
| 37 | + and len(node.args) == 1 |
| 38 | + and isinstance(node.args[0], ast.Tuple) |
| 39 | + and len(node.args[0].elts) >= 2 |
| 40 | + and isinstance(node.args[0].elts[1], ast.Constant) |
| 41 | + and isinstance(node.args[0].elts[1].value, str) |
| 42 | + ): |
| 43 | + test_name = node.args[0].elts[1].value |
| 44 | + results.setdefault(test_name, []).append(node.lineno) |
| 45 | + nodes_to_visit.extend(ast.iter_child_nodes(node)) |
| 46 | + return results |
| 47 | + |
| 48 | + |
def check_file(path: Path) -> list[str]:
    """Check one lint file and return a list of human-readable error strings.

    The file is parsed and the function whose name equals the file stem
    (e.g. ``module_tests`` in ``module_tests.py``) is located. Every test name
    collected from that function by ``collect_test_names`` must appear as a
    substring of the function's docstring; each missing name yields one error
    pointing at its first use.

    Args:
        path: Path of the Python file to check.

    Returns:
        An empty list when nothing is wrong (or no function matches the file
        stem); otherwise one message per problem. Unreadable or unparsable
        files are reported as a single error instead of raising.
    """
    try:
        # Read bytes rather than text: the parser then honours BOMs and PEP 263
        # coding declarations instead of depending on the locale encoding.
        source = path.read_bytes()
    except OSError as e:
        return [f"{path}: could not read file: {e}"]

    try:
        tree = ast.parse(source, filename=str(path))
    except (SyntaxError, ValueError) as e:
        # ValueError covers undecodable input and embedded null bytes on
        # interpreter versions that do not report them as SyntaxError.
        return [f"{path}: SyntaxError: {e}"]

    module_stem = path.stem  # e.g. "module_tests" from "module_tests.py"

    # Only the first function matching the stem is checked.
    target = next(
        (
            node
            for node in ast.walk(tree)
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == module_stem
        ),
        None,
    )
    if target is None:
        return []

    docstring = ast.get_docstring(target) or ""
    errors: list[str] = []
    for test_name, lines in sorted(collect_test_names(target).items()):
        if test_name not in docstring:
            # min() rather than lines[0]: report the first occurrence even if
            # the collected line numbers are not in source order.
            errors.append(
                f"{path}:{min(lines)}: '{test_name}' used in {target.name}() but not documented in its docstring"
            )
    return errors
| 77 | + |
| 78 | + |
| 79 | +def main() -> int: |
| 80 | + files = [Path(f) for f in sys.argv[1:] if f.endswith(".py")] |
| 81 | + all_errors: list[str] = [] |
| 82 | + for path in files: |
| 83 | + all_errors.extend(check_file(path)) |
| 84 | + for error in all_errors: |
| 85 | + print(error, file=sys.stderr) |
| 86 | + return 1 if all_errors else 0 |
| 87 | + |
| 88 | + |
| 89 | +if __name__ == "__main__": |
| 90 | + sys.exit(main()) |
0 commit comments