diff --git a/py/README.md b/py/README.md index d9dbf6d9b2..bf86f422eb 100644 --- a/py/README.md +++ b/py/README.md @@ -29,6 +29,7 @@ These packages are specific to Envoy or work to Envoy's specific requirements: - [envoy.distribution.repo](envoy.distribution.repo) - Repository management - [envoy.distribution.verify](envoy.distribution.verify) - Distribution verification - [envoy.docker.utils](envoy.docker.utils) - Docker utilities +- [envoy.docs.literalinclude](envoy.docs.literalinclude) - Tool to check and fix literalinclude line numbers in RST documentation - [envoy.docs.sphinx_runner](envoy.docs.sphinx_runner) - Sphinx documentation runner - [envoy.github.abstract](envoy.github.abstract) - GitHub abstractions - [envoy.github.release](envoy.github.release) - GitHub release management diff --git a/py/envoy.docs.literalinclude/BUILD b/py/envoy.docs.literalinclude/BUILD new file mode 100644 index 0000000000..78fa8823b6 --- /dev/null +++ b/py/envoy.docs.literalinclude/BUILD @@ -0,0 +1 @@ +toolshed_package("envoy.docs.literalinclude") diff --git a/py/envoy.docs.literalinclude/README.rst b/py/envoy.docs.literalinclude/README.rst new file mode 100644 index 0000000000..543bb0f788 --- /dev/null +++ b/py/envoy.docs.literalinclude/README.rst @@ -0,0 +1,103 @@ +Literalinclude Line Number Checker +=================================== + +A tool to automatically detect and fix outdated line numbers in Sphinx ``literalinclude`` directives. + +Problem +------- + +When using ``literalinclude`` directives in Sphinx documentation, you often specify line numbers to include specific portions of code: + +.. code-block:: rst + + .. literalinclude:: /path/to/config.yaml + :lines: 10-25 + :emphasize-lines: 15-18 + +However, when the source files change (lines are added or removed), these line numbers become outdated, potentially showing the wrong code or breaking the documentation build. + +Solution +-------- + +This tool: + +1. Scans RST files for ``literalinclude`` directives with line specifications +2. Uses git history to detect when source files changed after the RST files +3. Identifies directives where line changes affect the specified range +4. Optionally fixes the line numbers automatically + +Installation +------------ + +.. code-block:: bash + + pip install envoy.docs.literalinclude + +Usage +----- + +Check for outdated directives (dry run): + +.. code-block:: bash + + literalinclude-check /path/to/repo + +Check specific directories: + +.. code-block:: bash + + literalinclude-check /path/to/repo --dirs docs api + +Fix outdated directives: + +.. code-block:: bash + + literalinclude-check /path/to/repo --fix + +List all literalinclude directives: + +.. code-block:: bash + + literalinclude-check /path/to/repo --list + +JSON output: + +.. code-block:: bash + + literalinclude-check /path/to/repo --json + +How It Works +------------ + +1. **Discovery**: Finds all RST files in specified directories (default: ``docs`` and ``api``) +2. **Parsing**: Extracts ``literalinclude`` directives with their options: + + - ``:lines:`` - Line ranges to include + - ``:emphasize-lines:`` - Lines to emphasize + - Source file path + +3. **Git Analysis**: For each directive: + + - Checks when the RST file was last modified + - Checks when the source file was last modified + - If source changed after RST, analyzes the diff + +4. **Detection**: Flags directives as outdated if: + + - Lines were added/removed in the source file + - Changes occurred at or before the maximum line number referenced + +5. **Fixing**: Updates the line numbers in RST files to match current source + +Limitations +----------- + +- Requires a git repository +- Best-effort fixing - may not always determine correct new line numbers +- Currently focused on ``:lines:`` and ``:emphasize-lines:`` options +- Does not handle ``:start-after:`` and ``:end-before:`` markers yet + +License +------- + +Apache License 2.0 diff --git a/py/envoy.docs.literalinclude/VERSION b/py/envoy.docs.literalinclude/VERSION new file mode 100644 index 0000000000..6e8bf73aa5 --- /dev/null +++ b/py/envoy.docs.literalinclude/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/py/envoy.docs.literalinclude/envoy/__init__.py b/py/envoy.docs.literalinclude/envoy/__init__.py new file mode 100644 index 0000000000..69e3be50da --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/py/envoy.docs.literalinclude/envoy/docs/__init__.py b/py/envoy.docs.literalinclude/envoy/docs/__init__.py new file mode 100644 index 0000000000..69e3be50da --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/docs/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/py/envoy.docs.literalinclude/envoy/docs/literalinclude/BUILD b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/BUILD new file mode 100644 index 0000000000..cf9b9e9927 --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/BUILD @@ -0,0 +1,14 @@ +toolshed_library( + "envoy.docs.literalinclude", + sources=[ + "__init__.py", + "checker.py", + "cmd.py", + "fixer.py", + ], +) + +file( + name="py_typed", + source="py.typed", +) diff --git a/py/envoy.docs.literalinclude/envoy/docs/literalinclude/__init__.py b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/__init__.py new file mode 100644 index 0000000000..3df4b0dfd0 --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/__init__.py @@ -0,0 +1,18 @@ +"""Literalinclude line number checker and fixer. + +This module provides tools to: +1. Find literalinclude directives in RST files +2. Check if source files have changed since RST files +3. Detect outdated line number specifications +4. Fix the line numbers automatically +""" + +from .checker import LiteralIncludeChecker, LiteralIncludeDirective +from .fixer import LiteralIncludeFixer + + +__all__ = ( + "LiteralIncludeChecker", + "LiteralIncludeDirective", + "LiteralIncludeFixer", +) diff --git a/py/envoy.docs.literalinclude/envoy/docs/literalinclude/checker.py b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/checker.py new file mode 100644 index 0000000000..cb3e7c0384 --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/checker.py @@ -0,0 +1,374 @@ +"""Checker for literalinclude directives in RST files.""" + +import re +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + + +@dataclass +class LiteralIncludeDirective: + """Represents a literalinclude directive found in an RST file.""" + + rst_file: Path + rst_line_number: int + source_file: Path + lines_spec: Optional[str] = None + emphasize_lines_spec: Optional[str] = None + start_after: Optional[str] = None + end_before: Optional[str] = None + + @property + def has_line_numbers(self) -> bool: + """Check if this directive has line number specifications.""" + return ( + self.lines_spec is not None + or self.emphasize_lines_spec is not None + ) + + @property + def max_line_number(self) -> Optional[int]: + """Get the maximum line number referenced in this directive.""" + max_line = None + + for spec in [self.lines_spec, self.emphasize_lines_spec]: + if spec: + numbers = self._parse_line_spec(spec) + if numbers: + spec_max = max(numbers) + if max_line is None or spec_max > max_line: + max_line = spec_max + + return max_line + + @staticmethod + def _parse_line_spec(spec: str) -> list[int]: + """Parse a line specification like '1-10,15,20-25' into individual line + numbers.""" + lines = [] + parts = spec.split(',') + + for part in parts: + part = part.strip() + if '-' in part: + start, end = part.split('-', 1) + try: + start_num = int(start.strip()) + end_num = int(end.strip()) + lines.extend(range(start_num, end_num + 1)) + except ValueError: + continue + else: + try: + lines.append(int(part)) + except ValueError: + continue + + return lines + + +class LiteralIncludeChecker: + """Checker that finds and validates literalinclude directives.""" + + # Regex patterns for parsing literalinclude directives + LITERALINCLUDE_RE = re.compile( + r'^\.\.\s+literalinclude::\s+(.+?)$', + re.MULTILINE + ) + LINES_RE = re.compile(r'^\s+:lines:\s+(.+?)$', re.MULTILINE) + EMPHASIZE_LINES_RE = re.compile(r'^\s+:emphasize-lines:\s+(.+?)$', re.MULTILINE) # noqa: E501 + START_AFTER_RE = re.compile(r'^\s+:start-after:\s+(.+?)$', re.MULTILINE) + END_BEFORE_RE = re.compile(r'^\s+:end-before:\s+(.+?)$', re.MULTILINE) + + def __init__(self, repo_root: Path): + """Initialize the checker. + + Args: + repo_root: Root directory of the repository + """ + self.repo_root = Path(repo_root).resolve() + + def find_rst_files(self, search_dirs: Optional[list[str]] = None) -> list[Path]: # noqa: E501 + """Find all RST files in specified directories. + + Args: + search_dirs: List of directory names to search (default: ['docs', 'api']) # noqa: E501 + + Returns: + List of Path objects for RST files + """ + if search_dirs is None: + search_dirs = ['docs', 'api'] + + rst_files = [] + for search_dir in search_dirs: + dir_path = self.repo_root / search_dir + if dir_path.exists(): + rst_files.extend(dir_path.rglob('*.rst')) + + return rst_files + + def parse_rst_file(self, rst_file: Path) -> list[LiteralIncludeDirective]: + """Parse an RST file to find literalinclude directives. + + Args: + rst_file: Path to the RST file + + Returns: + List of LiteralIncludeDirective objects + """ + directives = [] + + try: + content = rst_file.read_text() + except (IOError, OSError) as e: + print(f"Warning: Could not read {rst_file}: {e}") + return directives + + lines = content.split('\n') + + for i, line in enumerate(lines, 1): + match = self.LITERALINCLUDE_RE.match(line) + if match: + source_path_str = match.group(1).strip() + + # Resolve the source file path relative to RST file + source_file = self._resolve_source_path(rst_file, source_path_str) # noqa: E501 + + # Look ahead for options + directive = LiteralIncludeDirective( + rst_file=rst_file, + rst_line_number=i, + source_file=source_file + ) + + # Parse options in following lines + j = i + while j < len(lines) and (lines[j].startswith(' :') or lines[j].strip() == ''): # noqa: E501 + option_line = lines[j] + + if match := self.LINES_RE.match(option_line): + directive.lines_spec = match.group(1).strip() + elif match := self.EMPHASIZE_LINES_RE.match(option_line): + directive.emphasize_lines_spec = match.group(1).strip() + elif match := self.START_AFTER_RE.match(option_line): + directive.start_after = match.group(1).strip() + elif match := self.END_BEFORE_RE.match(option_line): + directive.end_before = match.group(1).strip() + + j += 1 + + directives.append(directive) + + return directives + + def _resolve_source_path(self, rst_file: Path, source_path_str: str) -> Path: # noqa: E501 + """Resolve a source file path from a literalinclude directive. + + Args: + rst_file: The RST file containing the directive + source_path_str: The path string from the directive + + Returns: + Resolved Path object + """ + # Handle absolute paths from repo root + if source_path_str.startswith('/'): + return self.repo_root / source_path_str.lstrip('/') + + # Handle relative paths + return (rst_file.parent / source_path_str).resolve() + + def get_file_last_modified(self, file_path: Path) -> Optional[str]: + """Get the last git commit that modified a file. + + Args: + file_path: Path to the file + + Returns: + Commit hash or None if not in git + """ + try: + result = subprocess.run( + ['git', 'log', '-1', '--format=%H', '--', str(file_path)], + cwd=self.repo_root, + capture_output=True, + text=True, + check=True + ) + return result.stdout.strip() + except subprocess.CalledProcessError: + return None + + def get_file_changes_since(self, file_path: Path, since_commit: str) -> Optional[dict]: # noqa: E501 + """Get line changes in a file since a specific commit. + + Args: + file_path: Path to the file + since_commit: Commit hash to compare against + + Returns: + Dictionary with 'added_lines' and 'removed_lines' lists of line numbers, # noqa: E501 + or None if unable to get changes + """ + try: + # Get the diff with line numbers + result = subprocess.run( + ['git', 'diff', '--unified=0', since_commit, 'HEAD', '--', str(file_path)], # noqa: E501 + cwd=self.repo_root, + capture_output=True, + text=True, + check=True + ) + + diff_output = result.stdout + if not diff_output.strip(): + return {'added_lines': [], 'removed_lines': []} + + added_lines = [] + removed_lines = [] + + # Parse unified diff format more accurately + current_old_start = None + current_new_start = None + + for line in diff_output.split('\n'): + if line.startswith('@@'): + # Parse hunk header: @@ -old_start,old_count +new_start,new_count @@ # noqa: E501 + match = re.match(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@', line) # noqa: E501 + if match: + current_old_start = int(match.group(1)) + current_new_start = int(match.group(3)) + elif line.startswith('-') and not line.startswith('---'): + # Line was removed + if current_old_start is not None: + removed_lines.append(current_old_start) + current_old_start += 1 + elif line.startswith('+') and not line.startswith('+++'): + # Line was added + if current_new_start is not None: + added_lines.append(current_new_start) + current_new_start += 1 + elif line.startswith(' '): + # Context line - advance both counters + if current_old_start is not None: + current_old_start += 1 + if current_new_start is not None: + current_new_start += 1 + + return { + 'added_lines': added_lines, + 'removed_lines': removed_lines + } + except subprocess.CalledProcessError: + return None + + def check_directive_outdated(self, directive: LiteralIncludeDirective) -> bool: # noqa: E501 + """Check if a literalinclude directive has outdated line numbers. + + Args: + directive: The directive to check + + Returns: + True if the directive is potentially outdated + """ + # Skip if no line numbers are specified + if not directive.has_line_numbers: + return False + + # Check if files exist + if not directive.rst_file.exists() or not directive.source_file.exists(): # noqa: E501 + return False + + # Get last modification times + rst_commit = self.get_file_last_modified(directive.rst_file) + source_commit = self.get_file_last_modified(directive.source_file) + + if not rst_commit or not source_commit: + return False + + # If source file was modified after RST file, check for changes + try: + # Check if source was modified after RST + result = subprocess.run( + ['git', 'log', '--format=%H', '--', str(directive.source_file)], # noqa: E501 + cwd=self.repo_root, + capture_output=True, + text=True, + check=True + ) + source_commits_str = result.stdout.strip() + if not source_commits_str: + return False + source_commits = source_commits_str.split('\n') + + result = subprocess.run( + ['git', 'log', '--format=%H', '--', str(directive.rst_file)], # noqa: E501 + cwd=self.repo_root, + capture_output=True, + text=True, + check=True + ) + rst_commits_str = result.stdout.strip() + if not rst_commits_str: + return False + rst_commits = rst_commits_str.split('\n') + + # Find the commit where the directive was last modified + # Simple heuristic: if source file has commits after the RST's last commit # noqa: E501 + if source_commits and rst_commits: + rst_last_idx = len(source_commits) # Assume all source commits are after # noqa: E501 + try: + rst_last_idx = source_commits.index(rst_commits[0]) + except ValueError: + pass + + # Source was modified after RST + if rst_last_idx > 0: + # Check what changed + changes = self.get_file_changes_since( + directive.source_file, + rst_commit + ) + + if changes: + max_line = directive.max_line_number + if max_line: + # Check if changes affect lines up to max_line + for line_num in changes['added_lines'] + changes['removed_lines']: # noqa: E501 + if line_num <= max_line: + return True + + return False + except subprocess.CalledProcessError: + return False + + def find_outdated_directives( + self, + search_dirs: Optional[list[str]] = None + ) -> list[tuple[LiteralIncludeDirective, str]]: + """Find all outdated literalinclude directives. + + Args: + search_dirs: List of directory names to search + + Returns: + List of tuples (directive, reason) for outdated directives + """ + outdated = [] + rst_files = self.find_rst_files(search_dirs) + + for rst_file in rst_files: + directives = self.parse_rst_file(rst_file) + + for directive in directives: + if self.check_directive_outdated(directive): + reason = ( + f"Source file '{directive.source_file.name}' was modified " # noqa: E501 + f"after RST file '{directive.rst_file.name}' and changes " # noqa: E501 + f"affect lines <= {directive.max_line_number}" + ) + outdated.append((directive, reason)) + + return outdated diff --git a/py/envoy.docs.literalinclude/envoy/docs/literalinclude/cmd.py b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/cmd.py new file mode 100644 index 0000000000..65a8d2b1e8 --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/cmd.py @@ -0,0 +1,174 @@ +"""Command-line interface for literalinclude checker/fixer.""" + +import argparse +import json +import sys +from pathlib import Path + +from .checker import LiteralIncludeChecker +from .fixer import LiteralIncludeFixer + + +def main(argv=None): + """Main entry point for the CLI.""" + parser = argparse.ArgumentParser( + description=( + 'Check and fix literalinclude directives ' + 'with outdated line numbers' + ) + ) + parser.add_argument( + 'repo_root', + type=Path, + help='Root directory of the repository' + ) + parser.add_argument( + '--dirs', + nargs='+', + default=['docs', 'api'], + help='Directories to search for RST files (default: docs api)' + ) + parser.add_argument( + '--fix', + action='store_true', + help='Fix outdated line numbers (default: dry-run mode)' + ) + parser.add_argument( + '--json', + action='store_true', + help='Output results in JSON format' + ) + parser.add_argument( + '--list', + action='store_true', + help='List all literalinclude directives (not just outdated ones)' + ) + + args = parser.parse_args(argv) + + # Initialize checker + checker = LiteralIncludeChecker(args.repo_root) + + if args.list: + # List all directives + return list_directives(checker, args) + else: + # Check and optionally fix outdated directives + return check_and_fix(checker, args) + + +def list_directives(checker, args): + """List all literalinclude directives.""" + rst_files = checker.find_rst_files(args.dirs) + + all_directives = [] + for rst_file in rst_files: + directives = checker.parse_rst_file(rst_file) + all_directives.extend(directives) + + if args.json: + output = [] + for directive in all_directives: + output.append({ + 'rst_file': str(directive.rst_file), + 'rst_line': directive.rst_line_number, + 'source_file': str(directive.source_file), + 'lines': directive.lines_spec, + 'emphasize_lines': directive.emphasize_lines_spec, + 'start_after': directive.start_after, + 'end_before': directive.end_before, + }) + print(json.dumps(output, indent=2)) + else: + print(f"Found {len(all_directives)} literalinclude directives:\n") + for directive in all_directives: + print(f" {directive.rst_file}:{directive.rst_line_number}") + print(f" -> {directive.source_file}") + if directive.lines_spec: + print(f" :lines: {directive.lines_spec}") + if directive.emphasize_lines_spec: + print( + f" :emphasize-lines: " + f"{directive.emphasize_lines_spec}" + ) + print() + + return 0 + + +def check_and_fix(checker, args): + """Check for outdated directives and optionally fix them.""" + fixer = LiteralIncludeFixer(checker) + + # Find outdated directives + outdated = checker.find_outdated_directives(args.dirs) + + if not outdated: + if not args.json: + print("✓ No outdated literalinclude directives found!") + else: + print(json.dumps({'status': 'ok', 'outdated_count': 0})) + return 0 + + # Fix them if requested + if args.fix: + stats = fixer.fix_all_outdated(args.dirs, dry_run=False) + + if args.json: + print(json.dumps(stats, indent=2)) + else: + print( + f"Fixed {stats['fixed']} out of " + f"{stats['total_outdated']} outdated directives" + ) + print(f"Failed to fix: {stats['failed']}") + print("\nDetails:") + for detail in stats['details']: + status_symbol = ( + '✓' if detail['status'] == 'fixed' else '✗' + ) + print( + f" {status_symbol} {detail['file']}:" + f"{detail['line']}" + ) + print(f" {detail['reason']}") + + return 1 if stats['failed'] > 0 else 0 + else: + # Dry run - just report + if args.json: + output = [] + for directive, reason in outdated: + output.append({ + 'rst_file': str(directive.rst_file), + 'rst_line': directive.rst_line_number, + 'source_file': str(directive.source_file), + 'reason': reason, + }) + print(json.dumps({ + 'status': 'outdated_found', + 'count': len(outdated), + 'directives': output + }, indent=2)) + else: + print( + f"Found {len(outdated)} outdated literalinclude " + f"directives:\n" + ) + for directive, reason in outdated: + print( + f" {directive.rst_file}:{directive.rst_line_number}" + ) + print(f" -> {directive.source_file}") + if directive.lines_spec: + print(f" :lines: {directive.lines_spec}") + print(f" Reason: {reason}") + print() + + print("Run with --fix to automatically update line numbers") + + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/py/envoy.docs.literalinclude/envoy/docs/literalinclude/fixer.py b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/fixer.py new file mode 100644 index 0000000000..f632d7b480 --- /dev/null +++ b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/fixer.py @@ -0,0 +1,217 @@ +"""Fixer for literalinclude line number issues.""" + +import re +from typing import Optional + +from .checker import LiteralIncludeChecker, LiteralIncludeDirective + + +class LiteralIncludeFixer: + """Fixer that updates line numbers in literalinclude directives.""" + + def __init__(self, checker: LiteralIncludeChecker): + """Initialize the fixer. + + Args: + checker: LiteralIncludeChecker instance + """ + self.checker = checker + + def calculate_correct_lines( + self, + directive: LiteralIncludeDirective + ) -> Optional[dict[str, str]]: + """Calculate the correct line numbers for a directive. + + This attempts to determine what the line numbers should be based + on the current state of the source file. + + Args: + directive: The directive to fix + + Returns: + Dictionary with 'lines' and/or 'emphasize_lines' keys with + corrected specs, or None if unable to determine correct lines + """ + if not directive.source_file.exists(): + return None + + # For now, this is a placeholder that returns the original specs + # A more sophisticated implementation would: + # 1. Look at the git history to see what changed + # 2. Try to identify the original snippet by content matching + # 3. Update the line numbers to match the new location + + # Simple approach: count lines in the source file and validate + try: + with open(directive.source_file, 'r') as f: + total_lines = sum(1 for _ in f) + + result = {} + + # Check if current line specs are valid + max_line = directive.max_line_number + if max_line and max_line > total_lines: + # Line numbers exceed file length, needs fixing + # For now, just cap it to the file length + if directive.lines_spec: + result['lines'] = self._adjust_line_spec( + directive.lines_spec, + total_lines + ) + if directive.emphasize_lines_spec: + result['emphasize_lines'] = self._adjust_line_spec( + directive.emphasize_lines_spec, + total_lines + ) + + return result if result else None + except (IOError, OSError): + return None + + @staticmethod + def _adjust_line_spec(spec: str, max_line: int) -> str: + """Adjust a line specification to not exceed max_line. + + Args: + spec: Line specification like '1-10,15,20-25' + max_line: Maximum valid line number + + Returns: + Adjusted line specification + """ + parts = spec.split(',') + adjusted_parts = [] + + for part in parts: + part = part.strip() + if '-' in part: + start, end = part.split('-', 1) + try: + start_num = int(start.strip()) + end_num = int(end.strip()) + + if start_num > max_line: + continue # Skip this range entirely + + end_num = min(end_num, max_line) + adjusted_parts.append(f"{start_num}-{end_num}") + except ValueError: + adjusted_parts.append(part) + else: + try: + line_num = int(part) + if line_num <= max_line: + adjusted_parts.append(part) + except ValueError: + adjusted_parts.append(part) + + return ','.join(adjusted_parts) if adjusted_parts else '1' + + def fix_directive( + self, + directive: LiteralIncludeDirective, + dry_run: bool = True + ) -> bool: + """Fix a literalinclude directive with outdated line numbers. + + Args: + directive: The directive to fix + dry_run: If True, don't actually modify files + + Returns: + True if the directive was fixed (or would be fixed in dry run mode) + """ + corrections = self.calculate_correct_lines(directive) + + if not corrections: + return False + + try: + content = directive.rst_file.read_text() + lines = content.split('\n') + + # Find the directive and update it + modified = False + for i in range( + directive.rst_line_number - 1, + min(directive.rst_line_number + 20, len(lines)) + ): + line = lines[i] + + if 'lines' in corrections and ':lines:' in line: + old_match = re.match( + r'(\s+):lines:\s+(.+?)$', line + ) + if old_match: + indent = old_match.group(1) + lines[i] = ( + f"{indent}:lines: {corrections['lines']}" + ) + modified = True + + if ( + 'emphasize_lines' in corrections + and ':emphasize-lines:' in line + ): + old_match = re.match( + r'(\s+):emphasize-lines:\s+(.+?)$', line + ) + if old_match: + indent = old_match.group(1) + lines[i] = ( + f"{indent}:emphasize-lines: " + f"{corrections['emphasize_lines']}" + ) + modified = True + + if modified and not dry_run: + directive.rst_file.write_text('\n'.join(lines)) + + return modified + except (IOError, OSError) as e: + print(f"Error fixing {directive.rst_file}: {e}") + return False + + def fix_all_outdated( + self, + search_dirs: Optional[list[str]] = None, + dry_run: bool = True + ) -> dict: + """Fix all outdated literalinclude directives. + + Args: + search_dirs: List of directory names to search + dry_run: If True, don't actually modify files + + Returns: + Dictionary with statistics about fixes + """ + outdated = self.checker.find_outdated_directives(search_dirs) + + stats = { + 'total_outdated': len(outdated), + 'fixed': 0, + 'failed': 0, + 'details': [] + } + + for directive, reason in outdated: + if self.fix_directive(directive, dry_run=dry_run): + stats['fixed'] += 1 + stats['details'].append({ + 'file': str(directive.rst_file), + 'line': directive.rst_line_number, + 'status': 'fixed' if not dry_run else 'would_fix', + 'reason': reason + }) + else: + stats['failed'] += 1 + stats['details'].append({ + 'file': str(directive.rst_file), + 'line': directive.rst_line_number, + 'status': 'failed', + 'reason': reason + }) + + return stats diff --git a/py/envoy.docs.literalinclude/envoy/docs/literalinclude/py.typed b/py/envoy.docs.literalinclude/envoy/docs/literalinclude/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/py/envoy.docs.literalinclude/setup.cfg b/py/envoy.docs.literalinclude/setup.cfg new file mode 100644 index 0000000000..17fa04d6f6 --- /dev/null +++ b/py/envoy.docs.literalinclude/setup.cfg @@ -0,0 +1,36 @@ +[metadata] +name = envoy.docs.literalinclude +version = file: VERSION +author = Envoy Maintainers +author_email = envoy-maintainers@googlegroups.com +maintainer = Envoy Maintainers +maintainer_email = envoy-maintainers@googlegroups.com +license = Apache Software License 2.0 +url = https://github.com/envoyproxy/toolshed/tree/main/envoy.docs.literalinclude +description = Tool to check and fix literalinclude line numbers in RST documentation +long_description = file: README.rst +classifiers = + Development Status :: 4 - Beta + Intended Audience :: Developers + Topic :: Software Development :: Documentation + Topic :: Software Development :: Quality Assurance + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.12 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: Implementation :: CPython + Operating System :: OS Independent + License :: OSI Approved :: Apache Software License + +[options] +python_requires = >=3.12 +packages = find_namespace: +install_requires = + +[options.packages.find] +exclude = + tests + +[options.entry_points] +console_scripts = + literalinclude-check = envoy.docs.literalinclude.cmd:main diff --git a/py/envoy.docs.literalinclude/setup.py b/py/envoy.docs.literalinclude/setup.py new file mode 100644 index 0000000000..1f6a64b9cf --- /dev/null +++ b/py/envoy.docs.literalinclude/setup.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python + +from setuptools import setup # type:ignore + +setup() diff --git a/py/envoy.docs.literalinclude/tests/BUILD b/py/envoy.docs.literalinclude/tests/BUILD new file mode 100644 index 0000000000..39ad3103e2 --- /dev/null +++ b/py/envoy.docs.literalinclude/tests/BUILD @@ -0,0 +1,7 @@ +toolshed_tests( + "envoy.docs.literalinclude", + sources=[ + "test_checker.py", + "test_fixer.py", + ], +) diff --git a/py/envoy.docs.literalinclude/tests/test_checker.py b/py/envoy.docs.literalinclude/tests/test_checker.py new file mode 100644 index 0000000000..2d966e31d1 --- /dev/null +++ b/py/envoy.docs.literalinclude/tests/test_checker.py @@ -0,0 +1,142 @@ +"""Tests for literalinclude checker.""" + +import tempfile +from pathlib import Path + +from envoy.docs.literalinclude import checker + + +def test_literal_include_directive_parse_line_spec(): + """Test parsing line specifications.""" + # Simple range + lines = checker.LiteralIncludeDirective._parse_line_spec("1-10") + assert lines == list(range(1, 11)) + + # Multiple ranges and single lines + lines = checker.LiteralIncludeDirective._parse_line_spec("1-5,10,15-20") + assert 1 in lines + assert 5 in lines + assert 10 in lines + assert 15 in lines + assert 20 in lines + assert 11 not in lines + + # Single line + lines = checker.LiteralIncludeDirective._parse_line_spec("42") + assert lines == [42] + + +def test_literal_include_directive_max_line_number(): + """Test getting maximum line number from directive.""" + directive = checker.LiteralIncludeDirective( + rst_file=Path("test.rst"), + rst_line_number=1, + source_file=Path("source.yaml"), + lines_spec="1-10,15,20-25" + ) + assert directive.max_line_number == 25 + + directive = checker.LiteralIncludeDirective( + rst_file=Path("test.rst"), + rst_line_number=1, + source_file=Path("source.yaml"), + emphasize_lines_spec="1-5" + ) + assert directive.max_line_number == 5 + + directive = checker.LiteralIncludeDirective( + rst_file=Path("test.rst"), + rst_line_number=1, + source_file=Path("source.yaml") + ) + assert directive.max_line_number is None + + +def test_literal_include_checker_parse_rst(): + """Test parsing RST files for literalinclude directives.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create test RST file + rst_content = """ +Test Documentation +================== + +.. literalinclude:: /config/example.yaml + :lines: 1-10 + :emphasize-lines: 5-7 + +Some text. + +.. literalinclude:: ../other/file.yaml + :lines: 20-30 +""" + rst_file = tmpdir / "test.rst" + rst_file.write_text(rst_content) + + # Create source files + (tmpdir / "config").mkdir() + (tmpdir / "config" / "example.yaml").write_text("content\n" * 50) + (tmpdir / "other").mkdir() + (tmpdir / "other" / "file.yaml").write_text("content\n" * 50) + + # Parse + checker_obj = checker.LiteralIncludeChecker(tmpdir) + directives = checker_obj.parse_rst_file(rst_file) + + assert len(directives) == 2 + assert directives[0].lines_spec == "1-10" + assert directives[0].emphasize_lines_spec == "5-7" + assert directives[0].max_line_number == 10 + assert directives[1].lines_spec == "20-30" + assert directives[1].max_line_number == 30 + + +def test_literal_include_checker_find_rst_files(): + """Test finding RST files in directories.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create directory structure + docs = tmpdir / "docs" + docs.mkdir() + (docs / "test1.rst").write_text("test") + (docs / "subdir").mkdir() + (docs / "subdir" / "test2.rst").write_text("test") + + api = tmpdir / "api" + api.mkdir() + (api / "test3.rst").write_text("test") + + # Find files + checker_obj = checker.LiteralIncludeChecker(tmpdir) + rst_files = checker_obj.find_rst_files() + + assert len(rst_files) == 3 + assert any("test1.rst" in str(f) for f in rst_files) + assert any("test2.rst" in str(f) for f in rst_files) + assert any("test3.rst" in str(f) for f in rst_files) + + +def test_literal_include_checker_resolve_paths(): + """Test resolving source paths from RST files.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + rst_file = tmpdir / "docs" / "guide.rst" + rst_file.parent.mkdir(parents=True) + + checker_obj = checker.LiteralIncludeChecker(tmpdir) + + # Absolute path from root + resolved = checker_obj._resolve_source_path( + rst_file, "/config/example.yaml" + ) + assert resolved == tmpdir / "config" / "example.yaml" + + # Relative path + resolved = checker_obj._resolve_source_path( + rst_file, "../config/example.yaml" + ) + assert resolved.name == "example.yaml" + assert "config" in str(resolved) diff --git a/py/envoy.docs.literalinclude/tests/test_fixer.py b/py/envoy.docs.literalinclude/tests/test_fixer.py new file mode 100644 index 0000000000..d6e7abc148 --- /dev/null +++ b/py/envoy.docs.literalinclude/tests/test_fixer.py @@ -0,0 +1,104 @@ +"""Tests for literalinclude fixer.""" + +import tempfile +from pathlib import Path + +from envoy.docs.literalinclude import checker, fixer + + +def test_fixer_adjust_line_spec(): + """Test adjusting line specifications to not exceed max line.""" + # Cap range that exceeds max + adjusted = fixer.LiteralIncludeFixer._adjust_line_spec("1-100", 50) + assert adjusted == "1-50" + + # Remove ranges that entirely exceed max + adjusted = fixer.LiteralIncludeFixer._adjust_line_spec("60-100", 50) + assert adjusted == "1" # fallback + + # Mixed ranges and single lines + adjusted = fixer.LiteralIncludeFixer._adjust_line_spec("1-10,55,60-70", 50) + assert "1-10" in adjusted + assert "55" not in adjusted + assert "60-70" not in adjusted + + +def test_fixer_calculate_correct_lines(): + """Test calculating correct line numbers.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create source file with 30 lines + source = tmpdir / "source.yaml" + source.write_text("line\n" * 30) + + rst = tmpdir / "test.rst" + rst.write_text("test") + + # Directive that exceeds file length + directive = checker.LiteralIncludeDirective( + rst_file=rst, + rst_line_number=1, + source_file=source, + lines_spec="1-50" + ) + + checker_obj = checker.LiteralIncludeChecker(tmpdir) + fixer_obj = fixer.LiteralIncludeFixer(checker_obj) + + corrections = fixer_obj.calculate_correct_lines(directive) + + assert corrections is not None + assert 'lines' in corrections + # Should be capped to 30 + assert "30" in corrections['lines'] + + +def test_fixer_fix_directive(): + """Test fixing a directive in an RST file.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + + # Create source file + source = tmpdir / "source.yaml" + source.write_text("line\n" * 20) + + # Create RST file with outdated line numbers + rst = tmpdir / "test.rst" + rst_content = """ +Documentation +============= + +.. literalinclude:: source.yaml + :lines: 1-50 + :emphasize-lines: 40-45 +""" + rst.write_text(rst_content) + + # Create directive + directive = checker.LiteralIncludeDirective( + rst_file=rst, + rst_line_number=5, + source_file=source, + lines_spec="1-50", + emphasize_lines_spec="40-45" + ) + + checker_obj = checker.LiteralIncludeChecker(tmpdir) + fixer_obj = fixer.LiteralIncludeFixer(checker_obj) + + # Fix in dry-run mode first + result = fixer_obj.fix_directive(directive, dry_run=True) + assert result is True + + # File should not be modified yet + assert "1-50" in rst.read_text() + + # Actually fix + result = fixer_obj.fix_directive(directive, dry_run=False) + assert result is True + + # File should be modified + fixed_content = rst.read_text() + assert "1-50" not in fixed_content + assert "1-20" in fixed_content # Capped to actual file length