|
| 1 | +"""Pipeline engine: runs a chain of Transforms sequentially. |
| 2 | +
|
| 3 | +Part of claw-compactor. License: MIT. |
| 4 | +""" |
| 5 | +from __future__ import annotations |
| 6 | +import logging |
| 7 | +from dataclasses import dataclass, field |
| 8 | +from lib.transforms.base import Transform, CompressContext, TransformResult |
| 9 | + |
| 10 | +logger = logging.getLogger(__name__) |
| 11 | + |
| 12 | + |
| 13 | +@dataclass(frozen=True) |
| 14 | +class StepResult: |
| 15 | + """Result from a single pipeline step.""" |
| 16 | + transform_name: str |
| 17 | + result: TransformResult |
| 18 | + |
| 19 | + |
| 20 | +@dataclass(frozen=True) |
| 21 | +class PipelineResult: |
| 22 | + """Aggregated result from running all transforms.""" |
| 23 | + content: str |
| 24 | + steps: list[StepResult] = field(default_factory=list) |
| 25 | + total_timing_ms: float = 0.0 |
| 26 | + markers: list[str] = field(default_factory=list) |
| 27 | + warnings: list[str] = field(default_factory=list) |
| 28 | + |
| 29 | + |
| 30 | +class Pipeline: |
| 31 | + """Ordered chain of Transforms.""" |
| 32 | + |
| 33 | + def __init__(self, transforms: list[Transform] | None = None): |
| 34 | + self._transforms: list[Transform] = sorted( |
| 35 | + transforms or [], key=lambda t: t.order |
| 36 | + ) |
| 37 | + |
| 38 | + def add(self, transform: Transform) -> Pipeline: |
| 39 | + """Return a new Pipeline with the transform added (immutable).""" |
| 40 | + new_transforms = sorted( |
| 41 | + [*self._transforms, transform], key=lambda t: t.order |
| 42 | + ) |
| 43 | + return Pipeline(new_transforms) |
| 44 | + |
| 45 | + @property |
| 46 | + def transforms(self) -> list[Transform]: |
| 47 | + return list(self._transforms) |
| 48 | + |
| 49 | + def run(self, ctx: CompressContext) -> PipelineResult: |
| 50 | + """Run all transforms sequentially. Each transform's output feeds the next.""" |
| 51 | + steps: list[StepResult] = [] |
| 52 | + all_markers: list[str] = [] |
| 53 | + all_warnings: list[str] = [] |
| 54 | + total_ms = 0.0 |
| 55 | + current_ctx = ctx |
| 56 | + |
| 57 | + for transform in self._transforms: |
| 58 | + result = transform.timed_apply(current_ctx) |
| 59 | + steps.append(StepResult( |
| 60 | + transform_name=transform.name, |
| 61 | + result=result, |
| 62 | + )) |
| 63 | + total_ms += result.timing_ms |
| 64 | + |
| 65 | + if not result.skipped: |
| 66 | + current_ctx = current_ctx.evolve(content=result.content) |
| 67 | + all_markers.extend(result.markers) |
| 68 | + all_warnings.extend(result.warnings) |
| 69 | + logger.debug( |
| 70 | + "%s: %d→%d tokens (%.1fms)", |
| 71 | + transform.name, |
| 72 | + result.original_tokens, |
| 73 | + result.compressed_tokens, |
| 74 | + result.timing_ms, |
| 75 | + ) |
| 76 | + else: |
| 77 | + logger.debug("%s: skipped", transform.name) |
| 78 | + |
| 79 | + return PipelineResult( |
| 80 | + content=current_ctx.content, |
| 81 | + steps=steps, |
| 82 | + total_timing_ms=total_ms, |
| 83 | + markers=all_markers, |
| 84 | + warnings=all_warnings, |
| 85 | + ) |
0 commit comments