|
| 1 | +"""Trim Galore Oxidized Edition (Rust rewrite). CLI is a near-superset of |
| 2 | +the Perl trim_galore: same `--paired`, `--adapter`, `--adapter2`, |
| 3 | +`--stringency`, `--quality`, `--length`, `--output_dir`, `--cores`, |
| 4 | +`--gzip`. Output filenames also follow the Perl convention |
| 5 | +(`<basename>_val_1.fq.gz` for paired, `<basename>_trimmed.fq.gz` for SE), |
| 6 | +so the moves table reuses the same logic as the Perl render. |
| 7 | +
|
| 8 | +Difference from the Perl render: no `--cores` cap at 4. The Rust binary |
| 9 | +uses an in-process thread pool rather than the Perl version's multi- |
| 10 | +process spawn, so it scales cleanly past 4 threads.""" |
| 11 | + |
| 12 | +from pathlib import Path |
| 13 | + |
| 14 | + |
def _strip_fastq_suffix(path: str) -> str:
    """Return the file name of *path* with a known FASTQ extension removed."""
    name = Path(path).name
    # Longest suffixes first so ".fastq.gz" is not cut down to just ".gz".
    for suffix in (".fastq.gz", ".fq.gz", ".fastq", ".fq"):
        if name.endswith(suffix):
            return name[: -len(suffix)]
    # NOTE(review): for names without a recognised FASTQ suffix only the last
    # extension is dropped; confirm this matches what the binary writes.
    return Path(path).stem


def render(ctx: dict) -> dict:
    """Build the trim_galore command line and the table of expected outputs.

    Reads from ``ctx``: ``trim_cfg``, ``workdir``, ``threads``, ``adapter_r1``,
    ``paired``, ``input_r1``, ``output_r1`` and, when ``paired`` is truthy,
    ``adapter_r2``, ``input_r2`` and ``output_r2``.

    Optional ``trim_cfg`` keys: ``min_adapter_overlap`` (default 5),
    ``quality_trim``, ``quality_threshold`` and ``min_length``. A key that is
    present but set to ``None`` is treated the same as a missing key.

    Returns:
        A dict with two entries: ``"argv"``, the argument list starting with
        ``"trim_galore"``, and ``"moves"``, which maps each output path
        expected inside ``workdir`` to the matching ``ctx["output_r*"]``
        value.
    """
    cfg = ctx["trim_cfg"]
    workdir = Path(ctx["workdir"])
    paired = ctx["paired"]

    # `cfg.get(key, default)` only covers a missing key; an explicit None
    # would otherwise be rendered as the literal string "None".
    overlap = cfg.get("min_adapter_overlap")
    if overlap is None:
        overlap = 5

    argv = [
        "trim_galore",
        "--cores", str(ctx["threads"]),
        "--adapter", ctx["adapter_r1"],
        "--stringency", str(overlap),
        "--gzip",
        "--output_dir", str(workdir),
    ]

    if paired:
        argv += ["--paired", "--adapter2", ctx["adapter_r2"]]

    # Quality trimming is passed as "0" unless it is enabled and a threshold
    # is actually configured.
    quality = cfg.get("quality_threshold") if cfg.get("quality_trim") else None
    argv += ["--quality", "0" if quality is None else str(quality)]

    # A missing, None, zero or negative minimum length is passed as "1".
    min_length = cfg.get("min_length") or 0
    argv += ["--length", str(min_length) if min_length > 0 else "1"]

    argv.append(ctx["input_r1"])
    if paired:
        argv.append(ctx["input_r2"])

    moves: dict[str, str] = {}
    base_r1 = _strip_fastq_suffix(ctx["input_r1"])
    if paired:
        base_r2 = _strip_fastq_suffix(ctx["input_r2"])
        moves[str(workdir / f"{base_r1}_val_1.fq.gz")] = ctx["output_r1"]
        moves[str(workdir / f"{base_r2}_val_2.fq.gz")] = ctx["output_r2"]
    else:
        moves[str(workdir / f"{base_r1}_trimmed.fq.gz")] = ctx["output_r1"]

    return {"argv": argv, "moves": moves}
0 commit comments