From b5181a5263e85bdb60f038e3af164430ae7c889b Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Fri, 8 May 2026 17:50:41 +0200
Subject: [PATCH 01/24] feat(tui): add interactive Terminal UI for mmore
 commands
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a `mmore tui` command that launches an interactive terminal interface
built with `questionary` and `rich`. Lets users pick a single command
(process / postprocess / index / retrieve / rag / ragcli / websearch), reuse
or build a config interactively, or run the full pipeline (process →
postprocess → index) with progress feedback — without having to write YAML
configs by hand.

- New `src/mmore/tui/` package (app, commands registry, config builder,
  pipeline runner, theme).
- Wires up `tui` as a top-level Click command in `cli.py`.
- Adds `questionary>=2.0` and `rich>=13` to core dependencies.
---
 pyproject.toml                  |   4 +-
 src/mmore/cli.py                |   8 +
 src/mmore/tui/__init__.py       |   3 +
 src/mmore/tui/app.py            | 121 ++++++++++++
 src/mmore/tui/commands.py       | 158 ++++++++++++++++
 src/mmore/tui/config_builder.py | 314 ++++++++++++++++++++++++++++++++
 src/mmore/tui/pipeline.py       | 103 +++++++++++
 src/mmore/tui/theme.py          |  68 +++++++
 uv.lock                         |  28 +++
 9 files changed, 806 insertions(+), 1 deletion(-)
 create mode 100644 src/mmore/tui/__init__.py
 create mode 100644 src/mmore/tui/app.py
 create mode 100644 src/mmore/tui/commands.py
 create mode 100644 src/mmore/tui/config_builder.py
 create mode 100644 src/mmore/tui/pipeline.py
 create mode 100644 src/mmore/tui/theme.py

diff --git a/pyproject.toml b/pyproject.toml
index a6f63a51..b9428fa9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,9 @@ dependencies = [
     "python-dotenv>=1.0",
     "typing_extensions>=4.15.0,<5.0",
     "PyYAML>=6.0",
-    "setuptools<81"
+    "setuptools<81",
+    "questionary>=2.0",
+    "rich>=13"
 ]
 
 [project.optional-dependencies]
diff --git a/src/mmore/cli.py b/src/mmore/cli.py
index ad952f58..7e8e2af2 100644
--- a/src/mmore/cli.py
+++ b/src/mmore/cli.py
@@ -265,6 +265,14 @@ def ragcli(config_file: str):
     my_rag_cli.launch_cli()
 
 
+@main.command()
+def tui():
+    """Launch the interactive Terminal UI."""
+    from .tui import run
+
+    run()
+
+
 @main.group()
 def colpali():
     """ColPali pipeline commands for PDF processing, indexing, and retrieval."""
diff --git a/src/mmore/tui/__init__.py b/src/mmore/tui/__init__.py
new file mode 100644
index 00000000..3004c7fb
--- /dev/null
+++ b/src/mmore/tui/__init__.py
@@ -0,0 +1,3 @@
+from mmore.tui.app import run
+
+__all__ = ["run"]
diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
new file mode 100644
index 00000000..9feddf10
--- /dev/null
+++ b/src/mmore/tui/app.py
@@ -0,0 +1,121 @@
+"""mmore TUI entry point."""
+from __future__ import annotations
+
+import time
+
+import questionary
+from questionary import Style
+from rich.spinner import Spinner
+from rich.live import Live
+from rich.text import Text
+
+from mmore.tui.commands import REGISTRY
+from mmore.tui.config_builder import pick_or_build_config
+from mmore.tui.pipeline import run_full_pipeline
+from mmore.tui.theme import ACCENT, ACCENT2, MUTED, OK, console, section, show_banner
+
+QSTYLE = Style([
+    ("qmark", "fg:#5fd7ff bold"),
+    ("question", "bold"),
+    ("answer", "fg:#ff5fd7 bold"),
+    ("pointer", "fg:#5fd7ff bold"),
+    ("highlighted", "fg:#5fd7ff bold"),
+    ("selected", "fg:#ff5fd7"),
+    ("instruction", "fg:#808080 italic"),
+])
+
+
+def _run_with_spinner(label: str, fn, **kwargs) -> None:
+    start = time.time()
+    spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
+    with Live(spinner, console=console, refresh_per_second=12, transient=True):
+        fn(**kwargs)
+    console.print(
+        f"  [{OK}]✓[/] {label} [dim]({time.time() - start:.1f}s)[/dim]"
+    )
+
+
+def _run_single_command() -> None:
+    choices = [
+        questionary.Choice(f"{spec.name:<12} — {spec.description}", value=spec.name)
+        for spec in REGISTRY.values()
+    ]
+    name = questionary.select(
+        "Pick a command", choices=choices, style=QSTYLE, qmark="▸",
+    ).ask()
+    if name is None:
+        return
+    spec = REGISTRY[name]
+    config_file = pick_or_build_config(spec)
+    kwargs = {"config_file": config_file}
+    if spec.needs_input_data:
+        input_data = questionary.text(
+            "Input JSONL path",
+            default="examples/process/outputs/merged/merged_results.jsonl",
+            style=QSTYLE, qmark="▸",
+        ).ask()
+        if input_data is None:
+            return
+        kwargs["input_data"] = input_data
+
+    console.print()
+    console.print(section(
+        f"Running {name}",
+        Text(f"config: {config_file}", style=MUTED),
+        style=ACCENT2,
+    ))
+    interactive = name in {"ragcli", "retrieve", "rag"}
+    if interactive:
+        spec.run(**kwargs)
+    else:
+        _run_with_spinner(spec.description, spec.run, **kwargs)
+    console.print(f"[{OK}]✓ {name} finished[/]")
+
+
+def _chat_only() -> None:
+    config_file = pick_or_build_config(REGISTRY["ragcli"])
+    console.print()
+    console.print(section("RAG chat", Text(f"config: {config_file}", style=MUTED)))
+    REGISTRY["ragcli"].run(config_file=config_file)
+
+
+def _main_menu() -> str | None:
+    return questionary.select(
+        "What do you want to do?",
+        choices=[
+            questionary.Choice("⚙  Run a single command", value="single"),
+            questionary.Choice(
+                "🚀 Run full pipeline  (process → postprocess → index)",
+                value="pipeline",
+            ),
+            questionary.Choice("💬 Chat with indexed documents", value="chat"),
+            questionary.Separator(),
+            questionary.Choice("✕  Quit", value="quit"),
+        ],
+        style=QSTYLE,
+        qmark="▸",
+    ).ask()
+
+
+def run() -> None:
+    console.clear()
+    show_banner("interactive launcher")
+    while True:
+        try:
+            mode = _main_menu()
+            if mode in (None, "quit"):
+                console.print(f"[{ACCENT}]bye![/]")
+                return
+            if mode == "single":
+                _run_single_command()
+            elif mode == "pipeline":
+                run_full_pipeline()
+            elif mode == "chat":
+                _chat_only()
+        except KeyboardInterrupt:
+            console.print(f"\n[{ACCENT2}]interrupted.[/]")
+            return
+        except Exception as e:  # noqa: BLE001
+            console.print(f"[bold red]error:[/] {e}")
+            if not questionary.confirm("Continue?", default=True, style=QSTYLE).ask():
+                return
diff --git a/src/mmore/tui/commands.py b/src/mmore/tui/commands.py
new file mode 100644
index 00000000..fae5e67f
--- /dev/null
+++ b/src/mmore/tui/commands.py
@@ -0,0 +1,158 @@
+"""Registry of mmore commands callable from the TUI.
+
+Each entry mirrors a Click command in `mmore.cli` so the TUI is a thin wrapper:
+the `run` callable is the same `run_*` function the CLI uses.
+"""
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+
+@dataclass
+class CommandSpec:
+    name: str
+    description: str
+    example_config: Optional[str]
+    run: Callable[..., None]
+    needs_input_data: bool = False
+    config_globs: list[str] = field(default_factory=list)
+    # Lazy importer returning the dataclass to validate YAML against.
+    # Left as None when no validation is wired up for this stage.
+    config_dataclass: Optional[Callable[[], Any]] = None
+
+
+def _process(config_file: str, **_):
+    from mmore.run_process import process
+    process(config_file)
+
+
+def _postprocess(config_file: str, input_data: str, **_):
+    from mmore.run_postprocess import postprocess
+    postprocess(config_file, input_data)
+
+
+def _index(config_file: str, documents_path: Optional[str] = None,
+           collection_name: Optional[str] = None, **_):
+    from mmore.run_index import index
+    index(config_file, documents_path, collection_name)
+
+
+def _retrieve(config_file: str, **_):
+    from mmore.run_retriever import run_api
+    run_api(config_file, "0.0.0.0", 8001)
+
+
+def _rag(config_file: str, **_):
+    from mmore.run_rag import rag
+    rag(config_file)
+
+
+def _ragcli(config_file: str, **_):
+    from mmore.run_ragcli import RagCLI
+    RagCLI(config_file).launch_cli()
+
+
+def _websearch(config_file: str, **_):
+    from mmore.run_websearch import run_websearch
+    run_websearch(config_file)
+
+
+# Lazy dataclass importers — keeps heavy deps out of TUI startup.
+def _dc_process():
+    from mmore.run_process import ProcessInference
+    return ProcessInference
+
+
+def _dc_postprocess():
+    from mmore.process.post_processor.pipeline import PPPipelineConfig
+    return PPPipelineConfig
+
+
+def _dc_index():
+    from mmore.run_index import IndexConfig
+    return IndexConfig
+
+
+def _dc_rag():
+    from mmore.run_rag import RAGInferenceConfig
+    return RAGInferenceConfig
+
+
+REGISTRY: dict[str, CommandSpec] = {
+    "process": CommandSpec(
+        name="process",
+        description="Crawl + extract documents into a JSONL",
+        example_config="examples/process/config.yaml",
+        run=_process,
+        config_globs=[
+            "examples/process/**/*.yaml",
+            "examples/process/**/*.yml",
+        ],
+        config_dataclass=_dc_process,
+    ),
+    "postprocess": CommandSpec(
+        name="postprocess",
+        description="Chunk / clean processed documents",
+        example_config="examples/postprocessor/config.yaml",
+        run=_postprocess,
+        needs_input_data=True,
+        config_globs=[
+            "examples/postprocessor/**/*.yaml",
+            "examples/postprocessor/**/*.yml",
+        ],
+        config_dataclass=_dc_postprocess,
+    ),
+    "index": CommandSpec(
+        name="index",
+        description="Embed + store documents in Milvus",
+        example_config="examples/index/config.yaml",
+        run=_index,
+        config_globs=[
+            "examples/index/**/*.yaml",
+            "examples/index/**/*.yml",
+        ],
+        config_dataclass=_dc_index,
+    ),
+    "retrieve": CommandSpec(
+        name="retrieve",
+        description="Run retriever API server",
+        example_config="examples/rag/config.yaml",
+        run=_retrieve,
+        config_globs=[
+            "examples/rag/**/*.yaml",
+            "examples/rag/**/*.yml",
+        ],
+        config_dataclass=_dc_rag,
+    ),
+    "rag": CommandSpec(
+        name="rag",
+        description="Run a one-shot RAG pipeline",
+        example_config="examples/rag/config.yaml",
+        run=_rag,
+        config_globs=[
+            "examples/rag/**/*.yaml",
+            "examples/rag/**/*.yml",
+        ],
+        config_dataclass=_dc_rag,
+    ),
+    "ragcli": CommandSpec(
+        name="ragcli",
+        description="Interactive RAG chat",
+        example_config="examples/rag/config.yaml",
+        run=_ragcli,
+        config_globs=[
+            "examples/rag/**/*.yaml",
+            "examples/rag/**/*.yml",
+        ],
+        config_dataclass=_dc_rag,
+    ),
+    "websearch": CommandSpec(
+        name="websearch",
+        description="Web search (+ optional RAG)",
+        example_config="examples/websearchRAG/config.yaml",
+        run=_websearch,
+        config_globs=[
+            "examples/websearchRAG/**/*.yaml",
+            "examples/websearchRAG/**/*.yml",
+        ],
+    ),
+}
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
new file mode 100644
index 00000000..f91cc2fe
--- /dev/null
+++ b/src/mmore/tui/config_builder.py
@@ -0,0 +1,314 @@
+"""Generate YAML config files via guided prompts.
+
+Templates here mirror the example configs under `examples/`. The user is
+asked only for the fields most likely to change between runs; everything else
+falls back to the example defaults. The resulting dict is dumped to a YAML
+file under `./tui-configs/`.
+"""
+from __future__ import annotations
+
+import os
+import time
+from pathlib import Path
+from typing import Any, Optional
+
+import questionary
+import yaml
+from questionary import Style
+from rich.panel import Panel
+from rich.text import Text
+
+from mmore.tui.commands import CommandSpec
+
+CONFIG_DIR = Path("./tui-configs")
+
+QSTYLE = Style([
+    ("qmark", "fg:#5fd7ff bold"),
+    ("question", "bold"),
+    ("answer", "fg:#ff5fd7 bold"),
+    ("pointer", "fg:#5fd7ff bold"),
+    ("highlighted", "fg:#5fd7ff bold"),
+    ("selected", "fg:#ff5fd7"),
+    ("instruction", "fg:#808080 italic"),
+])
+QMARK = "▸"
+
+
+def _prompt(question: str, default: str = "") -> str:
+    answer = questionary.text(question, default=default, style=QSTYLE, qmark=QMARK).ask()
+    if answer is None:
+        raise KeyboardInterrupt
+    return answer
+
+
+def _confirm(question: str, default: bool = False) -> bool:
+    answer = questionary.confirm(question, default=default, style=QSTYLE, qmark=QMARK).ask()
+    if answer is None:
+        raise KeyboardInterrupt
+    return answer
+
+
+def _save(name: str, data: dict[str, Any]) -> str:
+    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+    path = CONFIG_DIR / f"{name}-{int(time.time())}.yaml"
+    with open(path, "w") as f:
+        yaml.safe_dump(data, f, sort_keys=False)
+    return str(path)
+
+
+def build_process_config() -> str:
+    data_path = _prompt("Data path (folder with documents to process)", "examples/sample_data/")
+    output_path = _prompt("Output path (where merged_results.jsonl will be written)",
+                          "examples/process/outputs/")
+    use_fast = _confirm("Use fast (lower-quality) processors?", default=False)
+    distributed = _confirm("Use distributed processing (Dask)?", default=False)
+    extract_images = _confirm("Extract images from documents?", default=True)
+
+    cfg = {
+        "data_path": data_path,
+        "google_drive_ids": [],
+        "previous_results": None,
+        "dispatcher_config": {
+            "output_path": output_path,
+            "use_fast_processors": use_fast,
+            "distributed": distributed,
+            "extract_images": extract_images,
+            "scheduler_file": None,
+            "process_batch_sizes": [
+                {"URLProcessor": 40},
+                {"DOCXProcessor": 100},
+                {"PDFProcessor": 4000},
+                {"MediaProcessor": 40},
+                {"SpreadsheetProcessor": 100},
+                {"TXTProcessor": 100},
+                {"PPTXProcessor": 100},
+                {"MarkdownProcessor": 100},
+                {"EMLProcessor": 100},
+                {"HTMLProcessor": 100},
+            ],
+            "processor_config": {
+                "MediaProcessor": [
+                    {"normal_model": "openai/whisper-large-v3-turbo"},
+                    {"fast_model": "openai/whisper-tiny"},
+                    {"type": "automatic-speech-recognition"},
+                    {"sample_rate": 10},
+                    {"batch_size": 4},
+                ],
+                "PDFProcessor": [
+                    {"PDFTEXT_CPU_WORKERS": 0},
+                    {"DETECTOR_BATCH_SIZE": 1},
+                    {"DETECTOR_POSTPROCESSING_CPU_WORKERS": 0},
+                    {"RECOGNITION_BATCH_SIZE": 1},
+                    {"OCR_PARALLEL_WORKERS": 0},
+                    {"TEXIFY_BATCH_SIZE": 1},
+                    {"LAYOUT_BATCH_SIZE": 1},
+                    {"ORDER_BATCH_SIZE": 1},
+                    {"TABLE_REC_BATCH_SIZE": 1},
+                ],
+            },
+        },
+    }
+    return _save("process", cfg)
+
+
+def build_postprocess_config() -> str:
+    strategy = questionary.select(
+        "Chunking strategy",
+        choices=["sentence", "token", "word", "semantic"],
+        default="sentence",
+        style=QSTYLE, qmark=QMARK,
+    ).ask()
+    if strategy is None:
+        raise KeyboardInterrupt
+    table_handling = questionary.select(
+        "Table handling",
+        choices=["single_row", "multi_rows", "keep_whole", "none"],
+        default="single_row",
+        style=QSTYLE, qmark=QMARK,
+    ).ask()
+    if table_handling is None:
+        raise KeyboardInterrupt
+    output_path = _prompt("Output JSONL path",
+                          "examples/postprocessor/outputs/merged/results.jsonl")
+
+    cfg = {
+        "previous_results": None,
+        "pp_modules": [
+            {"type": "chunker", "args": {
+                "chunking_strategy": strategy,
+                "table_handling": table_handling,
+            }},
+        ],
+        "output": {"output_path": output_path, "save_each_step": True},
+    }
+    return _save("postprocess", cfg)
+
+
+def build_index_config(documents_path: Optional[str] = None) -> str:
+    dense = _prompt("Dense embedding model",
+                    "sentence-transformers/all-MiniLM-L6-v2")
+    sparse = _prompt("Sparse embedding model", "splade")
+    db_uri = _prompt("DB URI (Milvus Lite file or server URL)", "./proc_demo.db")
+    db_name = _prompt("DB name", "my_db")
+    collection = _prompt("Collection name", "my_docs")
+    docs = documents_path or _prompt(
+        "Documents JSONL path",
+        "examples/postprocessor/outputs/merged/results.jsonl",
+    )
+    cfg = {
+        "indexer": {
+            "dense_model": {"model_name": dense, "is_multimodal": False},
+            "sparse_model": {"model_name": sparse, "is_multimodal": False},
+            "db": {"uri": db_uri, "name": db_name},
+        },
+        "collection_name": collection,
+        "documents_path": docs,
+    }
+    return _save("index", cfg)
+
+
+BUILDERS = {
+    "process": build_process_config,
+    "postprocess": build_postprocess_config,
+    "index": build_index_config,
+}
+
+
+def find_yaml_configs(spec: CommandSpec, root: str = ".") -> list[str]:
+    """Find candidate YAML configs scoped to this stage.
+
+    Includes:
+    - files matching any of `spec.config_globs`
+    - previously-generated `tui-configs/<stage>-*.yaml`
+    """
+    root_path = Path(root)
+    matches: list[str] = []
+    for pattern in spec.config_globs:
+        for p in root_path.glob(pattern):
+            matches.append(str(p))
+    # Generated configs from previous TUI runs
+    generated = root_path / "tui-configs"
+    if generated.exists():
+        for p in sorted(generated.glob(f"{spec.name}-*.yaml")):
+            matches.append(str(p))
+
+    seen: set[str] = set()
+    out: list[str] = []
+    for m in matches:
+        if m not in seen:
+            seen.add(m)
+            out.append(m)
+    return out
+
+
+def _validate_yaml(path: str, spec: CommandSpec) -> Optional[str]:
+    """Return None on success, an error message string on failure."""
+    if spec.config_dataclass is None:
+        return None
+    try:
+        from mmore.utils import load_config
+        dataclass_cls = spec.config_dataclass()
+        load_config(path, dataclass_cls)
+        return None
+    except Exception as e:  # noqa: BLE001
+        return f"{type(e).__name__}: {e}"
+
+
+def _show_error_panel(path: str, err: str) -> None:
+    from mmore.tui.theme import console
+    console.print(Panel(
+        Text.assemble(
+            (f"{path}\n\n", "bold"),
+            (err, "red"),
+        ),
+        title="[bold red]invalid config[/]",
+        border_style="red",
+        padding=(1, 2),
+    ))
+
+
+def _ranked_choices(spec: CommandSpec, candidates: list[str]) -> list[Any]:
+    """Put `spec.example_config` first as ★ recommended; rest under a separator."""
+    choices: list[Any] = []
+    rec = spec.example_config
+    rest = list(candidates)
+    if rec and rec in rest:
+        choices.append(questionary.Choice(f"★ {rec}  (recommended)", value=rec))
+        rest.remove(rec)
+    elif rec and Path(rec).exists():
+        choices.append(questionary.Choice(f"★ {rec}  (recommended)", value=rec))
+    if rest:
+        if choices:
+            choices.append(questionary.Separator("── other configs ──"))
+        for c in rest:
+            choices.append(questionary.Choice(c, value=c))
+    return choices
+
+
+def pick_or_build_config(spec: CommandSpec, documents_path: Optional[str] = None) -> str:
+    """Ask the user to either pick an existing YAML or generate one.
+
+    Validates the chosen YAML against the stage's dataclass and re-prompts
+    on failure rather than letting the run blow up later.
+    """
+    while True:
+        choice = questionary.select(
+            f"Config for `{spec.name}`?",
+            choices=[
+                questionary.Choice("📂 Pick existing YAML", value="pick"),
+                questionary.Choice("✨ Generate new YAML (guided)", value="build"),
+                questionary.Choice("⌨  Type a path manually", value="manual"),
+            ],
+            style=QSTYLE, qmark=QMARK,
+        ).ask()
+        if choice is None:
+            raise KeyboardInterrupt
+
+        path: Optional[str] = None
+
+        if choice == "pick":
+            candidates = find_yaml_configs(spec)
+            ranked = _ranked_choices(spec, candidates)
+            if not ranked:
+                questionary.print(
+                    f"No YAML configs found for `{spec.name}`, falling back to manual entry.",
+                    style="fg:yellow",
+                )
+                choice = "manual"
+            else:
+                picked = questionary.select(
+                    f"Select a config for `{spec.name}`",
+                    choices=ranked,
+                    style=QSTYLE, qmark=QMARK,
+                ).ask()
+                if picked is None:
+                    raise KeyboardInterrupt
+                path = picked
+
+        if choice == "manual":
+            manual = _prompt("Path to YAML config")
+            if not os.path.exists(manual):
+                _show_error_panel(manual, "file not found")
+                continue
+            path = manual
+
+        if choice == "build":
+            builder = BUILDERS.get(spec.name)
+            if builder is None:
+                questionary.print(
+                    f"No guided builder for `{spec.name}` — pick an existing YAML.",
+                    style="fg:yellow",
+                )
+                continue
+            if spec.name == "index":
+                path = builder(documents_path=documents_path)  # type: ignore[call-arg]
+            else:
+                path = builder()
+
+        assert path is not None
+        err = _validate_yaml(path, spec)
+        if err is None:
+            return path
+        _show_error_panel(path, err)
+        if not _confirm("Try a different config?", default=True):
+            raise KeyboardInterrupt
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
new file mode 100644
index 00000000..4312c3de
--- /dev/null
+++ b/src/mmore/tui/pipeline.py
@@ -0,0 +1,103 @@
+"""Chain process -> postprocess -> index from the TUI."""
+from __future__ import annotations
+
+import os
+import time
+
+import questionary
+import yaml
+from rich.spinner import Spinner
+from rich.live import Live
+from rich.table import Table
+from rich.text import Text
+
+from mmore.tui.commands import REGISTRY
+from mmore.tui.config_builder import pick_or_build_config
+from mmore.tui.theme import ACCENT, ACCENT2, MUTED, OK, console, section, step_header
+
+
+def _process_output_jsonl(config_path: str) -> str:
+    with open(config_path) as f:
+        cfg = yaml.safe_load(f)
+    out = cfg["dispatcher_config"]["output_path"]
+    return os.path.join(out, "merged", "merged_results.jsonl")
+
+
+def _postprocess_output_jsonl(config_path: str) -> str:
+    with open(config_path) as f:
+        cfg = yaml.safe_load(f)
+    return cfg["output"]["output_path"]
+
+
+def _run_step(label: str, fn, **kwargs) -> float:
+    start = time.time()
+    spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
+    with Live(spinner, console=console, refresh_per_second=12, transient=True):
+        fn(**kwargs)
+    elapsed = time.time() - start
+    console.print(f"  [{OK}]✓[/] {label} [dim]({elapsed:.1f}s)[/dim]")
+    return elapsed
+
+
+def _summary_table(rows: list[tuple[str, str, float]]) -> Table:
+    table = Table(
+        title="[bold]Pipeline summary[/bold]",
+        title_style=ACCENT2,
+        border_style=ACCENT,
+        header_style=f"bold {ACCENT}",
+        show_lines=False,
+    )
+    table.add_column("Step", style="bold")
+    table.add_column("Output", style=MUTED)
+    table.add_column("Duration", justify="right")
+    total = 0.0
+    for name, out, dur in rows:
+        table.add_row(name, out, f"{dur:.1f}s")
+        total += dur
+    table.add_section()
+    table.add_row("[bold]Total[/bold]", "", f"[bold]{total:.1f}s[/bold]")
+    return table
+
+
+def run_full_pipeline() -> None:
+    console.print()
+    console.print(section(
+        "Full pipeline",
+        Text("process → postprocess → index → (optional) chat", style=ACCENT),
+        style=ACCENT2,
+    ))
+
+    rows: list[tuple[str, str, float]] = []
+
+    # process
+    step_header(1, 3, "process")
+    process_cfg = pick_or_build_config(REGISTRY["process"])
+    elapsed = _run_step("Crawling + extracting documents",
+                        REGISTRY["process"].run, config_file=process_cfg)
+    process_jsonl = _process_output_jsonl(process_cfg)
+    rows.append(("process", process_jsonl, elapsed))
+
+    # postprocess
+    step_header(2, 3, "postprocess")
+    pp_cfg = pick_or_build_config(REGISTRY["postprocess"])
+    elapsed = _run_step("Chunking + cleaning",
+                        REGISTRY["postprocess"].run,
+                        config_file=pp_cfg, input_data=process_jsonl)
+    pp_jsonl = _postprocess_output_jsonl(pp_cfg)
+    rows.append(("postprocess", pp_jsonl, elapsed))
+
+    # index
+    step_header(3, 3, "index")
+    index_cfg = pick_or_build_config(REGISTRY["index"], documents_path=pp_jsonl)
+    elapsed = _run_step("Embedding + indexing into Milvus",
+                        REGISTRY["index"].run,
+                        config_file=index_cfg, documents_path=pp_jsonl)
+    rows.append(("index", "(vector DB)", elapsed))
+
+    console.print()
+    console.print(_summary_table(rows))
+    console.print()
+
+    if questionary.confirm("Open the RAG chat now?", default=True).ask():
+        rag_cfg = pick_or_build_config(REGISTRY["ragcli"])
+        REGISTRY["ragcli"].run(config_file=rag_cfg)
diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
new file mode 100644
index 00000000..8d71fec6
--- /dev/null
+++ b/src/mmore/tui/theme.py
@@ -0,0 +1,68 @@
+"""Shared visuals: banner, palette, panel helpers."""
+from __future__ import annotations
+
+from rich.align import Align
+from rich.console import Console, Group
+from rich.panel import Panel
+from rich.text import Text
+
+console = Console()
+
+# Palette
+ACCENT = "bright_cyan"
+ACCENT2 = "magenta"
+MUTED = "grey58"
+OK = "bold green"
+WARN = "yellow"
+ERR = "bold red"
+
+BANNER = r"""
+
+ ███╗   ███╗███╗   ███╗ ██████╗ ██████╗ ███████╗
+ ████╗ ████║████╗ ████║██╔═══██╗██╔══██╗██╔════╝
+ ██╔████╔██║██╔████╔██║██║   ██║██████╔╝█████╗
+ ██║╚██╔╝██║██║╚██╔╝██║██║   ██║██╔══██╗██╔══╝
+ ██║ ╚═╝ ██║██║ ╚═╝ ██║╚██████╔╝██║  ██║███████╗
+ ╚═╝     ╚═╝╚═╝     ╚═╝ ╚═════╝ ╚═╝  ╚═╝╚══════╝
+"""
+
+
+def _gradient(text: str, start: str = "bright_cyan", end: str = "magenta") -> Text:
+    """Cheap two-color gradient — top half in `start`, bottom half in `end`."""
+    lines = text.splitlines()
+    half = max(1, len(lines) // 2)
+    out = Text()
+    for i, line in enumerate(lines):
+        style = start if i < half else end
+        out.append(line + "\n", style=style)
+    return out
+
+
+def show_banner(subtitle: str = "interactive launcher") -> None:
+    body = Group(
+        _gradient(BANNER),
+        Align.center(Text(subtitle, style=f"italic {MUTED}")),
+    )
+    console.print(Panel(
+        body,
+        border_style=ACCENT,
+        padding=(0, 2),
+    ))
+
+
+def section(title: str, body: str | Text, style: str = ACCENT) -> Panel:
+    return Panel(
+        body if isinstance(body, Text) else Text(body),
+        title=f"[bold]{title}[/bold]",
+        border_style=style,
+        padding=(1, 2),
+    )
+
+
+def step_header(idx: int, total: int, name: str) -> None:
+    bar = "─" * 4
+    console.print()
+    console.print(
+        f"[{ACCENT}]{bar}[/] [bold]Step {idx}/{total}[/bold] "
+        f"[{ACCENT2}]{name}[/] [{ACCENT}]{bar}[/]"
+    )
diff --git a/uv.lock b/uv.lock
index b0725455..85aeac3d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3634,6 +3634,8 @@ dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
     { name = "pyyaml" },
+    { name = "questionary" },
+    { name = "rich" },
     { name = "setuptools" },
     { name = "typing-extensions" },
     { name = "validators" },
@@ -3866,10 +3868,12 @@ requires-dist = [
     { name = "python-dotenv", specifier = ">=1.0" },
     { name = "python-pptx", marker = "extra == 'process'" },
     { name = "pyyaml", specifier = ">=6.0" },
+    { name = "questionary", specifier = ">=2.0" },
     { name = "ragas", marker = "extra == 'rag'", specifier = ">=0.2" },
     { name = "rarfile", marker = "extra == 'process'", specifier = ">=4.1" },
     { name = "requests", marker = "extra == 'api'", specifier = ">=2.31" },
     { name = "requests", marker = "extra == 'process'", specifier = ">=2.31" },
+    { name = "rich", specifier = ">=13" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
     { name = "scipy", marker = "extra == 'index'", specifier = ">=1.8" },
     { name = "sentence-transformers", marker = "extra == 'index'" },
@@ -5892,6 +5896,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c1/1b/f7ea6cde25621cd9236541c66ff018f4268012a534ec31032bcb187dc5e7/proglog-0.1.12-py3-none-any.whl", hash = "sha256:ccaafce51e80a81c65dc907a460c07ccb8ec1f78dc660cfd8f9ec3a22f01b84c", size = 6337, upload-time = "2025-05-09T14:36:16.798Z" },
 ]
 
+[[package]]
+name = "prompt-toolkit"
+version = "3.0.52"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "wcwidth" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" },
+]
+
 [[package]]
 name = "propcache"
 version = "0.4.1"
@@ -6875,6 +6891,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
 ]
 
+[[package]]
+name = "questionary"
+version = "2.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "prompt-toolkit" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f6/45/eafb0bba0f9988f6a2520f9ca2df2c82ddfa8d67c95d6625452e97b204a5/questionary-2.1.1.tar.gz", hash = "sha256:3d7e980292bb0107abaa79c68dd3eee3c561b83a0f89ae482860b181c8bd412d", size = 25845, upload-time = "2025-08-28T19:00:20.851Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl", hash = "sha256:a51af13f345f1cdea62347589fbb6df3b290306ab8930713bfae4d475a7d4a59", size = 36753, upload-time = "2025-08-28T19:00:19.56Z" },
+]
+
 [[package]]
 name = "ragas"
 version = "0.4.3"

From d85be08b6c74006e0fb849751ec58fb7746db4f3 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Sat, 9 May 2026 11:45:49 +0200
Subject: [PATCH 02/24] fix(tui): address PR review feedback

- Centralise QSTYLE/QMARK in theme.py (was duplicated in app.py and
  config_builder.py)
- Derive pipeline output paths via load_config + jsonl_path so env-var
  expansion ($ROOT_OUT_DIR, ...) and the directory-vs-jsonl logic match
  what the underlying commands actually use
- Add paths.py: repo_root() walks up from CWD to find examples/, so the
  TUI works from any working directory; cwd_default() gives ./data-style
  defaults instead of repo-relative paths
- Replace examples/... defaults in guided prompts with cwd_default()
  fallbacks so the TUI is sensible from outside the repo
- ruff format pass on the tui/ package
---
 src/mmore/tui/app.py            |  55 +++++++------
 src/mmore/tui/commands.py       |  20 ++++-
 src/mmore/tui/config_builder.py | 135 ++++++++++++++++++--------------
 src/mmore/tui/paths.py          |  52 ++++++++++++
 src/mmore/tui/pipeline.py       |  79 +++++++++++++------
 src/mmore/tui/theme.py          |  27 +++++--
 6 files changed, 257 insertions(+), 111 deletions(-)
 create mode 100644 src/mmore/tui/paths.py

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 9feddf10..734fe6c2 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -1,28 +1,29 @@
 """mmore TUI entry point."""
+
 from __future__ import annotations
 
 import time
 
 import questionary
-from questionary import Style
-from rich.spinner import Spinner
 from rich.live import Live
+from rich.spinner import Spinner
 from rich.text import Text
 
 from mmore.tui.commands import REGISTRY
 from mmore.tui.config_builder import pick_or_build_config
+from mmore.tui.paths import cwd_default
 from mmore.tui.pipeline import run_full_pipeline
-from mmore.tui.theme import ACCENT, ACCENT2, MUTED, OK, console, section, show_banner
-
-QSTYLE = Style([
-    ("qmark", "fg:#5fd7ff bold"),
-    ("question", "bold"),
-    ("answer", "fg:#ff5fd7 bold"),
-    ("pointer", "fg:#5fd7ff bold"),
-    ("highlighted", "fg:#5fd7ff bold"),
-    ("selected", "fg:#ff5fd7"),
-    ("instruction", "fg:#808080 italic"),
-])
+from mmore.tui.theme import (
+    ACCENT,
+    ACCENT2,
+    MUTED,
+    OK,
+    QMARK,
+    QSTYLE,
+    console,
+    section,
+    show_banner,
+)
 
 
 def _run_with_spinner(label: str, fn, **kwargs) -> None:
@@ -30,9 +31,7 @@ def _run_with_spinner(label: str, fn, **kwargs) -> None:
     spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
     with Live(spinner, console=console, refresh_per_second=12, transient=True):
         fn(**kwargs)
-    console.print(
-        f"  [{OK}]✓[/] {label} [dim]({time.time() - start:.1f}s)[/dim]"
-    )
+    console.print(f"  [{OK}]✓[/] {label} [dim]({time.time() - start:.1f}s)[/dim]")
 
 
 def _run_single_command() -> None:
@@ -41,7 +40,10 @@ def _run_single_command() -> None:
         for spec in REGISTRY.values()
     ]
     name = questionary.select(
-        "Pick a command", choices=choices, style=QSTYLE, qmark="▸",
+        "Pick a command",
+        choices=choices,
+        style=QSTYLE,
+        qmark=QMARK,
     ).ask()
     if name is None:
         return
@@ -51,19 +53,22 @@ def _run_single_command() -> None:
     if spec.needs_input_data:
         input_data = questionary.text(
             "Input JSONL path",
-            default="examples/process/outputs/merged/merged_results.jsonl",
-            style=QSTYLE, qmark="▸",
+            default=cwd_default("outputs/process/merged/merged_results.jsonl"),
+            style=QSTYLE,
+            qmark=QMARK,
         ).ask()
         if input_data is None:
             return
         kwargs["input_data"] = input_data
 
     console.print()
-    console.print(section(
-        f"Running {name}",
-        Text(f"config: {config_file}", style=MUTED),
-        style=ACCENT2,
-    ))
+    console.print(
+        section(
+            f"Running {name}",
+            Text(f"config: {config_file}", style=MUTED),
+            style=ACCENT2,
+        )
+    )
     interactive = name in {"ragcli", "retrieve", "rag"}
     if interactive:
         spec.run(**kwargs)
@@ -93,7 +98,7 @@ def _main_menu() -> str | None:
             questionary.Choice("✕  Quit", value="quit"),
         ],
         style=QSTYLE,
-        qmark="▸",
+        qmark=QMARK,
     ).ask()
 
 
diff --git a/src/mmore/tui/commands.py b/src/mmore/tui/commands.py
index fae5e67f..9ab63920 100644
--- a/src/mmore/tui/commands.py
+++ b/src/mmore/tui/commands.py
@@ -3,6 +3,7 @@
 Each entry mirrors a Click command in `mmore.cli` so the TUI is a thin wrapper:
 the `run` callable is the same `run_*` function the CLI uses.
 """
+
 from dataclasses import dataclass, field
 from typing import Any, Callable, Optional
 
@@ -22,58 +23,73 @@ class CommandSpec:
 
 def _process(config_file: str, **_):
     from mmore.run_process import process
+
     process(config_file)
 
 
 def _postprocess(config_file: str, input_data: str, **_):
     from mmore.run_postprocess import postprocess
+
     postprocess(config_file, input_data)
 
 
-def _index(config_file: str, documents_path: Optional[str] = None,
-           collection_name: Optional[str] = None, **_):
+def _index(
+    config_file: str,
+    documents_path: Optional[str] = None,
+    collection_name: Optional[str] = None,
+    **_,
+):
     from mmore.run_index import index
+
     index(config_file, documents_path, collection_name)
 
 
 def _retrieve(config_file: str, **_):
     from mmore.run_retriever import run_api
+
     run_api(config_file, "0.0.0.0", 8001)
 
 
 def _rag(config_file: str, **_):
     from mmore.run_rag import rag
+
     rag(config_file)
 
 
 def _ragcli(config_file: str, **_):
     from mmore.run_ragcli import RagCLI
+
     RagCLI(config_file).launch_cli()
 
 
 def _websearch(config_file: str, **_):
     from mmore.run_websearch import run_websearch
+
     run_websearch(config_file)
 
 
 # Lazy dataclass importers — keeps heavy deps out of TUI startup.
 def _dc_process():
     from mmore.run_process import ProcessInference
+
     return ProcessInference
 
 
 def _dc_postprocess():
     from mmore.process.post_processor.pipeline import PPPipelineConfig
+
     return PPPipelineConfig
 
 
 def _dc_index():
     from mmore.run_index import IndexConfig
+
     return IndexConfig
 
 
 def _dc_rag():
     from mmore.run_rag import RAGInferenceConfig
+
     return RAGInferenceConfig
 
 
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index f91cc2fe..be31c41d 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -5,6 +5,7 @@
 falls back to the example defaults. The resulting dict is dumped to a YAML
 file under `./tui-configs/`.
 """
+
 from __future__ import annotations
 
 import os
@@ -14,35 +15,29 @@
 
 import questionary
 import yaml
-from questionary import Style
 from rich.panel import Panel
 from rich.text import Text
 
 from mmore.tui.commands import CommandSpec
+from mmore.tui.paths import cwd_default, repo_root, resolve_example
+from mmore.tui.theme import QMARK, QSTYLE, console
 
 CONFIG_DIR = Path("./tui-configs")
 
-QSTYLE = Style([
-    ("qmark", "fg:#5fd7ff bold"),
-    ("question", "bold"),
-    ("answer", "fg:#ff5fd7 bold"),
-    ("pointer", "fg:#5fd7ff bold"),
-    ("highlighted", "fg:#5fd7ff bold"),
-    ("selected", "fg:#ff5fd7"),
-    ("instruction", "fg:#808080 italic"),
-])
-QMARK = "▸"
-
 
 def _prompt(question: str, default: str = "") -> str:
-    answer = questionary.text(question, default=default, style=QSTYLE, qmark=QMARK).ask()
+    answer = questionary.text(
+        question, default=default, style=QSTYLE, qmark=QMARK
+    ).ask()
     if answer is None:
         raise KeyboardInterrupt
     return answer
 
 
 def _confirm(question: str, default: bool = False) -> bool:
-    answer = questionary.confirm(question, default=default, style=QSTYLE, qmark=QMARK).ask()
+    answer = questionary.confirm(
+        question, default=default, style=QSTYLE, qmark=QMARK
+    ).ask()
     if answer is None:
         raise KeyboardInterrupt
     return answer
@@ -57,9 +52,14 @@ def _save(name: str, data: dict[str, Any]) -> str:
 
 
 def build_process_config() -> str:
-    data_path = _prompt("Data path (folder with documents to process)", "examples/sample_data/")
-    output_path = _prompt("Output path (where merged_results.jsonl will be written)",
-                          "examples/process/outputs/")
+    data_path = _prompt(
+        "Data path (folder with documents to process)",
+        cwd_default("data"),
+    )
+    output_path = _prompt(
+        "Output path (where merged_results.jsonl will be written)",
+        cwd_default("outputs/process"),
+    )
     use_fast = _confirm("Use fast (lower-quality) processors?", default=False)
     distributed = _confirm("Use distributed processing (Dask)?", default=False)
     extract_images = _confirm("Extract images from documents?", default=True)
@@ -116,7 +116,8 @@ def build_postprocess_config() -> str:
         "Chunking strategy",
         choices=["sentence", "token", "word", "semantic"],
         default="sentence",
-        style=QSTYLE, qmark=QMARK,
+        style=QSTYLE,
+        qmark=QMARK,
     ).ask()
     if strategy is None:
         raise KeyboardInterrupt
@@ -124,20 +125,26 @@ def build_postprocess_config() -> str:
         "Table handling",
         choices=["single_row", "multi_rows", "keep_whole", "none"],
         default="single_row",
-        style=QSTYLE, qmark=QMARK,
+        style=QSTYLE,
+        qmark=QMARK,
     ).ask()
     if table_handling is None:
         raise KeyboardInterrupt
-    output_path = _prompt("Output JSONL path",
-                          "examples/postprocessor/outputs/merged/results.jsonl")
+    output_path = _prompt(
+        "Output JSONL path",
+        cwd_default("outputs/postprocess/results.jsonl"),
+    )
 
     cfg = {
         "previous_results": None,
         "pp_modules": [
-            {"type": "chunker", "args": {
-                "chunking_strategy": strategy,
-                "table_handling": table_handling,
-            }},
+            {
+                "type": "chunker",
+                "args": {
+                    "chunking_strategy": strategy,
+                    "table_handling": table_handling,
+                },
+            },
         ],
         "output": {"output_path": output_path, "save_each_step": True},
     }
@@ -145,15 +152,16 @@ def build_postprocess_config() -> str:
 
 
 def build_index_config(documents_path: Optional[str] = None) -> str:
-    dense = _prompt("Dense embedding model",
-                    "sentence-transformers/all-MiniLM-L6-v2")
+    dense = _prompt("Dense embedding model", "sentence-transformers/all-MiniLM-L6-v2")
     sparse = _prompt("Sparse embedding model", "splade")
-    db_uri = _prompt("DB URI (Milvus Lite file or server URL)", "./proc_demo.db")
+    db_uri = _prompt(
+        "DB URI (Milvus Lite file or server URL)", cwd_default("proc_demo.db")
+    )
     db_name = _prompt("DB name", "my_db")
     collection = _prompt("Collection name", "my_docs")
     docs = documents_path or _prompt(
         "Documents JSONL path",
-        "examples/postprocessor/outputs/merged/results.jsonl",
+        cwd_default("outputs/postprocess/results.jsonl"),
     )
     cfg = {
         "indexer": {
@@ -174,20 +182,20 @@ def build_index_config(documents_path: Optional[str] = None) -> str:
 }
 
 
-def find_yaml_configs(spec: CommandSpec, root: str = ".") -> list[str]:
+def find_yaml_configs(spec: CommandSpec) -> list[str]:
     """Find candidate YAML configs scoped to this stage.
 
-    Includes:
-    - files matching any of `spec.config_globs`
-    - previously-generated `tui-configs/<stage>-*.yaml`
+    Globs are evaluated against the resolved repo root (looked up by walking
+    up from CWD), so the TUI works from any working directory. Generated
+    configs in `./tui-configs/` (CWD-relative) are always included so users
+    keep access to configs they just built.
     """
-    root_path = Path(root)
+    root = repo_root() or Path.cwd()
     matches: list[str] = []
     for pattern in spec.config_globs:
-        for p in root_path.glob(pattern):
+        for p in root.glob(pattern):
             matches.append(str(p))
-    # Generated configs from previous TUI runs
-    generated = root_path / "tui-configs"
+    generated = Path.cwd() / "tui-configs"
     if generated.exists():
         for p in sorted(generated.glob(f"{spec.name}-*.yaml")):
             matches.append(str(p))
@@ -207,6 +215,7 @@ def _validate_yaml(path: str, spec: CommandSpec) -> Optional[str]:
         return None
     try:
         from mmore.utils import load_config
+
         dataclass_cls = spec.config_dataclass()
         load_config(path, dataclass_cls)
         return None
@@ -215,28 +224,35 @@ def _validate_yaml(path: str, spec: CommandSpec) -> Optional[str]:
 
 
 def _show_error_panel(path: str, err: str) -> None:
-    from mmore.tui.theme import console
-    console.print(Panel(
-        Text.assemble(
-            (f"{path}\n\n", "bold"),
-            (err, "red"),
-        ),
-        title="[bold red]invalid config[/]",
-        border_style="red",
-        padding=(1, 2),
-    ))
+    console.print(
+        Panel(
+            Text.assemble(
+                (f"{path}\n\n", "bold"),
+                (err, "red"),
+            ),
+            title="[bold red]invalid config[/]",
+            border_style="red",
+            padding=(1, 2),
+        )
+    )
 
 
 def _ranked_choices(spec: CommandSpec, candidates: list[str]) -> list[Any]:
     """Put `spec.example_config` first as ★ recommended; rest under a separator."""
     choices: list[Any] = []
-    rec = spec.example_config
+    rec_resolved: Optional[str] = None
+    if spec.example_config:
+        rec_resolved = resolve_example(spec.example_config)
     rest = list(candidates)
-    if rec and rec in rest:
-        choices.append(questionary.Choice(f"★ {rec}  (recommended)", value=rec))
-        rest.remove(rec)
-    elif rec and Path(rec).exists():
-        choices.append(questionary.Choice(f"★ {rec}  (recommended)", value=rec))
+    if rec_resolved and rec_resolved in rest:
+        choices.append(
+            questionary.Choice(f"★ {rec_resolved}  (recommended)", value=rec_resolved)
+        )
+        rest.remove(rec_resolved)
+    elif rec_resolved and Path(rec_resolved).exists():
+        choices.append(
+            questionary.Choice(f"★ {rec_resolved}  (recommended)", value=rec_resolved)
+        )
     if rest:
         if choices:
             choices.append(questionary.Separator("── other configs ──"))
@@ -245,7 +261,9 @@ def _ranked_choices(spec: CommandSpec, candidates: list[str]) -> list[Any]:
     return choices
 
 
-def pick_or_build_config(spec: CommandSpec, documents_path: Optional[str] = None) -> str:
+def pick_or_build_config(
+    spec: CommandSpec, documents_path: Optional[str] = None
+) -> str:
     """Ask the user to either pick an existing YAML or generate one.
 
     Validates the chosen YAML against the stage's dataclass and re-prompts
@@ -259,7 +277,8 @@ def pick_or_build_config(spec: CommandSpec, documents_path: Optional[str] = None
                 questionary.Choice("✨ Generate new YAML (guided)", value="build"),
                 questionary.Choice("⌨  Type a path manually", value="manual"),
             ],
-            style=QSTYLE, qmark=QMARK,
+            style=QSTYLE,
+            qmark=QMARK,
         ).ask()
         if choice is None:
             raise KeyboardInterrupt
@@ -271,7 +290,8 @@ def pick_or_build_config(spec: CommandSpec, documents_path: Optional[str] = None
             ranked = _ranked_choices(spec, candidates)
             if not ranked:
                 questionary.print(
-                    f"No YAML configs found for `{spec.name}`, falling back to manual entry.",
+                    f"No YAML configs found for `{spec.name}`, "
+                    "falling back to manual entry.",
                     style="fg:yellow",
                 )
                 choice = "manual"
@@ -279,7 +299,8 @@ def pick_or_build_config(spec: CommandSpec, documents_path: Optional[str] = None
                 picked = questionary.select(
                     f"Select a config for `{spec.name}`",
                     choices=ranked,
-                    style=QSTYLE, qmark=QMARK,
+                    style=QSTYLE,
+                    qmark=QMARK,
                 ).ask()
                 if picked is None:
                     raise KeyboardInterrupt
diff --git a/src/mmore/tui/paths.py b/src/mmore/tui/paths.py
new file mode 100644
index 00000000..17194f00
--- /dev/null
+++ b/src/mmore/tui/paths.py
@@ -0,0 +1,52 @@
+"""Locate bundled example configs regardless of CWD or install layout.
+
+Strategy:
+- If `examples/` exists relative to CWD (source checkout), use it.
+- Else, walk up from CWD looking for a repo root that contains `examples/`.
+- There is no packaged-data fallback yet: `importlib.resources` is not used,
+  so examples are only found when a source checkout is reachable from CWD.
+- If nothing is found, return the original repo-relative path so error
+  messages stay readable; callers handle "missing" gracefully.
+"""
+
+from __future__ import annotations
+
+import os
+from functools import lru_cache
+from pathlib import Path
+from typing import Optional
+
+
+@lru_cache(maxsize=1)
+def repo_root() -> Optional[Path]:
+    """Return a directory that contains an `examples/` folder, if any."""
+    cwd = Path.cwd()
+    for candidate in [cwd, *cwd.parents]:
+        if (candidate / "examples").is_dir():
+            return candidate
+    return None
+
+
+def resolve_example(rel: str) -> str:
+    """Resolve an `examples/...` relative path to an absolute one.
+
+    Falls back to the original string when no checkout contains the file, so the
+    UI can still display it (and the validator will surface a clear error).
+    """
+    root = repo_root()
+    if root is not None:
+        candidate = root / rel
+        if candidate.exists():
+            return str(candidate)
+    return rel
+
+
+def resolve_glob(pattern: str) -> tuple[Path, str]:
+    """Pair a relative glob with the root directory to run Path.glob from."""
+    root = repo_root() or Path.cwd()
+    return root, pattern
+
+
+def cwd_default(rel: str) -> str:
+    """A safe default path rooted at CWD (e.g. `./data` instead of `examples/...`)."""
+    return os.path.join(".", rel)
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index 4312c3de..1114a5ba 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -1,32 +1,56 @@
 """Chain process -> postprocess -> index from the TUI."""
+
 from __future__ import annotations
 
 import os
 import time
 
 import questionary
-import yaml
-from rich.spinner import Spinner
 from rich.live import Live
+from rich.spinner import Spinner
 from rich.table import Table
 from rich.text import Text
 
 from mmore.tui.commands import REGISTRY
 from mmore.tui.config_builder import pick_or_build_config
-from mmore.tui.theme import ACCENT, ACCENT2, MUTED, OK, console, section, step_header
+from mmore.tui.theme import (
+    ACCENT,
+    ACCENT2,
+    MUTED,
+    OK,
+    console,
+    section,
+    step_header,
+)
 
 
 def _process_output_jsonl(config_path: str) -> str:
-    with open(config_path) as f:
-        cfg = yaml.safe_load(f)
-    out = cfg["dispatcher_config"]["output_path"]
+    """Resolve the JSONL path the `process` step writes to.
+
+    Goes through `mmore.utils.load_config` so env-var expansion ($ROOT_OUT_DIR,
+    etc.) matches what the underlying command sees.
+    """
+    from mmore.run_process import ProcessInference
+    from mmore.utils import load_config
+
+    cfg: ProcessInference = load_config(config_path, ProcessInference)
+    out = cfg.dispatcher_config.output_path
     return os.path.join(out, "merged", "merged_results.jsonl")
 
 
 def _postprocess_output_jsonl(config_path: str) -> str:
-    with open(config_path) as f:
-        cfg = yaml.safe_load(f)
-    return cfg["output"]["output_path"]
+    """Resolve the JSONL path `postprocess` writes to.
+
+    Mirrors `PPPipeline`'s use of `mmore.process.utils.jsonl_path`: if the
+    configured `output_path` is a directory, the pipeline writes to
+    `<dir>/final.jsonl`; if it already ends in `.jsonl`, it's used as-is.
+    """
+    from mmore.process.post_processor.pipeline import PPPipelineConfig
+    from mmore.process.utils import jsonl_path
+    from mmore.utils import load_config
+
+    cfg: PPPipelineConfig = load_config(config_path, PPPipelineConfig)
+    return jsonl_path(cfg.output.output_path)
 
 
 def _run_step(label: str, fn, **kwargs) -> float:
@@ -61,37 +85,48 @@ def _summary_table(rows: list[tuple[str, str, float]]) -> Table:
 
 def run_full_pipeline() -> None:
     console.print()
-    console.print(section(
-        "Full pipeline",
-        Text("process → postprocess → index → (optional) chat", style=ACCENT),
-        style=ACCENT2,
-    ))
+    console.print(
+        section(
+            "Full pipeline",
+            Text("process → postprocess → index → (optional) chat", style=ACCENT),
+            style=ACCENT2,
+        )
+    )
 
     rows: list[tuple[str, str, float]] = []
 
     # process
     step_header(1, 3, "process")
     process_cfg = pick_or_build_config(REGISTRY["process"])
-    elapsed = _run_step("Crawling + extracting documents",
-                        REGISTRY["process"].run, config_file=process_cfg)
+    elapsed = _run_step(
+        "Crawling + extracting documents",
+        REGISTRY["process"].run,
+        config_file=process_cfg,
+    )
     process_jsonl = _process_output_jsonl(process_cfg)
     rows.append(("process", process_jsonl, elapsed))
 
     # postprocess
     step_header(2, 3, "postprocess")
     pp_cfg = pick_or_build_config(REGISTRY["postprocess"])
-    elapsed = _run_step("Chunking + cleaning",
-                        REGISTRY["postprocess"].run,
-                        config_file=pp_cfg, input_data=process_jsonl)
+    elapsed = _run_step(
+        "Chunking + cleaning",
+        REGISTRY["postprocess"].run,
+        config_file=pp_cfg,
+        input_data=process_jsonl,
+    )
     pp_jsonl = _postprocess_output_jsonl(pp_cfg)
     rows.append(("postprocess", pp_jsonl, elapsed))
 
     # index
     step_header(3, 3, "index")
     index_cfg = pick_or_build_config(REGISTRY["index"], documents_path=pp_jsonl)
-    elapsed = _run_step("Embedding + indexing into Milvus",
-                        REGISTRY["index"].run,
-                        config_file=index_cfg, documents_path=pp_jsonl)
+    elapsed = _run_step(
+        "Embedding + indexing into Milvus",
+        REGISTRY["index"].run,
+        config_file=index_cfg,
+        documents_path=pp_jsonl,
+    )
     rows.append(("index", "(vector DB)", elapsed))
 
     console.print()
diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
index 8d71fec6..d719c351 100644
--- a/src/mmore/tui/theme.py
+++ b/src/mmore/tui/theme.py
@@ -1,6 +1,8 @@
 """Shared visuals: banner, palette, panel helpers."""
+
 from __future__ import annotations
 
+from questionary import Style
 from rich.align import Align
 from rich.console import Console, Group
 from rich.panel import Panel
@@ -8,6 +10,19 @@
 
 console = Console()
 
+QSTYLE = Style(
+    [
+        ("qmark", "fg:#5fd7ff bold"),
+        ("question", "bold"),
+        ("answer", "fg:#ff5fd7 bold"),
+        ("pointer", "fg:#5fd7ff bold"),
+        ("highlighted", "fg:#5fd7ff bold"),
+        ("selected", "fg:#ff5fd7"),
+        ("instruction", "fg:#808080 italic"),
+    ]
+)
+QMARK = "▸"
+
 # Palette
 ACCENT = "bright_cyan"
 ACCENT2 = "magenta"
@@ -43,11 +58,13 @@ def show_banner(subtitle: str = "interactive launcher") -> None:
         _gradient(BANNER),
         Align.center(Text(subtitle, style=f"italic {MUTED}")),
     )
-    console.print(Panel(
-        body,
-        border_style=ACCENT,
-        padding=(0, 2),
-    ))
+    console.print(
+        Panel(
+            body,
+            border_style=ACCENT,
+            padding=(0, 2),
+        )
+    )
 
 
 def section(title: str, body: str | Text, style: str = ACCENT) -> Panel:

From bbb5d9179ee8f0f6c90fc740dab41ea735d54691 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 12 May 2026 08:50:07 +0200
Subject: [PATCH 03/24] feat(tui): full-pipeline wizard, extras detection,
 cancel-vs-quit

- Add a guided wizard ("Build a full pipeline config") that generates
  coherent process + postprocess + index YAMLs in one flow, exposing
  only processors / post-processors / indexer types that actually exist
  in the repo (pulled from ProcessorRegistry, TAGGER_TYPES, FILTER_TYPES).
- Detect missing extras per stage via importlib.util.find_spec canaries;
  disable menu entries and surface the exact `uv sync --extra ...` hint
  instead of crashing mid-run with ModuleNotFoundError.
- Move questionary + rich out of core dependencies into a new `tui`
  extra (included in `all`); friendly error from `mmore tui` if missing.
- Introduce CancelledByUser so Ctrl-C / Esc inside a sub-flow returns
  to the main menu instead of exiting the whole TUI. Ctrl-C at the main
  menu still quits.
- Add a spinner during YAML validation (dataclass imports take ~5s and
  made the TUI look frozen).
- Document the TUI in the README with install commands and behavior.
---
 README.md                       |  18 ++
 pyproject.toml                  |  10 +-
 src/mmore/cli.py                |  10 +-
 src/mmore/tui/app.py            | 137 ++++++++++++--
 src/mmore/tui/commands.py       |  40 ++++
 src/mmore/tui/config_builder.py | 312 ++++++++++++++++++++++++++++++--
 src/mmore/tui/exceptions.py     |  11 ++
 src/mmore/tui/pipeline.py       |  55 +++++-
 uv.lock                         |  16 +-
 9 files changed, 558 insertions(+), 51 deletions(-)
 create mode 100644 src/mmore/tui/exceptions.py

diff --git a/README.md b/README.md
index 61c482a6..02a1b4c4 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,24 @@ uv pip install "mmore[process,cpu]"
 
 > :warning: **Check the instructions for contributors directly at [`docs/for_devs.md`](./docs/for_devs.md)**
 
+### Interactive TUI
+
+Prefer a guided experience over editing YAML by hand? Install the `tui` extra and launch the interactive Terminal UI:
+
+```bash
+uv sync --extra tui --extra process --extra index --extra cpu
+mmore tui
+```
+
+From the launcher you can:
+
+- run any stage (process / postprocess / index / rag / chat) interactively,
+- chain the full pipeline (process → postprocess → index → chat),
+- generate stage YAML configs through a guided wizard,
+- pick from existing example configs without leaving the terminal.
+
+Generated configs land in `./tui-configs/` and are validated against the stage's dataclass before any run. Stages whose extras are missing are greyed out in the menu with the exact `uv sync --extra ...` command to enable them. Press `Ctrl-C` inside a sub-flow to cancel and return to the main menu; press it at the main menu to quit.
+
 ### Minimal Example
 
 You can use our predefined CLI commands to execute parts of the pipeline. Note that you might need to prepend `python -m` to the command if the package does not properly create bash aliases.
diff --git a/pyproject.toml b/pyproject.toml
index b9428fa9..258d3fd7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,8 +41,6 @@ dependencies = [
     "typing_extensions>=4.15.0,<5.0",
     "PyYAML>=6.0",
     "setuptools<81",
-    "questionary>=2.0",
-    "rich>=13"
 ]
 
 [project.optional-dependencies]
@@ -128,8 +126,14 @@ api = [
 
 # --- Composite + variant extras ---
 
+tui = [
+    # Interactive terminal launcher (`mmore tui`)
+    "questionary>=2.0",
+    "rich>=13",
+]
+
 all = [
-    "mmore[process,rag,api,websearch]",
+    "mmore[process,rag,api,websearch,tui]",
 ]
 
 cpu = [
diff --git a/src/mmore/cli.py b/src/mmore/cli.py
index 7e8e2af2..080b4be9 100644
--- a/src/mmore/cli.py
+++ b/src/mmore/cli.py
@@ -268,8 +268,14 @@ def ragcli(config_file: str):
 @main.command()
 def tui():
     """Launch the interactive Terminal UI."""
-    from .tui import run
-
+    try:
+        from .tui import run
+    except ImportError as e:
+        click.echo(
+            f"TUI dependencies missing ({e.name or e}). "
+            "Install with: uv sync --extra tui"
+        )
+        raise SystemExit(1)
     run()
 
 
diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 734fe6c2..9f414469 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -9,10 +9,16 @@
 from rich.spinner import Spinner
 from rich.text import Text
 
-from mmore.tui.commands import REGISTRY
-from mmore.tui.config_builder import pick_or_build_config
+from rich.panel import Panel
+
+from mmore.tui.commands import REGISTRY, check_stage_available
+from mmore.tui.config_builder import (
+    build_full_pipeline_wizard,
+    pick_or_build_config,
+)
+from mmore.tui.exceptions import CancelledByUser
 from mmore.tui.paths import cwd_default
-from mmore.tui.pipeline import run_full_pipeline
+from mmore.tui.pipeline import run_full_pipeline, run_pipeline_with_configs
 from mmore.tui.theme import (
     ACCENT,
     ACCENT2,
@@ -26,6 +32,20 @@
 )
 
 
+def _show_missing_extras(spec_name: str, hint: str) -> None:
+    console.print(
+        Panel(
+            Text.assemble(
+                (f"Stage `{spec_name}` can't run.\n\n", "bold"),
+                (hint, "yellow"),
+            ),
+            title="[bold yellow]missing dependencies[/]",
+            border_style="yellow",
+            padding=(1, 2),
+        )
+    )
+
+
 def _run_with_spinner(label: str, fn, **kwargs) -> None:
     start = time.time()
     spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
@@ -35,10 +55,17 @@ def _run_with_spinner(label: str, fn, **kwargs) -> None:
 
 
 def _run_single_command() -> None:
-    choices = [
-        questionary.Choice(f"{spec.name:<12} — {spec.description}", value=spec.name)
-        for spec in REGISTRY.values()
-    ]
+    choices = []
+    for spec in REGISTRY.values():
+        hint = check_stage_available(spec)
+        label = f"{spec.name:<12} — {spec.description}"
+        if hint:
+            label += "  [dim](extras missing)[/dim]"
+            choices.append(
+                questionary.Choice(label, value=spec.name, disabled=hint)
+            )
+        else:
+            choices.append(questionary.Choice(label, value=spec.name))
     name = questionary.select(
         "Pick a command",
         choices=choices,
@@ -48,6 +75,11 @@ def _run_single_command() -> None:
     if name is None:
         return
     spec = REGISTRY[name]
+    # Defensive re-check in case the user typed past the disabled state.
+    hint = check_stage_available(spec)
+    if hint:
+        _show_missing_extras(spec.name, hint)
+        return
     config_file = pick_or_build_config(spec)
     kwargs = {"config_file": config_file}
     if spec.needs_input_data:
@@ -84,16 +116,68 @@ def _chat_only() -> None:
     REGISTRY["ragcli"].run(config_file=config_file)
 
 
+def _run_full_wizard() -> None:
+    paths = build_full_pipeline_wizard()
+    console.print()
+    console.print(
+        section(
+            "Wizard complete",
+            Text(
+                "process:     " + paths["process"] + "\n"
+                "postprocess: " + paths["postprocess"] + "\n"
+                "index:       " + paths["index"],
+                style=MUTED,
+            ),
+            style=ACCENT2,
+        )
+    )
+    if questionary.confirm(
+        "Run the pipeline now with these configs?",
+        default=True,
+        style=QSTYLE,
+        qmark=QMARK,
+    ).ask():
+        run_pipeline_with_configs(paths["process"], paths["postprocess"], paths["index"])
+
+
+def _pipeline_hint() -> str | None:
+    """Return a combined hint if any of process/postprocess/index is missing."""
+    hints = [
+        check_stage_available(REGISTRY[s])
+        for s in ("process", "postprocess", "index")
+    ]
+    hints = [h for h in hints if h]
+    return " | ".join(hints) if hints else None
+
+
 def _main_menu() -> str | None:
+    pipeline_hint = _pipeline_hint()
+    chat_hint = check_stage_available(REGISTRY["ragcli"])
+
+    pipeline_choice = questionary.Choice(
+        "🚀 Run full pipeline  (process → postprocess → index)"
+        + ("  [dim](extras missing)[/dim]" if pipeline_hint else ""),
+        value="pipeline",
+        disabled=pipeline_hint,
+    )
+    wizard_choice = questionary.Choice(
+        "🧙  Build a full pipeline config (guided wizard)",
+        value="wizard",
+    )  # wizard only writes YAML, no heavy imports needed
+    chat_choice = questionary.Choice(
+        "💬 Chat with indexed documents"
+        + ("  [dim](extras missing)[/dim]" if chat_hint else ""),
+        value="chat",
+        disabled=chat_hint,
+    )
+
     return questionary.select(
         "What do you want to do?",
         choices=[
             questionary.Choice("⚙  Run a single command", value="single"),
-            questionary.Choice(
-                "🚀 Run full pipeline  (process → postprocess → index)",
-                value="pipeline",
-            ),
-            questionary.Choice("💬 Chat with indexed documents", value="chat"),
+            pipeline_choice,
+            wizard_choice,
+            chat_choice,
             questionary.Separator(),
             questionary.Choice("✕  Quit", value="quit"),
         ],
@@ -106,21 +190,36 @@ def run() -> None:
     console.clear()
     show_banner("interactive launcher")
     while True:
+        # Ctrl-C at the main menu itself quits; inside any sub-flow it
+        # cancels and returns here.
         try:
             mode = _main_menu()
-            if mode in (None, "quit"):
-                console.print(f"[{ACCENT}]bye![/]")
-                return
+        except KeyboardInterrupt:
+            console.print(f"\n[{ACCENT}]bye![/]")
+            return
+        if mode in (None, "quit"):
+            console.print(f"[{ACCENT}]bye![/]")
+            return
+
+        try:
             if mode == "single":
                 _run_single_command()
             elif mode == "pipeline":
                 run_full_pipeline()
+            elif mode == "wizard":
+                _run_full_wizard()
             elif mode == "chat":
                 _chat_only()
-        except KeyboardInterrupt:
-            console.print(f"\n[{ACCENT2}]interrupted.[/]")
-            return
+        except (CancelledByUser, KeyboardInterrupt):
+            console.print(f"[{ACCENT2}]cancelled — back to menu.[/]")
+            continue
         except Exception as e:  # noqa: BLE001
             console.print(f"[bold red]error:[/] {e}")
-            if not questionary.confirm("Continue?", default=True, style=QSTYLE).ask():
+            try:
+                cont = questionary.confirm(
+                    "Continue?", default=True, style=QSTYLE
+                ).ask()
+            except KeyboardInterrupt:
+                return
+            if not cont:
                 return
diff --git a/src/mmore/tui/commands.py b/src/mmore/tui/commands.py
index 9ab63920..b498e351 100644
--- a/src/mmore/tui/commands.py
+++ b/src/mmore/tui/commands.py
@@ -4,6 +4,7 @@
 the `run` callable is the same `run_*` function the CLI uses.
 """
 
+import importlib.util
 from dataclasses import dataclass, field
 from typing import Any, Callable, Optional
 
@@ -19,6 +20,31 @@ class CommandSpec:
     # Lazy importer returning the dataclass to validate YAML against.
     # Returns None if no validation is wired up for this stage.
     config_dataclass: Optional[Callable[[], Any]] = None
+    # Extras the user has to `uv sync --extra ...` for this stage to import.
+    # Used only to build a friendly install hint.
+    required_extras: list[str] = field(default_factory=list)
+    # Module names probed via `importlib.util.find_spec` to verify the extras
+    # are actually installed. If any is missing, the stage is disabled in the
+    # menu with an install hint.
+    canary_imports: list[str] = field(default_factory=list)
+
+
+def check_stage_available(spec: "CommandSpec") -> Optional[str]:
+    """Return None if all canary imports resolve, else an install-hint string."""
+    missing: list[str] = []
+    for mod in spec.canary_imports:
+        try:
+            if importlib.util.find_spec(mod) is None:
+                missing.append(mod)
+        except (ImportError, ValueError):
+            missing.append(mod)
+    if not missing:
+        return None
+    extras = " ".join(f"--extra {e}" for e in spec.required_extras)
+    return (
+        f"Missing: {', '.join(missing)}. "
+        f"Install with: uv sync {extras}".strip()
+    )
 
 
 def _process(config_file: str, **_):
@@ -104,6 +130,8 @@ def _dc_rag():
             "examples/process/**/*.yml",
         ],
         config_dataclass=_dc_process,
+        required_extras=["process", "cpu"],
+        canary_imports=["torch", "marker", "transformers"],
     ),
     "postprocess": CommandSpec(
         name="postprocess",
@@ -116,6 +144,8 @@ def _dc_rag():
             "examples/postprocessor/**/*.yml",
         ],
         config_dataclass=_dc_postprocess,
+        required_extras=["process", "cpu"],
+        canary_imports=["torch", "transformers"],
     ),
     "index": CommandSpec(
         name="index",
@@ -127,6 +157,8 @@ def _dc_rag():
             "examples/index/**/*.yml",
         ],
         config_dataclass=_dc_index,
+        required_extras=["index", "cpu"],
+        canary_imports=["pymilvus", "sentence_transformers", "torch"],
     ),
     "retrieve": CommandSpec(
         name="retrieve",
@@ -138,6 +170,8 @@ def _dc_rag():
             "examples/rag/**/*.yml",
         ],
         config_dataclass=_dc_rag,
+        required_extras=["rag", "api", "cpu"],
+        canary_imports=["fastapi", "pymilvus", "torch"],
     ),
     "rag": CommandSpec(
         name="rag",
@@ -149,6 +183,8 @@ def _dc_rag():
             "examples/rag/**/*.yml",
         ],
         config_dataclass=_dc_rag,
+        required_extras=["rag", "cpu"],
+        canary_imports=["langchain", "pymilvus", "torch"],
     ),
     "ragcli": CommandSpec(
         name="ragcli",
@@ -160,6 +196,8 @@ def _dc_rag():
             "examples/rag/**/*.yml",
         ],
         config_dataclass=_dc_rag,
+        required_extras=["rag", "cpu"],
+        canary_imports=["langchain", "pymilvus", "torch"],
     ),
     "websearch": CommandSpec(
         name="websearch",
@@ -170,5 +208,7 @@ def _dc_rag():
             "examples/websearchRAG/**/*.yaml",
             "examples/websearchRAG/**/*.yml",
         ],
+        required_extras=["websearch"],
+        canary_imports=["ddgs"],
     ),
 }
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index be31c41d..e9b257d6 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -15,32 +15,42 @@
 
 import questionary
 import yaml
+from rich.live import Live
 from rich.panel import Panel
+from rich.spinner import Spinner
 from rich.text import Text
 
 from mmore.tui.commands import CommandSpec
+from mmore.tui.exceptions import CancelledByUser
 from mmore.tui.paths import cwd_default, repo_root, resolve_example
-from mmore.tui.theme import QMARK, QSTYLE, console
+from mmore.tui.theme import ACCENT2, QMARK, QSTYLE, console, section
 
-CONFIG_DIR = Path("./tui-configs")
 
+def _ask(prompt_obj: Any) -> Any:
+    """Call .ask() and translate Ctrl-C / Esc into CancelledByUser.
 
-def _prompt(question: str, default: str = "") -> str:
-    answer = questionary.text(
-        question, default=default, style=QSTYLE, qmark=QMARK
-    ).ask()
+    questionary raises KeyboardInterrupt on Ctrl-C and returns None on Esc.
+    Both should land us back at the main menu, not exit the TUI.
+    """
+    try:
+        answer = prompt_obj.ask()
+    except KeyboardInterrupt as e:
+        raise CancelledByUser("cancelled") from e
     if answer is None:
-        raise KeyboardInterrupt
+        raise CancelledByUser("cancelled")
     return answer
 
+CONFIG_DIR = Path("./tui-configs")
+
+
+def _prompt(question: str, default: str = "") -> str:
+    return _ask(questionary.text(question, default=default, style=QSTYLE, qmark=QMARK))
+
 
 def _confirm(question: str, default: bool = False) -> bool:
-    answer = questionary.confirm(
-        question, default=default, style=QSTYLE, qmark=QMARK
-    ).ask()
-    if answer is None:
-        raise KeyboardInterrupt
-    return answer
+    return _ask(
+        questionary.confirm(question, default=default, style=QSTYLE, qmark=QMARK)
+    )
 
 
 def _save(name: str, data: dict[str, Any]) -> str:
@@ -120,7 +130,7 @@ def build_postprocess_config() -> str:
         qmark=QMARK,
     ).ask()
     if strategy is None:
-        raise KeyboardInterrupt
+        raise CancelledByUser("cancelled")
     table_handling = questionary.select(
         "Table handling",
         choices=["single_row", "multi_rows", "keep_whole", "none"],
@@ -129,7 +139,7 @@ def build_postprocess_config() -> str:
         qmark=QMARK,
     ).ask()
     if table_handling is None:
-        raise KeyboardInterrupt
+        raise CancelledByUser("cancelled")
     output_path = _prompt(
         "Output JSONL path",
         cwd_default("outputs/postprocess/results.jsonl"),
@@ -182,6 +192,259 @@ def build_index_config(documents_path: Optional[str] = None) -> str:
 }
 
 
+# Static list of processor class names — kept in sync with
+# src/mmore/process/processors/*.py. Used by the full-pipeline wizard so the
+# user can pick a subset rather than always shipping all 10.
+_ALL_PROCESSORS: list[tuple[str, int]] = [
+    ("PDFProcessor", 4000),
+    ("DOCXProcessor", 100),
+    ("PPTXProcessor", 100),
+    ("MarkdownProcessor", 100),
+    ("HTMLProcessor", 100),
+    ("TXTProcessor", 100),
+    ("EMLProcessor", 100),
+    ("SpreadsheetProcessor", 100),
+    ("MediaProcessor", 40),
+    ("URLProcessor", 40),
+]
+
+_PROCESSOR_DEFAULT_CONFIG: dict[str, list[dict[str, Any]]] = {
+    "MediaProcessor": [
+        {"normal_model": "openai/whisper-large-v3-turbo"},
+        {"fast_model": "openai/whisper-tiny"},
+        {"type": "automatic-speech-recognition"},
+        {"sample_rate": 10},
+        {"batch_size": 4},
+    ],
+    "PDFProcessor": [
+        {"PDFTEXT_CPU_WORKERS": 0},
+        {"DETECTOR_BATCH_SIZE": 1},
+        {"DETECTOR_POSTPROCESSING_CPU_WORKERS": 0},
+        {"RECOGNITION_BATCH_SIZE": 1},
+        {"OCR_PARALLEL_WORKERS": 0},
+        {"TEXIFY_BATCH_SIZE": 1},
+        {"LAYOUT_BATCH_SIZE": 1},
+        {"ORDER_BATCH_SIZE": 1},
+        {"TABLE_REC_BATCH_SIZE": 1},
+    ],
+}
+
+
+def build_process_config_wizard() -> str:
+    """Richer process-config builder that lets the user pick processors."""
+    data_path = _prompt(
+        "Data path (folder with documents to process)", cwd_default("data")
+    )
+    output_path = _prompt(
+        "Output path (where merged_results.jsonl will be written)",
+        cwd_default("outputs/process"),
+    )
+    use_fast = _confirm("Use fast (lower-quality) processors?", default=False)
+    distributed = _confirm("Use distributed processing (Dask)?", default=False)
+    extract_images = _confirm("Extract images from documents?", default=True)
+
+    names = [n for n, _ in _ALL_PROCESSORS]
+    selected = questionary.checkbox(
+        "Select processors to enable",
+        choices=[questionary.Choice(n, value=n, checked=True) for n in names],
+        style=QSTYLE,
+        qmark=QMARK,
+    ).ask()
+    if selected is None:
+        raise CancelledByUser("cancelled")
+    if not selected:
+        selected = names  # empty would mean a no-op pipeline; fall back to all
+
+    customize = _confirm("Customize batch sizes?", default=False)
+    sizes: list[dict[str, int]] = []
+    for name, default in _ALL_PROCESSORS:
+        if name not in selected:
+            continue
+        if customize:
+            raw = _prompt(f"Batch size for {name}", str(default))
+            try:
+                value = int(raw)
+            except ValueError:
+                value = default
+        else:
+            value = default
+        sizes.append({name: value})
+
+    processor_config = {
+        name: cfg
+        for name, cfg in _PROCESSOR_DEFAULT_CONFIG.items()
+        if name in selected
+    }
+
+    cfg = {
+        "data_path": data_path,
+        "google_drive_ids": [],
+        "previous_results": None,
+        "dispatcher_config": {
+            "output_path": output_path,
+            "use_fast_processors": use_fast,
+            "distributed": distributed,
+            "extract_images": extract_images,
+            "scheduler_file": None,
+            "process_batch_sizes": sizes,
+            "processor_config": processor_config,
+        },
+    }
+    return _save("process", cfg)
+
+
+def _postprocessor_choices() -> list[str]:
+    """Enumerate every post-processor `type` string the loader accepts."""
+    from mmore.process.post_processor.filter import FILTER_TYPES
+    from mmore.process.post_processor.tagger import TAGGER_TYPES
+
+    return ["chunker", "ner", "translator", "metafuse", *TAGGER_TYPES, *FILTER_TYPES]
+
+
+def _ask_module_args(pp_type: str) -> dict[str, Any]:
+    if pp_type == "chunker":
+        strategy = questionary.select(
+            "Chunking strategy",
+            choices=["sentence", "token", "word", "semantic"],
+            default="sentence",
+            style=QSTYLE,
+            qmark=QMARK,
+        ).ask()
+        if strategy is None:
+            raise CancelledByUser("cancelled")
+        table_handling = questionary.select(
+            "Table handling",
+            choices=["single_row", "multi_rows", "keep_whole", "none"],
+            default="single_row",
+            style=QSTYLE,
+            qmark=QMARK,
+        ).ask()
+        if table_handling is None:
+            raise CancelledByUser("cancelled")
+        return {
+            "chunking_strategy": strategy,
+            "table_handling": table_handling,
+        }
+    if pp_type in {"ner", "translator", "metafuse"}:
+        if _confirm(f"Provide extra args for `{pp_type}` as YAML?", default=False):
+            raw = _prompt("YAML args (single line, e.g. {key: value})", "{}")
+            try:
+                parsed = yaml.safe_load(raw) or {}
+                if isinstance(parsed, dict):
+                    return parsed
+            except yaml.YAMLError:
+                pass
+        return {}
+    return {}
+
+
+def build_postprocess_config_wizard() -> str:
+    """Build a postprocess config with an arbitrary list of pp_modules."""
+    available = _postprocessor_choices()
+    modules: list[dict[str, Any]] = []
+    while True:
+        if modules:
+            console.print(
+                f"  [dim]current modules:[/] {', '.join(m['type'] for m in modules)}"
+            )
+        pp_type = questionary.select(
+            "Add a post-processor module" if not modules else "Add another module",
+            choices=[*available, questionary.Separator(), "(done)"],
+            style=QSTYLE,
+            qmark=QMARK,
+        ).ask()
+        if pp_type is None:
+            raise CancelledByUser("cancelled")
+        if pp_type == "(done)":
+            break
+        args = _ask_module_args(pp_type)
+        modules.append({"type": pp_type, "args": args})
+
+    output_path = _prompt(
+        "Output JSONL path",
+        cwd_default("outputs/postprocess/results.jsonl"),
+    )
+    cfg = {
+        "previous_results": None,
+        "pp_modules": modules,
+        "output": {"output_path": output_path, "save_each_step": True},
+    }
+    return _save("postprocess", cfg)
+
+
+def build_index_config_wizard(documents_path: Optional[str] = None) -> str:
+    dense = _prompt("Dense embedding model", "sentence-transformers/all-MiniLM-L6-v2")
+    sparse = _prompt("Sparse embedding model", "splade")
+    multimodal = _confirm("Multimodal embeddings?", default=False)
+    db_uri = _prompt(
+        "DB URI (Milvus Lite file or server URL)", cwd_default("proc_demo.db")
+    )
+    db_name = _prompt("DB name", "my_db")
+    collection = _prompt("Collection name", "my_docs")
+    docs = documents_path or _prompt(
+        "Documents JSONL path",
+        cwd_default("outputs/postprocess/results.jsonl"),
+    )
+    cfg = {
+        "indexer": {
+            "dense_model": {"model_name": dense, "is_multimodal": multimodal},
+            "sparse_model": {"model_name": sparse, "is_multimodal": multimodal},
+            "db": {"uri": db_uri, "name": db_name},
+        },
+        "collection_name": collection,
+        "documents_path": docs,
+    }
+    return _save("index", cfg)
+
+
+def build_full_pipeline_wizard() -> dict[str, str]:
+    """Build process + postprocess + index configs in one flow.
+
+    Wires the postprocess output JSONL into the index config's documents_path
+    so the three files form a coherent pipeline. Validates each YAML and
+    re-prompts on failure (the per-stage builders run again on retry).
+    """
+    from mmore.tui.commands import REGISTRY
+    from mmore.tui.pipeline import _postprocess_output_jsonl
+
+    console.print(section("Pipeline wizard", Text("step 1/3 — process", style=ACCENT2)))
+    while True:
+        process_path = build_process_config_wizard()
+        err = _validate_with_spinner(process_path, REGISTRY["process"])
+        if err is None:
+            break
+        _show_error_panel(process_path, err)
+        if not _confirm("Retry the process step?", default=True):
+            raise CancelledByUser("cancelled")
+
+    console.print(section("Pipeline wizard", Text("step 2/3 — postprocess", style=ACCENT2)))
+    while True:
+        pp_path = build_postprocess_config_wizard()
+        err = _validate_with_spinner(pp_path, REGISTRY["postprocess"])
+        if err is None:
+            break
+        _show_error_panel(pp_path, err)
+        if not _confirm("Retry the postprocess step?", default=True):
+            raise CancelledByUser("cancelled")
+
+    try:
+        docs_jsonl = _postprocess_output_jsonl(pp_path)
+    except Exception:  # noqa: BLE001
+        docs_jsonl = None
+
+    console.print(section("Pipeline wizard", Text("step 3/3 — index", style=ACCENT2)))
+    while True:
+        index_path = build_index_config_wizard(documents_path=docs_jsonl)
+        err = _validate_with_spinner(index_path, REGISTRY["index"])
+        if err is None:
+            break
+        _show_error_panel(index_path, err)
+        if not _confirm("Retry the index step?", default=True):
+            raise CancelledByUser("cancelled")
+
+    return {"process": process_path, "postprocess": pp_path, "index": index_path}
+
+
 def find_yaml_configs(spec: CommandSpec) -> list[str]:
     """Find candidate YAML configs scoped to this stage.
 
@@ -223,6 +486,19 @@ def _validate_yaml(path: str, spec: CommandSpec) -> Optional[str]:
         return f"{type(e).__name__}: {e}"
 
 
+def _validate_with_spinner(path: str, spec: CommandSpec) -> Optional[str]:
+    """Same as _validate_yaml but shows a spinner — config dataclass imports
+    can take several seconds (heavy transitive imports), making the TUI look
+    frozen otherwise."""
+    spinner = Spinner(
+        "dots", text=Text(f"  Validating {spec.name} config…", style="cyan")
+    )
+    result: dict[str, Optional[str]] = {}
+    with Live(spinner, console=console, refresh_per_second=12, transient=True):
+        result["err"] = _validate_yaml(path, spec)
+    return result["err"]
+
+
 def _show_error_panel(path: str, err: str) -> None:
     console.print(
         Panel(
@@ -281,7 +557,7 @@ def pick_or_build_config(
             qmark=QMARK,
         ).ask()
         if choice is None:
-            raise KeyboardInterrupt
+            raise CancelledByUser("cancelled")
 
         path: Optional[str] = None
 
@@ -303,7 +579,7 @@ def pick_or_build_config(
                     qmark=QMARK,
                 ).ask()
                 if picked is None:
-                    raise KeyboardInterrupt
+                    raise CancelledByUser("cancelled")
                 path = picked
 
         if choice == "manual":
@@ -332,4 +608,4 @@ def pick_or_build_config(
             return path
         _show_error_panel(path, err)
         if not _confirm("Try a different config?", default=True):
-            raise KeyboardInterrupt
+            raise CancelledByUser("cancelled")
diff --git a/src/mmore/tui/exceptions.py b/src/mmore/tui/exceptions.py
new file mode 100644
index 00000000..905d0d25
--- /dev/null
+++ b/src/mmore/tui/exceptions.py
@@ -0,0 +1,11 @@
+"""TUI-only exceptions."""
+
+from __future__ import annotations
+
+
+class CancelledByUser(Exception):
+    """Raised when the user cancels a sub-flow (Ctrl-C or Esc inside a prompt).
+
+    Caught by the top-level menu loop so cancellation returns to the main menu
+    instead of exiting the whole TUI.
+    """
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index 1114a5ba..d460c651 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -83,6 +83,58 @@ def _summary_table(rows: list[tuple[str, str, float]]) -> Table:
     return table
 
 
+def run_pipeline_with_configs(
+    process_cfg: str, pp_cfg: str, index_cfg: str
+) -> None:
+    """Execute the three stages given already-built YAML paths."""
+    console.print()
+    console.print(
+        section(
+            "Full pipeline",
+            Text("process → postprocess → index → (optional) chat", style=ACCENT),
+            style=ACCENT2,
+        )
+    )
+
+    rows: list[tuple[str, str, float]] = []
+
+    step_header(1, 3, "process")
+    elapsed = _run_step(
+        "Crawling + extracting documents",
+        REGISTRY["process"].run,
+        config_file=process_cfg,
+    )
+    process_jsonl = _process_output_jsonl(process_cfg)
+    rows.append(("process", process_jsonl, elapsed))
+
+    step_header(2, 3, "postprocess")
+    elapsed = _run_step(
+        "Chunking + cleaning",
+        REGISTRY["postprocess"].run,
+        config_file=pp_cfg,
+        input_data=process_jsonl,
+    )
+    pp_jsonl = _postprocess_output_jsonl(pp_cfg)
+    rows.append(("postprocess", pp_jsonl, elapsed))
+
+    step_header(3, 3, "index")
+    elapsed = _run_step(
+        "Embedding + indexing into Milvus",
+        REGISTRY["index"].run,
+        config_file=index_cfg,
+        documents_path=pp_jsonl,
+    )
+    rows.append(("index", "(vector DB)", elapsed))
+
+    console.print()
+    console.print(_summary_table(rows))
+    console.print()
+
+    if questionary.confirm("Open the RAG chat now?", default=True).ask():
+        rag_cfg = pick_or_build_config(REGISTRY["ragcli"])
+        REGISTRY["ragcli"].run(config_file=rag_cfg)
+
+
 def run_full_pipeline() -> None:
     console.print()
     console.print(
@@ -95,7 +147,6 @@ def run_full_pipeline() -> None:
 
     rows: list[tuple[str, str, float]] = []
 
-    # process
     step_header(1, 3, "process")
     process_cfg = pick_or_build_config(REGISTRY["process"])
     elapsed = _run_step(
@@ -106,7 +157,6 @@ def run_full_pipeline() -> None:
     process_jsonl = _process_output_jsonl(process_cfg)
     rows.append(("process", process_jsonl, elapsed))
 
-    # postprocess
     step_header(2, 3, "postprocess")
     pp_cfg = pick_or_build_config(REGISTRY["postprocess"])
     elapsed = _run_step(
@@ -118,7 +168,6 @@ def run_full_pipeline() -> None:
     pp_jsonl = _postprocess_output_jsonl(pp_cfg)
     rows.append(("postprocess", pp_jsonl, elapsed))
 
-    # index
     step_header(3, 3, "index")
     index_cfg = pick_or_build_config(REGISTRY["index"], documents_path=pp_jsonl)
     elapsed = _run_step(
diff --git a/uv.lock b/uv.lock
index 85aeac3d..933ebc23 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3634,8 +3634,6 @@ dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
     { name = "pyyaml" },
-    { name = "questionary" },
-    { name = "rich" },
     { name = "setuptools" },
     { name = "typing-extensions" },
     { name = "validators" },
@@ -3690,9 +3688,11 @@ all = [
     { name = "pymupdf" },
     { name = "python-docx" },
     { name = "python-pptx" },
+    { name = "questionary" },
     { name = "ragas" },
     { name = "rarfile" },
     { name = "requests" },
+    { name = "rich" },
     { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
     { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
     { name = "sentence-transformers" },
@@ -3800,6 +3800,10 @@ rag = [
     { name = "sentence-transformers" },
     { name = "transformers" },
 ]
+tui = [
+    { name = "questionary" },
+    { name = "rich" },
+]
 websearch = [
     { name = "ddgs" },
     { name = "tavily-python" },
@@ -3846,7 +3850,7 @@ requires-dist = [
     { name = "marker-pdf", marker = "extra == 'process'", specifier = ">=1.6" },
     { name = "milvus-model", marker = "extra == 'index'", specifier = ">=0.2.12" },
     { name = "mmore", extras = ["index"], marker = "extra == 'rag'" },
-    { name = "mmore", extras = ["process", "rag", "api", "websearch"], marker = "extra == 'all'" },
+    { name = "mmore", extras = ["process", "rag", "api", "websearch", "tui"], marker = "extra == 'all'" },
     { name = "motor", marker = "extra == 'api'", specifier = ">=3.5" },
     { name = "moviepy", marker = "extra == 'process'", specifier = ">=2.0" },
     { name = "nltk", marker = "extra == 'rag'", specifier = ">=3.9" },
@@ -3868,12 +3872,12 @@ requires-dist = [
     { name = "python-dotenv", specifier = ">=1.0" },
     { name = "python-pptx", marker = "extra == 'process'" },
     { name = "pyyaml", specifier = ">=6.0" },
-    { name = "questionary", specifier = ">=2.0" },
+    { name = "questionary", marker = "extra == 'tui'", specifier = ">=2.0" },
     { name = "ragas", marker = "extra == 'rag'", specifier = ">=0.2" },
     { name = "rarfile", marker = "extra == 'process'", specifier = ">=4.1" },
     { name = "requests", marker = "extra == 'api'", specifier = ">=2.31" },
     { name = "requests", marker = "extra == 'process'", specifier = ">=2.31" },
-    { name = "rich", specifier = ">=13" },
+    { name = "rich", marker = "extra == 'tui'", specifier = ">=13" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
     { name = "scipy", marker = "extra == 'index'", specifier = ">=1.8" },
     { name = "sentence-transformers", marker = "extra == 'index'" },
@@ -3898,7 +3902,7 @@ requires-dist = [
     { name = "validators", specifier = ">=0.28" },
     { name = "xlrd", marker = "extra == 'process'", specifier = ">=2.0.1" },
 ]
-provides-extras = ["process", "index", "rag", "api", "all", "cpu", "cu126", "websearch", "dev"]
+provides-extras = ["process", "index", "rag", "api", "tui", "all", "cpu", "cu126", "websearch", "dev"]
 
 [[package]]
 name = "motor"

From 628ca181086909f4677cb075a5c0b02e61ba510c Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 12 May 2026 08:54:31 +0200
Subject: [PATCH 04/24] =?UTF-8?q?fix(tui):=20rename=20CancelledByUser?=
 =?UTF-8?q?=E2=86=92UserCancelledError,=20apply=20ruff=20format?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mmore/tui/app.py            | 18 +++++++--------
 src/mmore/tui/commands.py       |  5 +---
 src/mmore/tui/config_builder.py | 41 +++++++++++++++++----------------
 src/mmore/tui/exceptions.py     |  2 +-
 src/mmore/tui/pipeline.py       |  4 +---
 5 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 9f414469..47d3af96 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -6,17 +6,16 @@
 
 import questionary
 from rich.live import Live
+from rich.panel import Panel
 from rich.spinner import Spinner
 from rich.text import Text
 
-from rich.panel import Panel
-
 from mmore.tui.commands import REGISTRY, check_stage_available
 from mmore.tui.config_builder import (
     build_full_pipeline_wizard,
     pick_or_build_config,
 )
-from mmore.tui.exceptions import CancelledByUser
+from mmore.tui.exceptions import UserCancelledError
 from mmore.tui.paths import cwd_default
 from mmore.tui.pipeline import run_full_pipeline, run_pipeline_with_configs
 from mmore.tui.theme import (
@@ -61,9 +60,7 @@ def _run_single_command() -> None:
         label = f"{spec.name:<12} — {spec.description}"
         if hint:
             label += "  [dim](extras missing)[/dim]"
-            choices.append(
-                questionary.Choice(label, value=spec.name, disabled=hint)
-            )
+            choices.append(questionary.Choice(label, value=spec.name, disabled=hint))
         else:
             choices.append(questionary.Choice(label, value=spec.name))
     name = questionary.select(
@@ -137,14 +134,15 @@ def _run_full_wizard() -> None:
         style=QSTYLE,
         qmark=QMARK,
     ).ask():
-        run_pipeline_with_configs(paths["process"], paths["postprocess"], paths["index"])
+        run_pipeline_with_configs(
+            paths["process"], paths["postprocess"], paths["index"]
+        )
 
 
 def _pipeline_hint() -> str | None:
     """Return a combined hint if any of process/postprocess/index is missing."""
     hints = [
-        check_stage_available(REGISTRY[s])
-        for s in ("process", "postprocess", "index")
+        check_stage_available(REGISTRY[s]) for s in ("process", "postprocess", "index")
     ]
     hints = [h for h in hints if h]
     return " | ".join(hints) if hints else None
@@ -210,7 +208,7 @@ def run() -> None:
                 _run_full_wizard()
             elif mode == "chat":
                 _chat_only()
-        except (CancelledByUser, KeyboardInterrupt):
+        except (UserCancelledError, KeyboardInterrupt):
             console.print(f"[{ACCENT2}]cancelled — back to menu.[/]")
             continue
         except Exception as e:  # noqa: BLE001
diff --git a/src/mmore/tui/commands.py b/src/mmore/tui/commands.py
index b498e351..650cbf6d 100644
--- a/src/mmore/tui/commands.py
+++ b/src/mmore/tui/commands.py
@@ -41,10 +41,7 @@ def check_stage_available(spec: "CommandSpec") -> Optional[str]:
     if not missing:
         return None
     extras = " ".join(f"--extra {e}" for e in spec.required_extras)
-    return (
-        f"Missing: {', '.join(missing)}. "
-        f"Install with: uv sync {extras}".strip()
-    )
+    return f"Missing: {', '.join(missing)}. Install with: uv sync {extras}".strip()
 
 
 def _process(config_file: str, **_):
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index e9b257d6..09e57e58 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -21,13 +21,13 @@
 from rich.text import Text
 
 from mmore.tui.commands import CommandSpec
-from mmore.tui.exceptions import CancelledByUser
+from mmore.tui.exceptions import UserCancelledError
 from mmore.tui.paths import cwd_default, repo_root, resolve_example
 from mmore.tui.theme import ACCENT2, QMARK, QSTYLE, console, section
 
 
 def _ask(prompt_obj: Any) -> Any:
-    """Call .ask() and translate Ctrl-C / Esc into CancelledByUser.
+    """Call .ask() and translate Ctrl-C / Esc into UserCancelledError.
 
     questionary raises KeyboardInterrupt on Ctrl-C and returns None on Esc.
     Both should land us back at the main menu, not exit the TUI.
@@ -35,11 +35,12 @@ def _ask(prompt_obj: Any) -> Any:
     try:
         answer = prompt_obj.ask()
     except KeyboardInterrupt as e:
-        raise CancelledByUser("cancelled") from e
+        raise UserCancelledError("cancelled") from e
     if answer is None:
-        raise CancelledByUser("cancelled")
+        raise UserCancelledError("cancelled")
     return answer
 
+
 CONFIG_DIR = Path("./tui-configs")
 
 
@@ -130,7 +131,7 @@ def build_postprocess_config() -> str:
         qmark=QMARK,
     ).ask()
     if strategy is None:
-        raise CancelledByUser("cancelled")
+        raise UserCancelledError("cancelled")
     table_handling = questionary.select(
         "Table handling",
         choices=["single_row", "multi_rows", "keep_whole", "none"],
@@ -139,7 +140,7 @@ def build_postprocess_config() -> str:
         qmark=QMARK,
     ).ask()
     if table_handling is None:
-        raise CancelledByUser("cancelled")
+        raise UserCancelledError("cancelled")
     output_path = _prompt(
         "Output JSONL path",
         cwd_default("outputs/postprocess/results.jsonl"),
@@ -251,7 +252,7 @@ def build_process_config_wizard() -> str:
         qmark=QMARK,
     ).ask()
     if selected is None:
-        raise CancelledByUser("cancelled")
+        raise UserCancelledError("cancelled")
     if not selected:
         selected = names  # empty would mean a no-op pipeline; fall back to all
 
@@ -271,9 +272,7 @@ def build_process_config_wizard() -> str:
         sizes.append({name: value})
 
     processor_config = {
-        name: cfg
-        for name, cfg in _PROCESSOR_DEFAULT_CONFIG.items()
-        if name in selected
+        name: cfg for name, cfg in _PROCESSOR_DEFAULT_CONFIG.items() if name in selected
     }
 
     cfg = {
@@ -311,7 +310,7 @@ def _ask_module_args(pp_type: str) -> dict[str, Any]:
             qmark=QMARK,
         ).ask()
         if strategy is None:
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
         table_handling = questionary.select(
             "Table handling",
             choices=["single_row", "multi_rows", "keep_whole", "none"],
@@ -320,7 +319,7 @@ def _ask_module_args(pp_type: str) -> dict[str, Any]:
             qmark=QMARK,
         ).ask()
         if table_handling is None:
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
         return {
             "chunking_strategy": strategy,
             "table_handling": table_handling,
@@ -354,7 +353,7 @@ def build_postprocess_config_wizard() -> str:
             qmark=QMARK,
         ).ask()
         if pp_type is None:
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
         if pp_type == "(done)":
             break
         args = _ask_module_args(pp_type)
@@ -415,9 +414,11 @@ def build_full_pipeline_wizard() -> dict[str, str]:
             break
         _show_error_panel(process_path, err)
         if not _confirm("Retry the process step?", default=True):
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
 
-    console.print(section("Pipeline wizard", Text("step 2/3 — postprocess", style=ACCENT2)))
+    console.print(
+        section("Pipeline wizard", Text("step 2/3 — postprocess", style=ACCENT2))
+    )
     while True:
         pp_path = build_postprocess_config_wizard()
         err = _validate_with_spinner(pp_path, REGISTRY["postprocess"])
@@ -425,7 +426,7 @@ def build_full_pipeline_wizard() -> dict[str, str]:
             break
         _show_error_panel(pp_path, err)
         if not _confirm("Retry the postprocess step?", default=True):
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
 
     try:
         docs_jsonl = _postprocess_output_jsonl(pp_path)
@@ -440,7 +441,7 @@ def build_full_pipeline_wizard() -> dict[str, str]:
             break
         _show_error_panel(index_path, err)
         if not _confirm("Retry the index step?", default=True):
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
 
     return {"process": process_path, "postprocess": pp_path, "index": index_path}
 
@@ -557,7 +558,7 @@ def pick_or_build_config(
             qmark=QMARK,
         ).ask()
         if choice is None:
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
 
         path: Optional[str] = None
 
@@ -579,7 +580,7 @@ def pick_or_build_config(
                     qmark=QMARK,
                 ).ask()
                 if picked is None:
-                    raise CancelledByUser("cancelled")
+                    raise UserCancelledError("cancelled")
                 path = picked
 
         if choice == "manual":
@@ -608,4 +609,4 @@ def pick_or_build_config(
             return path
         _show_error_panel(path, err)
         if not _confirm("Try a different config?", default=True):
-            raise CancelledByUser("cancelled")
+            raise UserCancelledError("cancelled")
diff --git a/src/mmore/tui/exceptions.py b/src/mmore/tui/exceptions.py
index 905d0d25..eb310dae 100644
--- a/src/mmore/tui/exceptions.py
+++ b/src/mmore/tui/exceptions.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 
-class CancelledByUser(Exception):
+class UserCancelledError(Exception):
     """Raised when the user cancels a sub-flow (Ctrl-C or Esc inside a prompt).
 
     Caught by the top-level menu loop so cancellation returns to the main menu
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index d460c651..ded98048 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -83,9 +83,7 @@ def _summary_table(rows: list[tuple[str, str, float]]) -> Table:
     return table
 
 
-def run_pipeline_with_configs(
-    process_cfg: str, pp_cfg: str, index_cfg: str
-) -> None:
+def run_pipeline_with_configs(process_cfg: str, pp_cfg: str, index_cfg: str) -> None:
     """Execute the three stages given already-built YAML paths."""
     console.print()
     console.print(

From 9d996fa8068f9beb28ec2eab3cb757413dcc96a2 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 12 May 2026 12:20:13 +0200
Subject: [PATCH 05/24] fix: adding wizard config for single command

---
 src/mmore/tui/config_builder.py | 147 ++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)

diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index 09e57e58..69aacba2 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -186,10 +186,157 @@ def build_index_config(documents_path: Optional[str] = None) -> str:
     return _save("index", cfg)
 
 
+def build_rag_config() -> str:
+    """Wizard for `rag`/`retrieve`/`ragcli`: prompt, then return ``_save("rag", cfg)``."""
+    llm_name = _prompt("LLM name", "OpenMeditron/meditron3-8b")
+    max_new_tokens_raw = _prompt("Max new tokens", "1200")
+    try:
+        max_new_tokens = int(max_new_tokens_raw)
+    except ValueError:  # non-integer answer: silently fall back to 1200
+        max_new_tokens = 1200
+
+    db_uri = _prompt(
+        "DB URI (Milvus Lite file or server URL)", cwd_default("proc_demo.db")
+    )
+    db_name = _prompt("DB name", "my_db")
+    collection = _prompt("Collection name", "my_docs")
+    k_raw = _prompt("Number of docs to retrieve (k)", "5")
+    try:
+        k = int(k_raw)
+    except ValueError:  # same silent fallback as for max_new_tokens
+        k = 5
+    hybrid_raw = _prompt("Hybrid search weight (0.0 dense — 1.0 sparse)", "0.5")
+    try:
+        hybrid = float(hybrid_raw)
+    except ValueError:  # only unparsable input is replaced; the range is not checked
+        hybrid = 0.5
+    use_web = _confirm("Augment retrieval with web search?", default=False)
+    reranker = _prompt("Reranker model (blank to skip)", "BAAI/bge-reranker-base")
+
+    mode = questionary.select(
+        "Run mode",
+        choices=["local", "api"],
+        default="local",
+        style=QSTYLE,
+        qmark=QMARK,
+    ).ask()
+    if mode is None:  # no answer came back from the select: treat as cancellation
+        raise UserCancelledError("cancelled")
+
+    cfg: dict[str, Any] = {
+        "rag": {
+            "llm": {"llm_name": llm_name, "max_new_tokens": max_new_tokens},
+            "retriever": {
+                "db": {"uri": db_uri, "name": db_name},
+                "hybrid_search_weight": hybrid,
+                "k": k,
+                "collection_name": collection,
+                "use_web": use_web,
+                "reranker_model_name": reranker or None,  # empty answer is stored as None
+            },
+            "system_prompt": (
+                "Use the following context to answer the questions.\n\n"
+                "Context:\n{context}"  # plain string: {context} stays a literal placeholder
+            ),
+        },
+        "mode": mode,
+    }
+    if mode == "local":  # local mode: ask for the queries file and the output file
+        input_file = _prompt(
+            "Queries JSONL path", cwd_default("examples/rag/queries.jsonl")
+        )
+        output_file = _prompt(
+            "Output JSON path", cwd_default("outputs/rag/output.json")
+        )
+        cfg["mode_args"] = {"input_file": input_file, "output_file": output_file}
+    else:  # "api", the only other choice offered above
+        port_raw = _prompt("API port", "8000")
+        try:
+            port = int(port_raw)
+        except ValueError:  # non-integer answer: silently fall back to 8000
+            port = 8000
+        cfg["mode_args"] = {
+            "endpoint": "/rag",
+            "host": "0.0.0.0",  # NOTE(review): listens on all interfaces — confirm intended
+            "port": port,
+        }
+    return _save("rag", cfg)
+
+
+def build_websearch_config() -> str:
+    """Wizard for `websearch`: prompt, then return ``_save("websearch", cfg)``."""
+    use_rag = _confirm("Combine web search with RAG?", default=True)
+    rag_path = ""  # stays empty when the user declines the RAG combination
+    if use_rag:
+        rag_path = _prompt(
+            "Path to a RAG config YAML",
+            cwd_default("examples/rag/config.yaml"),
+        )
+    llm_name = _prompt("LLM name", "OpenMeditron/meditron3-8b")
+    max_new_tokens_raw = _prompt("Max new tokens", "1200")
+    try:
+        max_new_tokens = int(max_new_tokens_raw)
+    except ValueError:  # non-integer answer: silently fall back to 1200
+        max_new_tokens = 1200
+    input_queries = _prompt(
+        "Input queries JSONL", cwd_default("examples/rag/queries.jsonl")
+    )
+    output_file = _prompt(
+        "Output JSON path",
+        cwd_default("outputs/websearch/enhanced_results.json"),
+    )
+    n_subqueries_raw = _prompt("Number of sub-queries per question", "2")
+    try:
+        n_subqueries = int(n_subqueries_raw)
+    except ValueError:  # same silent fallback, to 2
+        n_subqueries = 2
+    max_searches_raw = _prompt("Max searches per query", "5")
+    try:
+        max_searches = int(max_searches_raw)
+    except ValueError:  # same silent fallback, to 5
+        max_searches = 5
+    provider = questionary.select(
+        "Search provider",
+        choices=["duckduckgo"],
+        default="duckduckgo",
+        style=QSTYLE,
+        qmark=QMARK,
+    ).ask()
+    if provider is None:  # no answer came back from the select: treat as cancellation
+        raise UserCancelledError("cancelled")
+
+    cfg: dict[str, Any] = {
+        "websearch": {
+            "use_rag": use_rag,
+            "rag_config_path": rag_path,
+            "use_summary": True,  # hard-coded here, not prompted for
+            "n_subqueries": n_subqueries,
+            "input_queries": input_queries,
+            "output_file": output_file,
+            "n_loops": 2,  # hard-coded here, not prompted for
+            "max_searches": max_searches,
+            "search_provider": provider,
+            "max_retries": 3,  # hard-coded here, not prompted for
+            "max_context_tokens": 2048,  # hard-coded here, not prompted for
+            "fast_tokenizer": False,  # hard-coded here, not prompted for
+            "mode": "local",  # hard-coded here, not prompted for
+            "llm_config": {
+                "llm_name": llm_name,
+                "max_new_tokens": max_new_tokens,
+            },
+        }
+    }
+    return _save("websearch", cfg)
+
+
 BUILDERS = {
     "process": build_process_config,
     "postprocess": build_postprocess_config,
     "index": build_index_config,
+    "rag": build_rag_config,
+    "retrieve": build_rag_config,
+    "ragcli": build_rag_config,
+    "websearch": build_websearch_config,
 }
 
 

From c303eee2870365cc111862da40418a46eb46583e Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 12 May 2026 12:22:46 +0200
Subject: [PATCH 06/24] docs: for_devs updated

---
 .../developer_documentation/for_devs.md       | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/docs/source/developer_documentation/for_devs.md b/docs/source/developer_documentation/for_devs.md
index ecd179c4..5d9949ff 100644
--- a/docs/source/developer_documentation/for_devs.md
+++ b/docs/source/developer_documentation/for_devs.md
@@ -31,6 +31,7 @@ This guide will help you set up your development environment and contribute to t
     - [Writing tests](#writing-tests)
   - [🔀 Pull Request Process](#-pull-request-process)
     - [PR checklist](#pr-checklist)
+  - [🖥️ Interactive TUI](#️-interactive-tui)
   - [💡 Development tips](#-development-tips)
     - [Working with `uv`](#working-with-uv)
   - [❓ Questions](#-questions)
@@ -256,6 +257,25 @@ def test_something_on_gpu():
 - [ ] Examples are provided for new features
 - [ ] Commit messages are clear and descriptive
 
+## 🖥️ Interactive TUI
+
+MMORE ships with a Terminal UI that wraps the CLI commands behind guided menus and config wizards. It is useful for trying the pipeline without writing YAML by hand.
+
+Launch it from a project working directory:
+
+```bash
+mmore tui
+```
+
+From the main menu you can:
+
+- **Run a single command** — pick any stage (`process`, `postprocess`, `index`, `retrieve`, `rag`, `ragcli`, `websearch`), then either select an existing YAML, generate one through a guided wizard, or type a path manually. Generated configs are written to `./tui-configs/` and validated against the stage's dataclass before running.
+- **Run full pipeline** — chains `process → postprocess → index` using existing configs.
+- **Build a full pipeline config (guided wizard)** — walks through the three stages in order, wiring the postprocess output JSONL into the index config automatically.
+- **Chat with indexed documents** — shortcut to `ragcli`.
+
+Stages whose extras are missing are disabled in the menu with an install hint (e.g. `uv sync --extra rag --extra cpu`). Press `Ctrl-C` inside any sub-flow to cancel back to the main menu; press it again at the main menu to quit.
+
 ## 💡 Development tips
 
 ### Working with `uv`

From 61b773e27863537ba474df330e49150b7b25a720 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 12 May 2026 13:18:40 +0200
Subject: [PATCH 07/24] feat(tui): config preview, JSONL inspector, incremental
 resume, $EDITOR edit

- Add post-validation menu (preview / edit / run) when picking a config:
  syntax-highlighted YAML preview via rich.Syntax, and $EDITOR launch
  with automatic re-validation on save.
- New inspector module (src/mmore/tui/inspector.py) that streams JSONL
  output files and prints a summary table (doc count, processor types,
  file types, avg text length, modalities) plus a sample of the first 3
  documents. Called automatically after process and postprocess steps.
- Wizard builders now detect existing output files and propose resuming
  via previous_results instead of always writing null, leveraging the
  existing incremental.py module for skipping unchanged files.
- Add "Edit an existing YAML in $EDITOR" choice to pick_or_build_config.

---
 src/mmore/tui/config_builder.py | 132 ++++++++++++++++++++++++++------
 src/mmore/tui/inspector.py      | 126 ++++++++++++++++++++++++++++++
 src/mmore/tui/pipeline.py       |   5 ++
 3 files changed, 238 insertions(+), 25 deletions(-)
 create mode 100644 src/mmore/tui/inspector.py

diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index 69aacba2..8fa6e875 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -9,6 +9,7 @@
 from __future__ import annotations
 
 import os
+import subprocess
 import time
 from pathlib import Path
 from typing import Any, Optional
@@ -18,12 +19,13 @@
 from rich.live import Live
 from rich.panel import Panel
 from rich.spinner import Spinner
+from rich.syntax import Syntax
 from rich.text import Text
 
 from mmore.tui.commands import CommandSpec
 from mmore.tui.exceptions import UserCancelledError
 from mmore.tui.paths import cwd_default, repo_root, resolve_example
-from mmore.tui.theme import ACCENT2, QMARK, QSTYLE, console, section
+from mmore.tui.theme import ACCENT, ACCENT2, QMARK, QSTYLE, console, section
 
 
 def _ask(prompt_obj: Any) -> Any:
@@ -62,6 +64,58 @@ def _save(name: str, data: dict[str, Any]) -> str:
     return str(path)
 
 
+def _preview_config(path: str) -> None:
+    """Print the YAML file at ``path`` in a syntax-highlighted, line-numbered panel."""
+    content = Path(path).read_text(encoding="utf-8")  # don't depend on the locale
+    console.print(
+        Panel(
+            Syntax(content, "yaml", theme="monokai", line_numbers=True),
+            title=Text(path, style="bold"),  # Text: "[" in a path is not markup
+            border_style=ACCENT,
+            padding=(1, 2),
+        )
+    )
+
+
+def _edit_config(path: str) -> None:
+    """Open ``path`` in $EDITOR (vi if unset or empty); $EDITOR may carry flags."""
+    import shlex  # local import: split values such as "code --wait" into argv
+    subprocess.call([*shlex.split(os.environ.get("EDITOR") or "vi"), path])
+
+
+def _post_validation_menu(path: str, spec: CommandSpec) -> str:
+    """Let the user preview, edit, or run an already-validated config.
+
+    Returns ``path`` once "run" is chosen and the file still validates.
+    """
+    err = None  # error from the latest post-edit validation, if any
+    while True:
+        action = _ask(
+            questionary.select(
+                "What next?",
+                choices=[
+                    questionary.Choice("▶  Run with this config", value="run"),
+                    questionary.Choice("👁  Preview config", value="preview"),
+                    questionary.Choice("✎  Edit in $EDITOR", value="edit"),
+                ],
+                default="run",  # questionary matches default against choice values
+                style=QSTYLE,
+                qmark=QMARK,
+            )
+        )
+        if action == "run" and not err:
+            return path
+        if action == "preview":
+            _preview_config(path)
+        elif action == "edit":
+            _edit_config(path)
+            # Re-validate: the edit may have fixed or broken the file.
+            err = _validate_with_spinner(path, spec)
+        # Never hand an invalid config to the runner; show why instead.
+        if err:
+            _show_error_panel(path, err)
+
+
 def build_process_config() -> str:
     data_path = _prompt(
         "Data path (folder with documents to process)",
@@ -422,10 +476,19 @@ def build_process_config_wizard() -> str:
         name: cfg for name, cfg in _PROCESSOR_DEFAULT_CONFIG.items() if name in selected
     }
 
+    # Incremental resume: detect previous results
+    previous_results = None
+    prev_path = os.path.join(output_path, "merged", "merged_results.jsonl")
+    if os.path.exists(prev_path) and _confirm(
+        f"Previous results found at {prev_path}. Resume (skip unchanged files)?",
+        default=True,
+    ):
+        previous_results = prev_path
+
     cfg = {
         "data_path": data_path,
         "google_drive_ids": [],
-        "previous_results": None,
+        "previous_results": previous_results,
         "dispatcher_config": {
             "output_path": output_path,
             "use_fast_processors": use_fast,
@@ -510,8 +573,22 @@ def build_postprocess_config_wizard() -> str:
         "Output JSONL path",
         cwd_default("outputs/postprocess/results.jsonl"),
     )
+
+    # Incremental resume: detect previous results
+    previous_results = None
+    # Resolve the actual JSONL path (dir → dir/final.jsonl, .jsonl → as-is)
+    if output_path.endswith(".jsonl"):
+        pp_prev_path = output_path
+    else:
+        pp_prev_path = os.path.join(output_path, "final.jsonl")
+    if os.path.exists(pp_prev_path) and _confirm(
+        f"Previous results found at {pp_prev_path}. Resume (skip unchanged)?",
+        default=True,
+    ):
+        previous_results = pp_prev_path
+
     cfg = {
-        "previous_results": None,
+        "previous_results": previous_results,
         "pp_modules": modules,
         "output": {"output_path": output_path, "save_each_step": True},
     }
@@ -694,22 +771,25 @@ def pick_or_build_config(
     on failure rather than letting the run blow up later.
     """
     while True:
-        choice = questionary.select(
-            f"Config for `{spec.name}`?",
-            choices=[
-                questionary.Choice("📂 Pick existing YAML", value="pick"),
-                questionary.Choice("✨ Generate new YAML (guided)", value="build"),
-                questionary.Choice("⌨  Type a path manually", value="manual"),
-            ],
-            style=QSTYLE,
-            qmark=QMARK,
-        ).ask()
-        if choice is None:
-            raise UserCancelledError("cancelled")
+        choice = _ask(
+            questionary.select(
+                f"Config for `{spec.name}`?",
+                choices=[
+                    questionary.Choice("📂 Pick existing YAML", value="pick"),
+                    questionary.Choice("✨ Generate new YAML (guided)", value="build"),
+                    questionary.Choice(
+                        "✎  Edit an existing YAML in $EDITOR", value="edit"
+                    ),
+                    questionary.Choice("⌨  Type a path manually", value="manual"),
+                ],
+                style=QSTYLE,
+                qmark=QMARK,
+            )
+        )
 
         path: Optional[str] = None
 
-        if choice == "pick":
+        if choice in ("pick", "edit"):
             candidates = find_yaml_configs(spec)
             ranked = _ranked_choices(spec, candidates)
             if not ranked:
@@ -720,15 +800,17 @@ def pick_or_build_config(
                 )
                 choice = "manual"
             else:
-                picked = questionary.select(
-                    f"Select a config for `{spec.name}`",
-                    choices=ranked,
-                    style=QSTYLE,
-                    qmark=QMARK,
-                ).ask()
-                if picked is None:
-                    raise UserCancelledError("cancelled")
+                picked = _ask(
+                    questionary.select(
+                        f"Select a config for `{spec.name}`",
+                        choices=ranked,
+                        style=QSTYLE,
+                        qmark=QMARK,
+                    )
+                )
                 path = picked
+                if choice == "edit":
+                    _edit_config(path)
 
         if choice == "manual":
             manual = _prompt("Path to YAML config")
@@ -753,7 +835,7 @@ def pick_or_build_config(
         assert path is not None
         err = _validate_yaml(path, spec)
         if err is None:
-            return path
+            return _post_validation_menu(path, spec)
         _show_error_panel(path, err)
         if not _confirm("Try a different config?", default=True):
             raise UserCancelledError("cancelled")
diff --git a/src/mmore/tui/inspector.py b/src/mmore/tui/inspector.py
new file mode 100644
index 00000000..2d0dd033
--- /dev/null
+++ b/src/mmore/tui/inspector.py
@@ -0,0 +1,126 @@
+"""Lightweight JSONL inspector for TUI result previews.
+
+Streams the file line-by-line (no heavy imports like torch/transformers)
+and prints a rich summary table + sample documents.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+from mmore.tui.theme import ACCENT, ACCENT2, MUTED, console
+
+
+def _iter_dicts(path: str):
+    """Yield the parsed JSON of each non-blank line (no MultimodalSample import)."""
+    with open(path, encoding="utf-8") as f:  # JSONL is UTF-8, whatever the locale
+        for line in f:
+            line = line.strip()
+            if line:
+                yield json.loads(line)
+
+
+def inspect_jsonl(path: str, max_samples: int = 3) -> None:
+    """Print counts, breakdowns and up to ``max_samples`` sample docs for a JSONL file."""
+    if not os.path.exists(path):
+        console.print(f"  [dim]no output file at {path}[/dim]")
+        return
+
+    total = 0
+    processor_types: Counter[str] = Counter()
+    file_extensions: Counter[str] = Counter()
+    modality_types: Counter[str] = Counter()
+    total_text_len = 0
+    samples: list[dict[str, Any]] = []
+
+    for doc in _iter_dicts(path):
+        total += 1
+
+        meta = doc.get("metadata") or {}  # tolerate an explicit "metadata": null
+        pt = meta.get("processor_type", "unknown")
+        processor_types[pt] += 1
+
+        fp = meta.get("file_path", "")
+        ext = Path(fp).suffix.lower() if fp else "(none)"
+        file_extensions[ext] += 1
+
+        text = doc.get("text", "")
+        if isinstance(text, str):
+            total_text_len += len(text)
+
+        for mod in doc.get("modalities") or []:  # same for "modalities": null
+            modality_types[mod.get("type", "unknown")] += 1
+
+        if len(samples) < max_samples:
+            samples.append(doc)
+
+    if total == 0:
+        console.print("  [dim]empty JSONL (0 documents)[/dim]")
+        return
+
+    # --- Stats table ---
+    table = Table(
+        title="[bold]Results summary[/bold]",
+        title_style=ACCENT2,
+        border_style=ACCENT,
+        header_style=f"bold {ACCENT}",
+        show_lines=False,
+        padding=(0, 2),
+    )
+    table.add_column("Metric", style="bold")
+    table.add_column("Value")
+
+    table.add_row("Total documents", str(total))
+    table.add_row("Avg text length", f"{total_text_len // total:,} chars")
+
+    if processor_types:
+        breakdown = ", ".join(f"{k}: {v}" for k, v in processor_types.most_common())
+        table.add_row("Processor types", breakdown)
+
+    if file_extensions:
+        breakdown = ", ".join(f"{k}: {v}" for k, v in file_extensions.most_common())
+        table.add_row("File types", breakdown)
+
+    if modality_types:
+        breakdown = ", ".join(f"{k}: {v}" for k, v in modality_types.most_common())
+        table.add_row("Modalities", breakdown)
+
+    console.print()
+    console.print(table)
+
+    # --- Sample documents ---
+    if samples:
+        sample_text = Text()
+        for i, doc in enumerate(samples, 1):
+            meta = doc.get("metadata") or {}  # null-safe, as in the counting loop
+            fp = meta.get("file_path", "?")
+            pt = meta.get("processor_type", "?")
+            text = doc.get("text", "")
+            if isinstance(text, str):
+                preview = text[:200].replace("\n", " ")
+                if len(text) > 200:
+                    preview += "…"
+            else:
+                preview = "(structured content)"
+            sample_text.append(f"#{i} ", style="bold")
+            sample_text.append(f"{fp}  ")
+            sample_text.append(f"({pt})", style="dim")
+            sample_text.append("\n")
+            sample_text.append(preview + "\n\n", style=MUTED)
+
+        console.print(
+            Panel(
+                sample_text,
+                title=f"[bold]Sample documents (first {len(samples)})[/bold]",
+                border_style=ACCENT,
+                padding=(1, 2),
+            )
+        )
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index ded98048..d79f83ce 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -13,6 +13,7 @@
 
 from mmore.tui.commands import REGISTRY
 from mmore.tui.config_builder import pick_or_build_config
+from mmore.tui.inspector import inspect_jsonl
 from mmore.tui.theme import (
     ACCENT,
     ACCENT2,
@@ -104,6 +105,7 @@ def run_pipeline_with_configs(process_cfg: str, pp_cfg: str, index_cfg: str) ->
     )
     process_jsonl = _process_output_jsonl(process_cfg)
     rows.append(("process", process_jsonl, elapsed))
+    inspect_jsonl(process_jsonl)
 
     step_header(2, 3, "postprocess")
     elapsed = _run_step(
@@ -114,6 +116,7 @@ def run_pipeline_with_configs(process_cfg: str, pp_cfg: str, index_cfg: str) ->
     )
     pp_jsonl = _postprocess_output_jsonl(pp_cfg)
     rows.append(("postprocess", pp_jsonl, elapsed))
+    inspect_jsonl(pp_jsonl)
 
     step_header(3, 3, "index")
     elapsed = _run_step(
@@ -154,6 +157,7 @@ def run_full_pipeline() -> None:
     )
     process_jsonl = _process_output_jsonl(process_cfg)
     rows.append(("process", process_jsonl, elapsed))
+    inspect_jsonl(process_jsonl)
 
     step_header(2, 3, "postprocess")
     pp_cfg = pick_or_build_config(REGISTRY["postprocess"])
@@ -165,6 +169,7 @@ def run_full_pipeline() -> None:
     )
     pp_jsonl = _postprocess_output_jsonl(pp_cfg)
     rows.append(("postprocess", pp_jsonl, elapsed))
+    inspect_jsonl(pp_jsonl)
 
     step_header(3, 3, "index")
     index_cfg = pick_or_build_config(REGISTRY["index"], documents_path=pp_jsonl)

From f521702ceccf794901f4b924bfbf3f52d4a6b957 Mon Sep 17 00:00:00 2001
From: Mathieu <mathieu.bonnet@telecom-sudparis.eu>
Date: Tue, 12 May 2026 16:38:36 +0200
Subject: [PATCH 08/24] disable commands when pipeline extras are missing | big
 fix

---
 src/mmore/tui/app.py            | 79 ++++++++++++++++++++++++++++-----
 src/mmore/tui/config_builder.py | 18 +++++---
 src/mmore/tui/theme.py          |  1 +
 3 files changed, 83 insertions(+), 15 deletions(-)

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 47d3af96..4a5b33e0 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -45,6 +45,39 @@ def _show_missing_extras(spec_name: str, hint: str) -> None:
     )
 
 
+def _missing_extras_notice() -> Panel | None:
+    """Build a compact warning panel: one line per install command and its stages."""
+    marker = "Install with: "
+    stages_by_cmd: dict[str, list[str]] = {}
+    for stage_name, stage_spec in REGISTRY.items():
+        # A falsy hint, or one without the marker, contributes nothing.
+        _, found, tail = (check_stage_available(stage_spec) or "").partition(marker)
+        if found:
+            stages_by_cmd.setdefault(tail.strip(), []).append(stage_name)
+
+    if not stages_by_cmd:
+        return None
+
+    # One "stage, stage  →  command" row per distinct install command.
+    rows = Text()
+    for idx, (command, stage_names) in enumerate(stages_by_cmd.items()):
+        if idx:
+            rows.append("\n")
+        rows.append(", ".join(stage_names), style="bold white")
+        rows.append("  →  ", style="yellow")
+        rows.append(command, style="cyan")
+
+    heading = "[bold yellow]⚠  missing extras[/]"
+    return Panel(
+        rows, title=heading, border_style="yellow", padding=(0, 1)
+    )
+
+
+def _disabled_label(label: str) -> str:
+    """Return ``label`` behind a warning-sign prefix that marks the entry as disabled."""
+    return "⚠  " + label
+
+
 def _run_with_spinner(label: str, fn, **kwargs) -> None:
     start = time.time()
     spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
@@ -55,14 +88,29 @@ def _run_with_spinner(label: str, fn, **kwargs) -> None:
 
 def _run_single_command() -> None:
     choices = []
+    enabled_count = 0
     for spec in REGISTRY.values():
         hint = check_stage_available(spec)
         label = f"{spec.name:<12} — {spec.description}"
         if hint:
-            label += "  [dim](extras missing)[/dim]"
-            choices.append(questionary.Choice(label, value=spec.name, disabled=hint))
+            choices.append(
+                questionary.Choice(
+                    _disabled_label(label), value=spec.name, disabled="missing extras"
+                )
+            )
         else:
             choices.append(questionary.Choice(label, value=spec.name))
+            enabled_count += 1
+
+    # questionary crashes ("InquirerControl has no attribute 'pointed_at'") when
+    # every choice is disabled because it can't pick an initial pointer. Bail
+    # out with a clear notice instead.
+    if enabled_count == 0:
+        notice = _missing_extras_notice()
+        if notice is not None:
+            console.print(notice)
+        return
+
     name = questionary.select(
         "Pick a command",
         choices=choices,
@@ -149,24 +197,35 @@ def _pipeline_hint() -> str | None:
 
 
 def _main_menu() -> str | None:
+    notice = _missing_extras_notice()
+    if notice is not None:
+        console.print(notice)
+
     pipeline_hint = _pipeline_hint()
     chat_hint = check_stage_available(REGISTRY["ragcli"])
+    # The wizard validates each generated YAML against the stage's dataclass,
+    # which transitively imports torch / transformers / etc. — so it needs the
+    # same extras as the full pipeline. Reuse `_pipeline_hint()` to stay aligned.
+    wizard_hint = _pipeline_hint()
+
+    pipeline_label = "🚀 Run full pipeline  (process → postprocess → index)"
+    wizard_label = "🧙  Build a full pipeline config (guided wizard)"
+    chat_label = "💬 Chat with indexed documents"
 
     pipeline_choice = questionary.Choice(
-        "🚀 Run full pipeline  (process → postprocess → index)"
-        + ("  [dim](extras missing)[/dim]" if pipeline_hint else ""),
+        _disabled_label(pipeline_label) if pipeline_hint else pipeline_label,
         value="pipeline",
-        disabled=pipeline_hint,
+        disabled="missing extras" if pipeline_hint else None,
     )
     wizard_choice = questionary.Choice(
-        "🧙  Build a full pipeline config (guided wizard)",
+        _disabled_label(wizard_label) if wizard_hint else wizard_label,
         value="wizard",
-    )  # wizard only writes YAML, no heavy imports needed
+        disabled="missing extras" if wizard_hint else None,
+    )
     chat_choice = questionary.Choice(
-        "💬 Chat with indexed documents"
-        + ("  [dim](extras missing)[/dim]" if chat_hint else ""),
+        _disabled_label(chat_label) if chat_hint else chat_label,
         value="chat",
-        disabled=chat_hint,
+        disabled="missing extras" if chat_hint else None,
     )
 
     return questionary.select(
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index 8fa6e875..37eaddc4 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -97,7 +97,7 @@ def _post_validation_menu(path: str, spec: CommandSpec) -> str:
                     questionary.Choice("👁  Preview config", value="preview"),
                     questionary.Choice("✎  Edit in $EDITOR", value="edit"),
                 ],
-                default="▶  Run with this config",
+                default="run",
                 style=QSTYLE,
                 qmark=QMARK,
             )
@@ -503,11 +503,19 @@ def build_process_config_wizard() -> str:
 
 
 def _postprocessor_choices() -> list[str]:
-    """Enumerate every post-processor `type` string the loader accepts."""
-    from mmore.process.post_processor.filter import FILTER_TYPES
-    from mmore.process.post_processor.tagger import TAGGER_TYPES
+    """Enumerate every post-processor `type` string the loader accepts.
 
-    return ["chunker", "ner", "translator", "metafuse", *TAGGER_TYPES, *FILTER_TYPES]
+    The wizard is reachable without the `process` extra installed (it only
+    writes YAML), so we fall back to the core set if the extra modules are
+    missing instead of crashing mid-wizard with an ImportError.
+    """
+    base = ["chunker", "ner", "translator", "metafuse"]
+    try:
+        from mmore.process.post_processor.filter import FILTER_TYPES
+        from mmore.process.post_processor.tagger import TAGGER_TYPES
+    except ImportError:
+        return base
+    return [*base, *TAGGER_TYPES, *FILTER_TYPES]
 
 
 def _ask_module_args(pp_type: str) -> dict[str, Any]:
diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
index d719c351..b3710278 100644
--- a/src/mmore/tui/theme.py
+++ b/src/mmore/tui/theme.py
@@ -19,6 +19,7 @@
         ("highlighted", "fg:#5fd7ff bold"),
         ("selected", "fg:#ff5fd7"),
         ("instruction", "fg:#808080 italic"),
+        ("disabled", "fg:#ffaf00 italic"),
     ]
 )
 QMARK = "▸"

From 0d78172cb20b19cc349e859b59319b5b0baf70bb Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Wed, 13 May 2026 23:29:22 +0200
Subject: [PATCH 09/24] fix(tui): address Copilot review comments

- Use time.time_ns() for config filenames to avoid collisions
- Support $EDITOR with flags (e.g. "code -w") via shlex.split
- Use _validate_with_spinner in pick_or_build_config to show feedback
  during slow dataclass imports
- Expand ~ and env vars on manual path input (expanduser/expandvars)
- Replace cwd_default("examples/...") with resolve_example() so
  defaults resolve correctly from any CWD
- Narrow ImportError catch in cli.py to ModuleNotFoundError for
  expected TUI deps only, re-raise other import errors
- Fix paths.py docstring to match actual implementation (no
  importlib.resources fallback)
---
 src/mmore/cli.py                | 13 +++++++------
 src/mmore/tui/config_builder.py | 19 ++++++++++++-------
 src/mmore/tui/paths.py          |  8 +++-----
 3 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/src/mmore/cli.py b/src/mmore/cli.py
index 080b4be9..d6333a69 100644
--- a/src/mmore/cli.py
+++ b/src/mmore/cli.py
@@ -270,12 +270,13 @@ def tui():
     """Launch the interactive Terminal UI."""
     try:
         from .tui import run
-    except ImportError as e:
-        click.echo(
-            f"TUI dependencies missing ({e.name or e}). "
-            "Install with: uv sync --extra tui"
-        )
-        raise SystemExit(1)
+    except ModuleNotFoundError as e:
+        if e.name in ("questionary", "rich", "prompt_toolkit"):
+            click.echo(
+                f"TUI dependency missing ({e.name}). Install with: uv sync --extra tui"
+            )
+            raise SystemExit(1)
+        raise
     run()
 
 
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index 37eaddc4..8f38a9b1 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -9,6 +9,7 @@
 from __future__ import annotations
 
 import os
+import shlex
 import subprocess
 import time
 from pathlib import Path
@@ -58,7 +59,7 @@ def _confirm(question: str, default: bool = False) -> bool:
 
 def _save(name: str, data: dict[str, Any]) -> str:
     CONFIG_DIR.mkdir(parents=True, exist_ok=True)
-    path = CONFIG_DIR / f"{name}-{int(time.time())}.yaml"
+    path = CONFIG_DIR / f"{name}-{time.time_ns()}.yaml"
     with open(path, "w") as f:
         yaml.safe_dump(data, f, sort_keys=False)
     return str(path)
@@ -78,9 +79,12 @@ def _preview_config(path: str) -> None:
 
 
 def _edit_config(path: str) -> None:
-    """Open a config file in $EDITOR (falls back to vi)."""
+    """Open a config file in $EDITOR (falls back to vi).
+
+    Supports editors with flags like ``EDITOR="code -w"`` via shlex.split.
+    """
     editor = os.environ.get("EDITOR", "vi")
-    subprocess.call([editor, path])
+    subprocess.call([*shlex.split(editor), path])
 
 
 def _post_validation_menu(path: str, spec: CommandSpec) -> str:
@@ -297,7 +301,7 @@ def build_rag_config() -> str:
     }
     if mode == "local":
         input_file = _prompt(
-            "Queries JSONL path", cwd_default("examples/rag/queries.jsonl")
+            "Queries JSONL path", resolve_example("examples/rag/queries.jsonl")
         )
         output_file = _prompt(
             "Output JSON path", cwd_default("outputs/rag/output.json")
@@ -324,7 +328,7 @@ def build_websearch_config() -> str:
     if use_rag:
         rag_path = _prompt(
             "Path to a RAG config YAML",
-            cwd_default("examples/rag/config.yaml"),
+            resolve_example("examples/rag/config.yaml"),
         )
     llm_name = _prompt("LLM name", "OpenMeditron/meditron3-8b")
     max_new_tokens_raw = _prompt("Max new tokens", "1200")
@@ -333,7 +337,7 @@ def build_websearch_config() -> str:
     except ValueError:
         max_new_tokens = 1200
     input_queries = _prompt(
-        "Input queries JSONL", cwd_default("examples/rag/queries.jsonl")
+        "Input queries JSONL", resolve_example("examples/rag/queries.jsonl")
     )
     output_file = _prompt(
         "Output JSON path",
@@ -822,6 +826,7 @@ def pick_or_build_config(
 
         if choice == "manual":
             manual = _prompt("Path to YAML config")
+            manual = os.path.expandvars(os.path.expanduser(manual))
             if not os.path.exists(manual):
                 _show_error_panel(manual, "file not found")
                 continue
@@ -841,7 +846,7 @@ def pick_or_build_config(
                 path = builder()
 
         assert path is not None
-        err = _validate_yaml(path, spec)
+        err = _validate_with_spinner(path, spec)
         if err is None:
             return _post_validation_menu(path, spec)
         _show_error_panel(path, err)
diff --git a/src/mmore/tui/paths.py b/src/mmore/tui/paths.py
index 17194f00..3c6233bf 100644
--- a/src/mmore/tui/paths.py
+++ b/src/mmore/tui/paths.py
@@ -1,10 +1,8 @@
-"""Locate bundled example configs regardless of CWD or install layout.
+"""Locate bundled example configs regardless of CWD.
 
 Strategy:
-- If `examples/` exists relative to CWD (source checkout), use it.
-- Else, walk up from CWD looking for a repo root that contains `examples/`.
-- Else, fall back to `importlib.resources` to read examples shipped with the
-  package (only available if the wheel actually bundles them).
+- Walk up from CWD looking for a directory that contains ``examples/``
+  (works from any subdirectory of a source checkout).
 - If nothing is found, return the original repo-relative path so error
   messages stay readable; callers handle "missing" gracefully.
 """

From bb808499a7fbd4b535737dd8eedd294a70170b86 Mon Sep 17 00:00:00 2001
From: Mathieu <mathieu.bonnet@telecom-sudparis.eu>
Date: Sun, 17 May 2026 15:00:56 +0200
Subject: [PATCH 10/24] fix(tui): expose merged_results_path helper in
 run_process + uv.lock

---
 src/mmore/run_process.py        | 14 +++++++++++---
 src/mmore/tui/config_builder.py |  4 +++-
 src/mmore/tui/pipeline.py       |  5 ++---
 uv.lock                         |  2 +-
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/mmore/run_process.py b/src/mmore/run_process.py
index da53c62a..66484109 100644
--- a/src/mmore/run_process.py
+++ b/src/mmore/run_process.py
@@ -44,11 +44,19 @@ class ProcessInference:
     previous_results: Optional[str] = None
 
 
+def merged_results_path(output_path: str) -> str:
+    """Path where `process` writes its final merged JSONL.
+
+    Single source of truth for downstream tooling (TUI, scripts) that needs
+    to locate the JSONL produced by a `process` run from its config.
+    """
+    return os.path.join(output_path, "merged", "merged_results.jsonl")
+
+
 def _write_merged_results(output_path, reused_samples, dispatched=True):
     """Merge per-processor JSONL files and reused samples into a single output."""
-    merged_output_path = os.path.join(output_path, "merged")
-    output_file = os.path.join(merged_output_path, "merged_results.jsonl")
-    os.makedirs(merged_output_path, exist_ok=True)
+    output_file = merged_results_path(output_path)
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
 
     total_results = 0
     with open(output_file, "w") as f:
diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index 8f38a9b1..bd4ce7ee 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -481,8 +481,10 @@ def build_process_config_wizard() -> str:
     }
 
     # Incremental resume: detect previous results
+    from mmore.run_process import merged_results_path
+
     previous_results = None
-    prev_path = os.path.join(output_path, "merged", "merged_results.jsonl")
+    prev_path = merged_results_path(output_path)
     if os.path.exists(prev_path) and _confirm(
         f"Previous results found at {prev_path}. Resume (skip unchanged files)?",
         default=True,
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index d79f83ce..65017ac0 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -31,12 +31,11 @@ def _process_output_jsonl(config_path: str) -> str:
     Goes through `mmore.utils.load_config` so env-var expansion ($ROOT_OUT_DIR,
     etc.) matches what the underlying command sees.
     """
-    from mmore.run_process import ProcessInference
+    from mmore.run_process import ProcessInference, merged_results_path
     from mmore.utils import load_config
 
     cfg: ProcessInference = load_config(config_path, ProcessInference)
-    out = cfg.dispatcher_config.output_path
-    return os.path.join(out, "merged", "merged_results.jsonl")
+    return merged_results_path(cfg.dispatcher_config.output_path)
 
 
 def _postprocess_output_jsonl(config_path: str) -> str:
diff --git a/uv.lock b/uv.lock
index 933ebc23..94f0d1da 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3621,7 +3621,7 @@ wheels = [
 
 [[package]]
 name = "mmore"
-version = "1.2.2"
+version = "1.2.3"
 source = { editable = "." }
 dependencies = [
     { name = "click" },

From 244a972085b94a556429a76e6bd021cdd7013af9 Mon Sep 17 00:00:00 2001
From: Mathieu <mathieu.bonnet@telecom-sudparis.eu>
Date: Mon, 18 May 2026 19:07:21 +0200
Subject: [PATCH 11/24] remove Live spinner from TUI step runners

---
 src/mmore/tui/app.py      |  9 ++++-----
 src/mmore/tui/pipeline.py | 10 +++++-----
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 4a5b33e0..1a758012 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -5,9 +5,7 @@
 import time
 
 import questionary
-from rich.live import Live
 from rich.panel import Panel
-from rich.spinner import Spinner
 from rich.text import Text
 
 from mmore.tui.commands import REGISTRY, check_stage_available
@@ -79,10 +77,11 @@ def _disabled_label(label: str) -> str:
 
 
 def _run_with_spinner(label: str, fn, **kwargs) -> None:
+    # See pipeline._run_step: heavy underlying commands log to stdout in ways
+    # that clash with a rich Live spinner. Plain prints keep output readable.
     start = time.time()
-    spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
-    with Live(spinner, console=console, refresh_per_second=12, transient=True):
-        fn(**kwargs)
+    console.print(f"  [{ACCENT}]▸[/] {label}…")
+    fn(**kwargs)
     console.print(f"  [{OK}]✓[/] {label} [dim]({time.time() - start:.1f}s)[/dim]")
 
 
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index 65017ac0..2263ea0b 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -6,8 +6,6 @@
 import time
 
 import questionary
-from rich.live import Live
-from rich.spinner import Spinner
 from rich.table import Table
 from rich.text import Text
 
@@ -54,10 +52,12 @@ def _postprocess_output_jsonl(config_path: str) -> str:
 
 
 def _run_step(label: str, fn, **kwargs) -> float:
+    # No Live spinner here: run_process / run_index emit their own logs via
+    # `logging` and `click.echo`, which bypass rich.Console and clash with a
+    # refreshing spinner. Plain prints keep the output readable.
     start = time.time()
-    spinner = Spinner("dots", text=Text(f"  {label}…", style=ACCENT))
-    with Live(spinner, console=console, refresh_per_second=12, transient=True):
-        fn(**kwargs)
+    console.print(f"  [{ACCENT}]▸[/] {label}…")
+    fn(**kwargs)
     elapsed = time.time() - start
     console.print(f"  [{OK}]✓[/] {label} [dim]({elapsed:.1f}s)[/dim]")
     return elapsed

From 072901be86fb6317528001ff30384c36a271a299 Mon Sep 17 00:00:00 2001
From: Mathieu <mathieu.bonnet@telecom-sudparis.eu>
Date: Mon, 18 May 2026 19:26:55 +0200
Subject: [PATCH 12/24] warm pipeline dataclasses + factor run_step helper +
 update .gitignore

---
 .gitignore                |  1 +
 src/mmore/tui/app.py      | 40 ++++++++++++++++++++++++++++-----------
 src/mmore/tui/pipeline.py | 29 +++++++---------------------
 src/mmore/tui/theme.py    | 18 ++++++++++++++++++
 4 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/.gitignore b/.gitignore
index a490b5de..eaf88e7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,6 +114,7 @@ venv.bak/
 # Milvus DB
 db/
 *.db
+*.db.lock
 
 # Project files
 tmp/
diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 1a758012..69d13854 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-import time
+import threading
 
 import questionary
 from rich.panel import Panel
@@ -24,10 +24,35 @@
     QMARK,
     QSTYLE,
     console,
+    run_step,
     section,
     show_banner,
 )
 
+_PIPELINE_STAGES = ("process", "postprocess", "index")
+
+
+def _warm_pipeline_dataclasses() -> None:
+    """Pre-load process/postprocess/index dataclasses in a daemon thread.
+
+    Called when entering the wizard or full-pipeline flows, where several YAML
+    validations happen back-to-back. The import cost then overlaps with the
+    wizard's own prompts. Daemon = no impact on exit. Stages whose canary
+    imports are missing are skipped so partial installs don't crash the warm-up.
+    """
+
+    def _warm() -> None:
+        for stage in _PIPELINE_STAGES:
+            spec = REGISTRY[stage]
+            if check_stage_available(spec) is not None or spec.config_dataclass is None:
+                continue
+            try:
+                spec.config_dataclass()
+            except Exception:  # noqa: BLE001
+                pass
+
+    threading.Thread(target=_warm, daemon=True).start()
+
 
 def _show_missing_extras(spec_name: str, hint: str) -> None:
     console.print(
@@ -76,15 +101,6 @@ def _disabled_label(label: str) -> str:
     return f"⚠  {label}"
 
 
-def _run_with_spinner(label: str, fn, **kwargs) -> None:
-    # See pipeline._run_step: heavy underlying commands log to stdout in ways
-    # that clash with a rich Live spinner. Plain prints keep output readable.
-    start = time.time()
-    console.print(f"  [{ACCENT}]▸[/] {label}…")
-    fn(**kwargs)
-    console.print(f"  [{OK}]✓[/] {label} [dim]({time.time() - start:.1f}s)[/dim]")
-
-
 def _run_single_command() -> None:
     choices = []
     enabled_count = 0
@@ -149,7 +165,7 @@ def _run_single_command() -> None:
     if interactive:
         spec.run(**kwargs)
     else:
-        _run_with_spinner(spec.description, spec.run, **kwargs)
+        run_step(spec.description, spec.run, **kwargs)
     console.print(f"[{OK}]✓ {name} finished[/]")
 
 
@@ -161,6 +177,7 @@ def _chat_only() -> None:
 
 
 def _run_full_wizard() -> None:
+    _warm_pipeline_dataclasses()
     paths = build_full_pipeline_wizard()
     console.print()
     console.print(
@@ -261,6 +278,7 @@ def run() -> None:
             if mode == "single":
                 _run_single_command()
             elif mode == "pipeline":
+                _warm_pipeline_dataclasses()
                 run_full_pipeline()
             elif mode == "wizard":
                 _run_full_wizard()
diff --git a/src/mmore/tui/pipeline.py b/src/mmore/tui/pipeline.py
index 2263ea0b..025692fb 100644
--- a/src/mmore/tui/pipeline.py
+++ b/src/mmore/tui/pipeline.py
@@ -2,9 +2,6 @@
 
 from __future__ import annotations
 
-import os
-import time
-
 import questionary
 from rich.table import Table
 from rich.text import Text
@@ -16,8 +13,8 @@
     ACCENT,
     ACCENT2,
     MUTED,
-    OK,
     console,
+    run_step,
     section,
     step_header,
 )
@@ -51,18 +48,6 @@ def _postprocess_output_jsonl(config_path: str) -> str:
     return jsonl_path(cfg.output.output_path)
 
 
-def _run_step(label: str, fn, **kwargs) -> float:
-    # No Live spinner here: run_process / run_index emit their own logs via
-    # `logging` and `click.echo`, which bypass rich.Console and clash with a
-    # refreshing spinner. Plain prints keep the output readable.
-    start = time.time()
-    console.print(f"  [{ACCENT}]▸[/] {label}…")
-    fn(**kwargs)
-    elapsed = time.time() - start
-    console.print(f"  [{OK}]✓[/] {label} [dim]({elapsed:.1f}s)[/dim]")
-    return elapsed
-
-
 def _summary_table(rows: list[tuple[str, str, float]]) -> Table:
     table = Table(
         title="[bold]Pipeline summary[/bold]",
@@ -97,7 +82,7 @@ def run_pipeline_with_configs(process_cfg: str, pp_cfg: str, index_cfg: str) ->
     rows: list[tuple[str, str, float]] = []
 
     step_header(1, 3, "process")
-    elapsed = _run_step(
+    elapsed = run_step(
         "Crawling + extracting documents",
         REGISTRY["process"].run,
         config_file=process_cfg,
@@ -107,7 +92,7 @@ def run_pipeline_with_configs(process_cfg: str, pp_cfg: str, index_cfg: str) ->
     inspect_jsonl(process_jsonl)
 
     step_header(2, 3, "postprocess")
-    elapsed = _run_step(
+    elapsed = run_step(
         "Chunking + cleaning",
         REGISTRY["postprocess"].run,
         config_file=pp_cfg,
@@ -118,7 +103,7 @@ def run_pipeline_with_configs(process_cfg: str, pp_cfg: str, index_cfg: str) ->
     inspect_jsonl(pp_jsonl)
 
     step_header(3, 3, "index")
-    elapsed = _run_step(
+    elapsed = run_step(
         "Embedding + indexing into Milvus",
         REGISTRY["index"].run,
         config_file=index_cfg,
@@ -149,7 +134,7 @@ def run_full_pipeline() -> None:
 
     step_header(1, 3, "process")
     process_cfg = pick_or_build_config(REGISTRY["process"])
-    elapsed = _run_step(
+    elapsed = run_step(
         "Crawling + extracting documents",
         REGISTRY["process"].run,
         config_file=process_cfg,
@@ -160,7 +145,7 @@ def run_full_pipeline() -> None:
 
     step_header(2, 3, "postprocess")
     pp_cfg = pick_or_build_config(REGISTRY["postprocess"])
-    elapsed = _run_step(
+    elapsed = run_step(
         "Chunking + cleaning",
         REGISTRY["postprocess"].run,
         config_file=pp_cfg,
@@ -172,7 +157,7 @@ def run_full_pipeline() -> None:
 
     step_header(3, 3, "index")
     index_cfg = pick_or_build_config(REGISTRY["index"], documents_path=pp_jsonl)
-    elapsed = _run_step(
+    elapsed = run_step(
         "Embedding + indexing into Milvus",
         REGISTRY["index"].run,
         config_file=index_cfg,
diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
index b3710278..cfae578f 100644
--- a/src/mmore/tui/theme.py
+++ b/src/mmore/tui/theme.py
@@ -2,6 +2,9 @@
 
 from __future__ import annotations
 
+import time
+from typing import Any, Callable
+
 from questionary import Style
 from rich.align import Align
 from rich.console import Console, Group
@@ -77,6 +80,21 @@ def section(title: str, body: str | Text, style: str = ACCENT) -> Panel:
     )
 
 
+def run_step(label: str, fn: Callable[..., Any], **kwargs: Any) -> float:
+    """Print a start line, call fn(**kwargs), print a timed done line.
+
+    Heavy pipeline commands emit their own logs via logging/click which bypass
+    rich.Console — a Live spinner would clash with them. Plain prints keep the
+    output readable while still showing progress.
+    """
+    start = time.time()
+    console.print(f"  [{ACCENT}]▸[/] {label}…")
+    fn(**kwargs)
+    elapsed = time.time() - start
+    console.print(f"  [{OK}]✓[/] {label} [dim]({elapsed:.1f}s)[/dim]")
+    return elapsed
+
+
 def step_header(idx: int, total: int, name: str) -> None:
     bar = "─" * 4
     console.print()

From d665ee9720cdd11471cdba2d5c1cbbc63914a965 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 19 May 2026 13:35:00 +0200
Subject: [PATCH 13/24] feat(tui): add setup wizard for deps install and .env
 generation

Add a guided setup flow accessible from the main menu that walks the
user through picking pipeline stages, selecting a compute backend
(cpu / cu126), running `uv sync` with the right extras, and generating
a .env file with the API keys / paths each stage needs. Existing .env
entries are preserved on merge, and secret values are masked in the
preview table.
---
 src/mmore/tui/app.py   |   2 +
 src/mmore/tui/setup.py | 307 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100644 src/mmore/tui/setup.py

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 69d13854..d5e54e99 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -16,6 +16,7 @@
 from mmore.tui.exceptions import UserCancelledError
 from mmore.tui.paths import cwd_default
 from mmore.tui.pipeline import run_full_pipeline, run_pipeline_with_configs
+from mmore.tui.setup import run_setup_wizard
 from mmore.tui.theme import (
     ACCENT,
     ACCENT2,
@@ -252,6 +253,7 @@ def _main_menu() -> str | None:
             wizard_choice,
             chat_choice,
             questionary.Separator(),
+            questionary.Choice("🔧 Setup (install deps + generate .env)", value="setup"),
             questionary.Choice("✕  Quit", value="quit"),
         ],
         style=QSTYLE,
diff --git a/src/mmore/tui/setup.py b/src/mmore/tui/setup.py
new file mode 100644
index 00000000..45ef2931
--- /dev/null
+++ b/src/mmore/tui/setup.py
@@ -0,0 +1,307 @@
+"""Setup wizard: install extras + generate .env in one guided flow."""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any
+
+import questionary
+from rich.panel import Panel
+from rich.syntax import Syntax
+from rich.table import Table
+from rich.text import Text
+
+from mmore.tui.commands import REGISTRY, check_stage_available
+from mmore.tui.config_builder import _ask, _confirm, _prompt
+from mmore.tui.theme import ACCENT, ACCENT2, MUTED, OK, QMARK, QSTYLE, console
+
+# ---------------------------------------------------------------------------
+# Stage → extras mapping
+# ---------------------------------------------------------------------------
+
+_STAGE_EXTRAS: dict[str, list[str]] = {
+    "process": ["process"],
+    "postprocess": ["process"],
+    "index": ["index"],
+    "rag": ["rag"],
+    "ragcli": ["rag"],
+    "retrieve": ["rag", "api"],
+    "websearch": ["websearch"],
+}
+
+_COMPUTE_EXTRAS = [
+    ("cpu", "CPU-only (no CUDA)"),
+    ("cu126", "CUDA 12.6 (GPU)"),
+]
+
+# ---------------------------------------------------------------------------
+# Stage → env vars that may be needed
+# ---------------------------------------------------------------------------
+
+_STAGE_ENV_VARS: dict[str, list[tuple[str, str, str]]] = {
+    # (var_name, description, default_or_empty)
+    "process": [
+        ("ROOT_OUT_DIR", "Root output directory for processed results", ""),
+        ("ROOT_IN_DIR", "Root input directory for source documents", ""),
+    ],
+    "rag": [
+        ("OPENAI_API_KEY", "OpenAI API key (for GPT models)", ""),
+        ("ANTHROPIC_API_KEY", "Anthropic API key (for Claude models)", ""),
+        ("MISTRAL_API_KEY", "Mistral API key", ""),
+        ("COHERE_API_KEY", "Cohere API key", ""),
+        ("HF_TOKEN", "HuggingFace token (for gated models)", ""),
+    ],
+    "websearch": [
+        ("TAVILY_API_KEY", "Tavily API key (optional, DuckDuckGo used otherwise)", ""),
+    ],
+}
+
+# Aliases: ragcli and retrieve share rag's env vars
+_STAGE_ENV_VARS["ragcli"] = _STAGE_ENV_VARS["rag"]
+_STAGE_ENV_VARS["retrieve"] = _STAGE_ENV_VARS["rag"]
+
+# Profiling env vars (always available)
+_PROFILING_VARS: list[tuple[str, str, str]] = [
+    ("MMORE_PROFILING_ENABLED", "Enable profiling", "false"),
+    ("MMORE_PROFILING_OUTPUT_DIR", "Profiling output directory", "./profiling_output"),
+]
+
+
+def _detect_installed_stages() -> dict[str, bool]:
+    """Check which stages have their deps installed."""
+    return {name: check_stage_available(spec) is None for name, spec in REGISTRY.items()}
+
+
+def _pick_stages() -> list[str]:
+    """Ask the user which pipeline stages they want to use."""
+    installed = _detect_installed_stages()
+    choices = []
+    for name, spec in REGISTRY.items():
+        label = f"{name:<12} — {spec.description}"
+        if installed[name]:
+            label += "  [dim](installed)[/dim]"
+        choices.append(questionary.Choice(label, value=name, checked=not installed[name]))
+
+    selected = _ask(
+        questionary.checkbox(
+            "Which stages do you want to set up?",
+            choices=choices,
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
+    return selected
+
+
+def _pick_compute() -> str:
+    """Ask the user which compute backend to use."""
+    choices = [
+        questionary.Choice(f"{name:<6} — {desc}", value=name)
+        for name, desc in _COMPUTE_EXTRAS
+    ]
+    return _ask(
+        questionary.select(
+            "Compute backend",
+            choices=choices,
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
+
+
+def _build_uv_command(stages: list[str], compute: str) -> list[str]:
+    """Build the uv sync command from selected stages + compute."""
+    extras: set[str] = {"tui"}  # always include TUI
+    for stage in stages:
+        extras.update(_STAGE_EXTRAS.get(stage, []))
+    extras.add(compute)
+
+    cmd = [sys.executable, "-m", "uv", "sync"]
+    for extra in sorted(extras):
+        cmd.extend(["--extra", extra])
+    return cmd
+
+
+def _install_deps(stages: list[str], compute: str) -> bool:
+    """Run uv sync with the right extras. Returns True on success."""
+    cmd = _build_uv_command(stages, compute)
+    display_cmd = " ".join(cmd[2:])  # skip python -m prefix for display
+    console.print(f"\n  [bold]Running:[/] {display_cmd}\n")
+
+    result = subprocess.run(cmd, cwd=os.getcwd())
+    if result.returncode == 0:
+        console.print(f"  [{OK}]✓[/] Dependencies installed successfully")
+        return True
+    console.print("  [bold red]✗[/] Installation failed — check output above")
+    return False
+
+
+def _collect_env_vars(stages: list[str]) -> dict[str, str]:
+    """Prompt for each selected stage's env vars and return the non-empty answers."""
+    seen: set[str] = set()
+    env_vars: dict[str, str] = {}
+
+    # Collect (name, description, default) entries, keeping the first per name
+    all_vars: list[tuple[str, str, str]] = []
+    for stage in stages:
+        for var in _STAGE_ENV_VARS.get(stage, []):
+            if var[0] not in seen:
+                seen.add(var[0])
+                all_vars.append(var)
+
+    if not all_vars:
+        return env_vars  # nothing to ask; the profiling prompt is skipped too
+
+    console.print(
+        Panel(
+            "Set environment variables for your selected stages.\n"
+            "Leave blank to skip — you can always edit the .env file later.",
+            title="[bold]Environment variables[/bold]",
+            border_style=ACCENT,
+            padding=(1, 2),
+        )
+    )
+
+    for var_name, description, default in all_vars:
+        # NOTE(review): the hint uses Rich markup; confirm _prompt renders it
+        current = os.environ.get(var_name, "")  # "" when the variable is unset
+        hint = f" [dim](current: {current[:20]}…)[/dim]" if current else ""
+        value = _prompt(f"{var_name} — {description}{hint}", default=current or default)
+        if value:  # a blank answer leaves the variable out of the result
+            env_vars[var_name] = value
+
+    # Profiling vars are opt-in and use their built-in defaults, not os.environ
+    if _confirm("Configure profiling settings?", default=False):
+        for var_name, description, default in _PROFILING_VARS:
+            value = _prompt(f"{var_name} — {description}", default=default)
+            if value:
+                env_vars[var_name] = value
+
+    return env_vars
+
+
+def _write_dotenv(env_vars: dict[str, str], path: str = ".env") -> str:
+    """Write or merge env vars into a .env file.
+
+    Lines already in the file are kept verbatim. A variable the file already
+    defines (with or without a leading ``export``) is never overwritten; the
+    rest are appended, double-quoted and escaped when the value needs it.
+
+    Args:
+        env_vars: Mapping of variable name to value.
+        path: Destination file; missing parent directories are created.
+
+    Returns:
+        The path that was written, as a string.
+    """
+    env_path = Path(path)
+    lines: list[str] = []
+    if env_path.exists():
+        lines = env_path.read_text(encoding="utf-8").splitlines()
+
+    defined: set[str] = set()
+    for line in lines:
+        name, sep, _ = line.strip().partition("=")
+        if sep and not name.startswith("#"):
+            # `export NAME=value` defines NAME just like `NAME=value` does.
+            name = name[7:] if name.startswith("export ") else name
+            defined.add(name.strip())
+
+    for key, value in env_vars.items():
+        if key in defined:
+            continue  # don't overwrite existing
+        if any(ch in value for ch in " \t\"'#"):
+            # Quote and escape so a dotenv parser reads the value back intact.
+            value = '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'
+        lines.append(f"{key}={value}")
+
+    env_path.parent.mkdir(parents=True, exist_ok=True)
+    env_path.write_text("".join(f"{line}\n" for line in lines), encoding="utf-8")
+    return str(env_path)
+
+
+def _preview_dotenv(env_vars: dict[str, str]) -> None:
+    """Print a table of env_vars, masking values whose name has KEY or TOKEN."""
+    if not env_vars:
+        console.print("  [dim]No environment variables to write.[/dim]")
+        return
+
+    table = Table(
+        title="[bold].env preview[/bold]",
+        title_style=ACCENT2,
+        border_style=ACCENT,
+        show_lines=False,
+    )
+    table.add_column("Variable", style="bold")
+    table.add_column("Value", style=MUTED)
+
+    for key, value in env_vars.items():
+        # Mask secrets: keep 4 chars at each end; 8 chars or fewer become ****
+        if "KEY" in key or "TOKEN" in key:
+            display = value[:4] + "…" + value[-4:] if len(value) > 8 else "****"
+        else:
+            display = value
+        table.add_row(key, display)
+
+    console.print(table)
+
+
+def run_setup_wizard() -> None:
+    """Run the setup wizard: pick stages and compute → install deps → generate .env."""
+    console.print(
+        Panel(
+            Text(
+                "This wizard will:\n"
+                "  1. Install the right Python dependencies for your pipeline\n"
+                "  2. Generate a .env file with the required environment variables",
+            ),
+            title="[bold]Setup wizard[/bold]",
+            border_style=ACCENT2,
+            padding=(1, 2),
+        )
+    )
+
+    # Step 1: pick stages (an empty selection ends the wizard)
+    stages = _pick_stages()
+    if not stages:
+        console.print("  [dim]No stages selected — nothing to do.[/dim]")
+        return
+
+    # Step 2: pick compute backend
+    compute = _pick_compute()
+
+    # Step 3: show install command and confirm
+    cmd = _build_uv_command(stages, compute)
+    display_cmd = " ".join(cmd[2:])
+    console.print(
+        Panel(
+            Text(display_cmd),
+            title="[bold]Install command[/bold]",
+            border_style=ACCENT,
+            padding=(0, 2),
+        )
+    )
+    if _confirm("Install dependencies now?", default=True):
+        if not _install_deps(stages, compute):
+            if not _confirm("Continue to .env setup despite install failure?", default=False):
+                return
+
+    # Step 4: collect env vars
+    env_vars = _collect_env_vars(stages)
+
+    # Step 5: preview and write .env
+    if env_vars:
+        _preview_dotenv(env_vars)
+        env_path = _prompt(".env file path", default=".env")
+        if _confirm(f"Write {len(env_vars)} variable(s) to {env_path}?", default=True):
+            written = _write_dotenv(env_vars, env_path)
+            console.print(f"  [{OK}]✓[/] Saved to {written}")
+        else:
+            console.print("  [dim]Skipped .env generation.[/dim]")
+    else:
+        console.print("  [dim]No environment variables needed for selected stages.[/dim]")
+
+    console.print(f"\n  [{OK}]✓ Setup complete![/] Run [bold]mmore tui[/bold] to start.\n")

From 47b8b12198200226e31422b185a7a1321a6e4002 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 19 May 2026 14:40:57 +0200
Subject: [PATCH 14/24] style(tui): match GitHub logo colors in banner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Outline characters in white, filled blocks in pure black, second M in
yellow — matches the m(m)ore logo. Uses hex colors to avoid terminal
themes remapping ANSI black to dark grey.
---
 src/mmore/tui/theme.py | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
index cfae578f..4f29b727 100644
--- a/src/mmore/tui/theme.py
+++ b/src/mmore/tui/theme.py
@@ -46,20 +46,44 @@
 """
 
 
-def _gradient(text: str, start: str = "bright_cyan", end: str = "magenta") -> Text:
-    """Cheap two-color gradient — top half ACCENT, bottom half ACCENT2."""
-    lines = text.splitlines()
-    half = max(1, len(lines) // 2)
+def _mmore_logo(text: str) -> Text:
+    """Color the banner like the mmore GitHub logo.
+
+    Strategy, per character:
+    - The second `M` (columns 12:23 of every row) is rendered fully in yellow.
+    - Elsewhere: outline characters (`╔╗╚╝═║╔╝╗`, etc.) are white and the
+      filled `█` blocks are black, giving the letters a hollow look.
+    """
+    OUTLINE = set("╔╗╚╝═║╠╣╦╩╬╔╝╗┌┐└┘─│")
     out = Text()
-    for i, line in enumerate(lines):
-        style = start if i < half else end
-        out.append(line + "\n", style=style)
+    for line in text.splitlines():
+        if not line.strip():
+            out.append(line + "\n")
+            continue
+        left = line[:12]
+        mid = line[12:23]
+        right = line[23:]
+
+        def _emit(segment: str) -> None:
+            for ch in segment:
+                if ch == "█":
+                    # explicit hex — terminal "black" often renders as dark grey
+                    out.append(ch, style="#000000")
+                elif ch in OUTLINE:
+                    out.append(ch, style="bold #ffffff")
+                else:
+                    out.append(ch)
+
+        _emit(left)
+        out.append(mid, style="bold yellow")
+        _emit(right)
+        out.append("\n")
     return out
 
 
 def show_banner(subtitle: str = "interactive launcher") -> None:
     body = Group(
-        _gradient(BANNER),
+        _mmore_logo(BANNER),
         Align.center(Text(subtitle, style=f"italic {MUTED}")),
     )
     console.print(

From ca5479d42d158d66b0c6dffc7ec901df9c667ea0 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Tue, 19 May 2026 14:46:22 +0200
Subject: [PATCH 15/24] fix(tui): ruff lint and format

---
 src/mmore/tui/app.py   |  5 +++--
 src/mmore/tui/setup.py | 22 +++++++++++++++-------
 src/mmore/tui/theme.py |  4 ++--
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index d5e54e99..4f0f286c 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -16,7 +16,6 @@
 from mmore.tui.exceptions import UserCancelledError
 from mmore.tui.paths import cwd_default
 from mmore.tui.pipeline import run_full_pipeline, run_pipeline_with_configs
-from mmore.tui.setup import run_setup_wizard
 from mmore.tui.theme import (
     ACCENT,
     ACCENT2,
@@ -253,7 +252,9 @@ def _main_menu() -> str | None:
             wizard_choice,
             chat_choice,
             questionary.Separator(),
-            questionary.Choice("🔧 Setup (install deps + generate .env)", value="setup"),
+            questionary.Choice(
+                "🔧 Setup (install deps + generate .env)", value="setup"
+            ),
             questionary.Choice("✕  Quit", value="quit"),
         ],
         style=QSTYLE,
diff --git a/src/mmore/tui/setup.py b/src/mmore/tui/setup.py
index 45ef2931..cffeb224 100644
--- a/src/mmore/tui/setup.py
+++ b/src/mmore/tui/setup.py
@@ -6,11 +6,9 @@
 import subprocess
 import sys
 from pathlib import Path
-from typing import Any
 
 import questionary
 from rich.panel import Panel
-from rich.syntax import Syntax
 from rich.table import Table
 from rich.text import Text
 
@@ -72,7 +70,9 @@
 
 def _detect_installed_stages() -> dict[str, bool]:
     """Check which stages have their deps installed."""
-    return {name: check_stage_available(spec) is None for name, spec in REGISTRY.items()}
+    return {
+        name: check_stage_available(spec) is None for name, spec in REGISTRY.items()
+    }
 
 
 def _pick_stages() -> list[str]:
@@ -83,7 +83,9 @@ def _pick_stages() -> list[str]:
         label = f"{name:<12} — {spec.description}"
         if installed[name]:
             label += "  [dim](installed)[/dim]"
-        choices.append(questionary.Choice(label, value=name, checked=not installed[name]))
+        choices.append(
+            questionary.Choice(label, value=name, checked=not installed[name])
+        )
 
     selected = _ask(
         questionary.checkbox(
@@ -286,7 +288,9 @@ def run_setup_wizard() -> None:
     )
     if _confirm("Install dependencies now?", default=True):
         if not _install_deps(stages, compute):
-            if not _confirm("Continue to .env setup despite install failure?", default=False):
+            if not _confirm(
+                "Continue to .env setup despite install failure?", default=False
+            ):
                 return
 
     # Step 4: collect env vars
@@ -302,6 +306,10 @@ def run_setup_wizard() -> None:
         else:
             console.print("  [dim]Skipped .env generation.[/dim]")
     else:
-        console.print("  [dim]No environment variables needed for selected stages.[/dim]")
+        console.print(
+            "  [dim]No environment variables needed for selected stages.[/dim]"
+        )
 
-    console.print(f"\n  [{OK}]✓ Setup complete![/] Run [bold]mmore tui[/bold] to start.\n")
+    console.print(
+        f"\n  [{OK}]✓ Setup complete![/] Run [bold]mmore tui[/bold] to start.\n"
+    )
diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
index 4f29b727..2df57d15 100644
--- a/src/mmore/tui/theme.py
+++ b/src/mmore/tui/theme.py
@@ -54,7 +54,7 @@ def _mmore_logo(text: str) -> Text:
     - Elsewhere: outline characters (`╔╗╚╝═║╔╝╗`, etc.) are white and the
       filled `█` blocks are black, giving the letters a hollow look.
     """
-    OUTLINE = set("╔╗╚╝═║╠╣╦╩╬╔╝╗┌┐└┘─│")
+    outline_chars = set("╔╗╚╝═║╠╣╦╩╬╔╝╗┌┐└┘─│")
     out = Text()
     for line in text.splitlines():
         if not line.strip():
@@ -69,7 +69,7 @@ def _emit(segment: str) -> None:
                 if ch == "█":
                     # explicit hex — terminal "black" often renders as dark grey
                     out.append(ch, style="#000000")
-                elif ch in OUTLINE:
+                elif ch in outline_chars:
                     out.append(ch, style="bold #ffffff")
                 else:
                     out.append(ch)

From a7fe73fadd4d2012d3486806c73d5e463677bfd5 Mon Sep 17 00:00:00 2001
From: Mathieu <mathieu.bonnet@telecom-sudparis.eu>
Date: Fri, 22 May 2026 14:25:53 +0200
Subject: [PATCH 16/24] center banner, centralize _ask, add int/float prompts

---
 src/mmore/tui/config_builder.py | 208 +++++++++++++++-----------------
 src/mmore/tui/paths.py          |   2 -
 src/mmore/tui/theme.py          |   2 +-
 3 files changed, 95 insertions(+), 117 deletions(-)

diff --git a/src/mmore/tui/config_builder.py b/src/mmore/tui/config_builder.py
index bd4ce7ee..6c54c6a0 100644
--- a/src/mmore/tui/config_builder.py
+++ b/src/mmore/tui/config_builder.py
@@ -57,6 +57,20 @@ def _confirm(question: str, default: bool = False) -> bool:
     )
 
 
+def _prompt_int(question: str, default: int) -> int:
+    try:
+        return int(_prompt(question, str(default)))  # _prompt is given a str
+    except ValueError:
+        return default  # non-integer answer: silently fall back to the default
+
+
+def _prompt_float(question: str, default: float) -> float:
+    try:
+        return float(_prompt(question, str(default)))  # _prompt is given a str
+    except ValueError:
+        return default  # non-numeric answer: silently fall back to the default
+
+
 def _save(name: str, data: dict[str, Any]) -> str:
     CONFIG_DIR.mkdir(parents=True, exist_ok=True)
     path = CONFIG_DIR / f"{name}-{time.time_ns()}.yaml"
@@ -181,24 +195,24 @@ def build_process_config() -> str:
 
 
 def build_postprocess_config() -> str:
-    strategy = questionary.select(
-        "Chunking strategy",
-        choices=["sentence", "token", "word", "semantic"],
-        default="sentence",
-        style=QSTYLE,
-        qmark=QMARK,
-    ).ask()
-    if strategy is None:
-        raise UserCancelledError("cancelled")
-    table_handling = questionary.select(
-        "Table handling",
-        choices=["single_row", "multi_rows", "keep_whole", "none"],
-        default="single_row",
-        style=QSTYLE,
-        qmark=QMARK,
-    ).ask()
-    if table_handling is None:
-        raise UserCancelledError("cancelled")
+    strategy = _ask(
+        questionary.select(
+            "Chunking strategy",
+            choices=["sentence", "token", "word", "semantic"],
+            default="sentence",
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
+    table_handling = _ask(
+        questionary.select(
+            "Table handling",
+            choices=["single_row", "multi_rows", "keep_whole", "none"],
+            default="single_row",
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
     output_path = _prompt(
         "Output JSONL path",
         cwd_default("outputs/postprocess/results.jsonl"),
@@ -247,39 +261,27 @@ def build_index_config(documents_path: Optional[str] = None) -> str:
 def build_rag_config() -> str:
     """Wizard for `rag` / `retrieve` / `ragcli` configs."""
     llm_name = _prompt("LLM name", "OpenMeditron/meditron3-8b")
-    max_new_tokens_raw = _prompt("Max new tokens", "1200")
-    try:
-        max_new_tokens = int(max_new_tokens_raw)
-    except ValueError:
-        max_new_tokens = 1200
+    max_new_tokens = _prompt_int("Max new tokens", 1200)
 
     db_uri = _prompt(
         "DB URI (Milvus Lite file or server URL)", cwd_default("proc_demo.db")
     )
     db_name = _prompt("DB name", "my_db")
     collection = _prompt("Collection name", "my_docs")
-    k_raw = _prompt("Number of docs to retrieve (k)", "5")
-    try:
-        k = int(k_raw)
-    except ValueError:
-        k = 5
-    hybrid_raw = _prompt("Hybrid search weight (0.0 dense — 1.0 sparse)", "0.5")
-    try:
-        hybrid = float(hybrid_raw)
-    except ValueError:
-        hybrid = 0.5
+    k = _prompt_int("Number of docs to retrieve (k)", 5)
+    hybrid = _prompt_float("Hybrid search weight (0.0 dense — 1.0 sparse)", 0.5)
     use_web = _confirm("Augment retrieval with web search?", default=False)
     reranker = _prompt("Reranker model (blank to skip)", "BAAI/bge-reranker-base")
 
-    mode = questionary.select(
-        "Run mode",
-        choices=["local", "api"],
-        default="local",
-        style=QSTYLE,
-        qmark=QMARK,
-    ).ask()
-    if mode is None:
-        raise UserCancelledError("cancelled")
+    mode = _ask(
+        questionary.select(
+            "Run mode",
+            choices=["local", "api"],
+            default="local",
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
 
     cfg: dict[str, Any] = {
         "rag": {
@@ -308,11 +310,7 @@ def build_rag_config() -> str:
         )
         cfg["mode_args"] = {"input_file": input_file, "output_file": output_file}
     else:
-        port_raw = _prompt("API port", "8000")
-        try:
-            port = int(port_raw)
-        except ValueError:
-            port = 8000
+        port = _prompt_int("API port", 8000)
         cfg["mode_args"] = {
             "endpoint": "/rag",
             "host": "0.0.0.0",
@@ -331,11 +329,7 @@ def build_websearch_config() -> str:
             resolve_example("examples/rag/config.yaml"),
         )
     llm_name = _prompt("LLM name", "OpenMeditron/meditron3-8b")
-    max_new_tokens_raw = _prompt("Max new tokens", "1200")
-    try:
-        max_new_tokens = int(max_new_tokens_raw)
-    except ValueError:
-        max_new_tokens = 1200
+    max_new_tokens = _prompt_int("Max new tokens", 1200)
     input_queries = _prompt(
         "Input queries JSONL", resolve_example("examples/rag/queries.jsonl")
     )
@@ -343,25 +337,17 @@ def build_websearch_config() -> str:
         "Output JSON path",
         cwd_default("outputs/websearch/enhanced_results.json"),
     )
-    n_subqueries_raw = _prompt("Number of sub-queries per question", "2")
-    try:
-        n_subqueries = int(n_subqueries_raw)
-    except ValueError:
-        n_subqueries = 2
-    max_searches_raw = _prompt("Max searches per query", "5")
-    try:
-        max_searches = int(max_searches_raw)
-    except ValueError:
-        max_searches = 5
-    provider = questionary.select(
-        "Search provider",
-        choices=["duckduckgo"],
-        default="duckduckgo",
-        style=QSTYLE,
-        qmark=QMARK,
-    ).ask()
-    if provider is None:
-        raise UserCancelledError("cancelled")
+    n_subqueries = _prompt_int("Number of sub-queries per question", 2)
+    max_searches = _prompt_int("Max searches per query", 5)
+    provider = _ask(
+        questionary.select(
+            "Search provider",
+            choices=["duckduckgo"],
+            default="duckduckgo",
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
 
     cfg: dict[str, Any] = {
         "websearch": {
@@ -450,14 +436,14 @@ def build_process_config_wizard() -> str:
     extract_images = _confirm("Extract images from documents?", default=True)
 
     names = [n for n, _ in _ALL_PROCESSORS]
-    selected = questionary.checkbox(
-        "Select processors to enable",
-        choices=[questionary.Choice(n, value=n, checked=True) for n in names],
-        style=QSTYLE,
-        qmark=QMARK,
-    ).ask()
-    if selected is None:
-        raise UserCancelledError("cancelled")
+    selected = _ask(
+        questionary.checkbox(
+            "Select processors to enable",
+            choices=[questionary.Choice(n, value=n, checked=True) for n in names],
+            style=QSTYLE,
+            qmark=QMARK,
+        )
+    )
     if not selected:
         selected = names  # empty would mean a no-op pipeline; fall back to all
 
@@ -466,14 +452,7 @@ def build_process_config_wizard() -> str:
     for name, default in _ALL_PROCESSORS:
         if name not in selected:
             continue
-        if customize:
-            raw = _prompt(f"Batch size for {name}", str(default))
-            try:
-                value = int(raw)
-            except ValueError:
-                value = default
-        else:
-            value = default
+        value = _prompt_int(f"Batch size for {name}", default) if customize else default
         sizes.append({name: value})
 
     processor_config = {
@@ -526,24 +505,24 @@ def _postprocessor_choices() -> list[str]:
 
 def _ask_module_args(pp_type: str) -> dict[str, Any]:
     if pp_type == "chunker":
-        strategy = questionary.select(
-            "Chunking strategy",
-            choices=["sentence", "token", "word", "semantic"],
-            default="sentence",
-            style=QSTYLE,
-            qmark=QMARK,
-        ).ask()
-        if strategy is None:
-            raise UserCancelledError("cancelled")
-        table_handling = questionary.select(
-            "Table handling",
-            choices=["single_row", "multi_rows", "keep_whole", "none"],
-            default="single_row",
-            style=QSTYLE,
-            qmark=QMARK,
-        ).ask()
-        if table_handling is None:
-            raise UserCancelledError("cancelled")
+        strategy = _ask(
+            questionary.select(
+                "Chunking strategy",
+                choices=["sentence", "token", "word", "semantic"],
+                default="sentence",
+                style=QSTYLE,
+                qmark=QMARK,
+            )
+        )
+        table_handling = _ask(
+            questionary.select(
+                "Table handling",
+                choices=["single_row", "multi_rows", "keep_whole", "none"],
+                default="single_row",
+                style=QSTYLE,
+                qmark=QMARK,
+            )
+        )
         return {
             "chunking_strategy": strategy,
             "table_handling": table_handling,
@@ -570,14 +549,14 @@ def build_postprocess_config_wizard() -> str:
             console.print(
                 f"  [dim]current modules:[/] {', '.join(m['type'] for m in modules)}"
             )
-        pp_type = questionary.select(
-            "Add a post-processor module" if not modules else "Add another module",
-            choices=[*available, questionary.Separator(), "(done)"],
-            style=QSTYLE,
-            qmark=QMARK,
-        ).ask()
-        if pp_type is None:
-            raise UserCancelledError("cancelled")
+        pp_type = _ask(
+            questionary.select(
+                "Add a post-processor module" if not modules else "Add another module",
+                choices=[*available, questionary.Separator(), "(done)"],
+                style=QSTYLE,
+                qmark=QMARK,
+            )
+        )
         if pp_type == "(done)":
             break
         args = _ask_module_args(pp_type)
@@ -847,7 +826,8 @@ def pick_or_build_config(
             else:
                 path = builder()
 
-        assert path is not None
+        if path is None:
+            raise UserCancelledError("no config selected")
         err = _validate_with_spinner(path, spec)
         if err is None:
             return _post_validation_menu(path, spec)
diff --git a/src/mmore/tui/paths.py b/src/mmore/tui/paths.py
index 3c6233bf..cb2594b6 100644
--- a/src/mmore/tui/paths.py
+++ b/src/mmore/tui/paths.py
@@ -10,12 +10,10 @@
 from __future__ import annotations
 
 import os
-from functools import lru_cache
 from pathlib import Path
 from typing import Optional
 
 
-@lru_cache(maxsize=1)
 def repo_root() -> Optional[Path]:
     """Return a directory that contains an `examples/` folder, if any."""
     cwd = Path.cwd()
diff --git a/src/mmore/tui/theme.py b/src/mmore/tui/theme.py
index 2df57d15..4a7aeb6d 100644
--- a/src/mmore/tui/theme.py
+++ b/src/mmore/tui/theme.py
@@ -83,7 +83,7 @@ def _emit(segment: str) -> None:
 
 def show_banner(subtitle: str = "interactive launcher") -> None:
     body = Group(
-        _mmore_logo(BANNER),
+        Align.center(_mmore_logo(BANNER)),
         Align.center(Text(subtitle, style=f"italic {MUTED}")),
     )
     console.print(

From c7f67c453ba4320c991e310053c34ed9db722181 Mon Sep 17 00:00:00 2001
From: fabnemEPFL <117652591+fabnemEPFL@users.noreply.github.com>
Date: Tue, 12 May 2026 11:22:16 +0200
Subject: [PATCH 17/24] Update paper link from OpenReview to arXiv

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 02a1b4c4..85d9f84b 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
 
 MMORE is an open-source, end-to-end pipeline to ingest, process, index, and retrieve knowledge from heterogeneous files: PDFs, Office docs, spreadsheets, emails, images, audio, video, and web pages. It standardizes content into a unified multimodal format, supports distributed CPU/GPU processing, and provides hybrid dense+sparse retrieval with an integrated RAG service (CLI, APIs). 
 
-👉 Read the paper for more details (OpenReview): [MMORE: Massive Multimodal Open RAG & Extraction](https://openreview.net/forum?id=6j1HjfIdKn)
+👉 Read the paper for more details (arXiv): [MMORE: Massive Multimodal Open RAG & Extraction](https://arxiv.org/abs/2509.11937)
 
 
 ### Documentation

From d3fd9650292e4f87aaed45908077c094b9b8eb56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Chaverot?= <chaverotjrmy7@gmail.com>
Date: Wed, 13 May 2026 17:02:27 +0200
Subject: [PATCH 18/24] Fix tests not passing in CI (#304)

---
 pyproject.toml |  3 ++-
 uv.lock        | 19 +++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 843fff9b..414ef7d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -87,7 +87,8 @@ process = [
 
 index = [
     # Indexing + Retrieval (vector DB, embeddings)
-    "pymilvus[milvus-lite]==2.6.6",
+    "pymilvus==2.6.6",
+    "milvus-lite==2.5.1",
     "pymilvus-model>=0.3.2",
     "milvus-model>=0.2.12",
     "langchain-milvus>=0.1.8",
diff --git a/uv.lock b/uv.lock
index 94f0d1da..8f887fea 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3575,7 +3575,7 @@ name = "milvus-lite"
 version = "2.5.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "tqdm", marker = "python_full_version < '3.11' or sys_platform != 'win32' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
+    { name = "tqdm" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/a9/b2/acc5024c8e8b6a0b034670b8e8af306ebd633ede777dcbf557eac4785937/milvus_lite-2.5.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6b014453200ba977be37ba660cb2d021030375fa6a35bc53c2e1d92980a0c512", size = 27934713, upload-time = "2025-06-30T04:23:37.028Z" },
@@ -3676,13 +3676,14 @@ all = [
     { name = "markdown" },
     { name = "markdownify" },
     { name = "marker-pdf" },
+    { name = "milvus-lite" },
     { name = "milvus-model" },
     { name = "motor" },
     { name = "moviepy" },
     { name = "nltk" },
     { name = "openpyxl" },
     { name = "py7zr" },
-    { name = "pymilvus", extra = ["milvus-lite"] },
+    { name = "pymilvus" },
     { name = "pymilvus-model" },
     { name = "pymongo" },
     { name = "pymupdf" },
@@ -3734,8 +3735,9 @@ dev = [
 ]
 index = [
     { name = "langchain-milvus" },
+    { name = "milvus-lite" },
     { name = "milvus-model" },
-    { name = "pymilvus", extra = ["milvus-lite"] },
+    { name = "pymilvus" },
     { name = "pymilvus-model" },
     { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
     { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
@@ -3790,9 +3792,10 @@ rag = [
     { name = "langchain-milvus" },
     { name = "langchain-mistralai" },
     { name = "langchain-openai" },
+    { name = "milvus-lite" },
     { name = "milvus-model" },
     { name = "nltk" },
-    { name = "pymilvus", extra = ["milvus-lite"] },
+    { name = "pymilvus" },
     { name = "pymilvus-model" },
     { name = "ragas" },
     { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
@@ -3848,6 +3851,7 @@ requires-dist = [
     { name = "markdown", marker = "extra == 'process'", specifier = ">=3.5" },
     { name = "markdownify", marker = "extra == 'process'", specifier = ">=0.12" },
     { name = "marker-pdf", marker = "extra == 'process'", specifier = ">=1.6" },
+    { name = "milvus-lite", marker = "extra == 'index'", specifier = "==2.5.1" },
     { name = "milvus-model", marker = "extra == 'index'", specifier = ">=0.2.12" },
     { name = "mmore", extras = ["index"], marker = "extra == 'rag'" },
     { name = "mmore", extras = ["process", "rag", "api", "websearch", "tui"], marker = "extra == 'all'" },
@@ -3861,7 +3865,7 @@ requires-dist = [
     { name = "pillow" },
     { name = "py7zr", marker = "extra == 'process'", specifier = ">=0.22" },
     { name = "pydantic", specifier = ">=2.6" },
-    { name = "pymilvus", extras = ["milvus-lite"], marker = "extra == 'index'", specifier = "==2.6.6" },
+    { name = "pymilvus", marker = "extra == 'index'", specifier = "==2.6.6" },
     { name = "pymilvus-model", marker = "extra == 'index'", specifier = ">=0.3.2" },
     { name = "pymongo", marker = "extra == 'api'", specifier = ">=4.6" },
     { name = "pymupdf", marker = "extra == 'process'" },
@@ -6478,11 +6482,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e5/ab/890c3e258c09981a4df875fe762166b92111fc1f9fb1e646025ebe3acb1b/pymilvus-2.6.6-py3-none-any.whl", hash = "sha256:0e61daa573b0025650f072493cb978a9ada9cdb1d450594707592174b1f297c0", size = 285098, upload-time = "2025-12-30T09:11:27.099Z" },
 ]
 
-[package.optional-dependencies]
-milvus-lite = [
-    { name = "milvus-lite", marker = "sys_platform != 'win32' or (extra == 'extra-5-mmore-cpu' and extra == 'extra-5-mmore-cu126')" },
-]
-
 [[package]]
 name = "pymilvus-model"
 version = "0.3.2"

From c5415a2a6e8a29af81641fd78e4661e790a0bbee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Chaverot?= <chaverotjrmy7@gmail.com>
Date: Tue, 19 May 2026 14:43:06 +0200
Subject: [PATCH 19/24] Fix consumed file ID when upload fails (#299)

---
 src/mmore/run_index_api.py       | 53 +++++++++++++++++++++-------
 tests/test_live_retriever_api.py | 60 ++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/src/mmore/run_index_api.py b/src/mmore/run_index_api.py
index 732cc16a..972e2d8a 100644
--- a/src/mmore/run_index_api.py
+++ b/src/mmore/run_index_api.py
@@ -87,16 +87,28 @@ async def upload_file(
 
                 await file.close()
 
+                # Process and index the file
+                file_extension = FilePath(file.filename).suffix.lower()
+                try:
+                    documents = process_files_default(
+                        temp_dir, COLLECTION_NAME, [file_extension]
+                    )
+                except KeyError as e:
+                    logger.warning(
+                        "Could not process file '%s' with extension '%s'",
+                        file.filename,
+                        file_extension,
+                        exc_info=True,
+                    )
+                    raise HTTPException(
+                        status_code=422,
+                        detail=f"Could not process file '{file.filename}'",
+                    ) from e
+
                 # Save a permanent copy for later retrieval
                 os.makedirs(os.path.dirname(file_storage_path), exist_ok=True)
                 shutil.copy2(temp_file_path, file_storage_path)
 
-                # Process and index the file
-                file_extension = FilePath(file.filename).suffix.lower()
-                documents = process_files_default(
-                    temp_dir, COLLECTION_NAME, [file_extension]
-                )
-
                 for doc in documents:
                     defDocId = doc.document_id
                     doc.document_id = fileId
@@ -147,6 +159,7 @@ async def upload_files(
             with tempfile.TemporaryDirectory() as temp_dir:
                 logging.info(f"Starting to process {len(files)} files with custom IDs")
 
+                temp_paths: List[FilePath] = []
                 for file, file_id in zip(files, listIds):
                     if file.filename is None:
                         raise HTTPException(
@@ -163,12 +176,10 @@ async def upload_files(
                         )
 
                     # Save to temp directory
-                    file_name = FilePath(temp_dir) / file.filename
+                    file_name = FilePath(temp_dir) / f"{file_id}_{file.filename}"
                     with file_name.open("wb") as buffer:
                         shutil.copyfileobj(file.file, buffer)
-
-                    # Save a permanent copy
-                    shutil.copy2(file_name, file_storage_path)
+                    temp_paths.append(file_name)
 
                     # Close the file
                     await file.close()
@@ -179,9 +190,25 @@ async def upload_files(
                 file_extensions = [
                     FilePath(cast(str, file.filename)).suffix.lower() for file in files
                 ]
-                documents = process_files_default(
-                    temp_dir, COLLECTION_NAME, file_extensions
-                )
+                try:
+                    documents = process_files_default(
+                        temp_dir, COLLECTION_NAME, file_extensions
+                    )
+                except KeyError as e:
+                    logger.warning(
+                        "Could not process one of the uploaded files with extensions %s",
+                        file_extensions,
+                        exc_info=True,
+                    )
+                    raise HTTPException(
+                        status_code=422,
+                        detail="Could not process one of the uploaded files",
+                    ) from e
+
+                # Save permanent copies
+                for temp_path, file_id in zip(temp_paths, listIds):
+                    file_storage_path = FilePath(UPLOAD_DIR) / file_id
+                    shutil.copy2(temp_path, file_storage_path)
 
                 # Change the IDs to match the ones from the client
                 modified_documents = []
diff --git a/tests/test_live_retriever_api.py b/tests/test_live_retriever_api.py
index 1db80d6d..f779caf0 100644
--- a/tests/test_live_retriever_api.py
+++ b/tests/test_live_retriever_api.py
@@ -406,6 +406,32 @@ def test_upload_duplicate_file_returns_400(indexer_client):
     assert "already exists" in response.json()["detail"]
 
 
+def test_upload_failed_processing_does_not_consume_id(indexer_client):
+    tc, upload_dir, _ = indexer_client
+    file_id = "id"
+
+    response = tc.post(
+        "/v1/files",
+        data={"fileId": file_id},
+        files={"file": ("file.xyz", b"bad", "application/octet-stream")},
+    )
+    assert response.status_code == 422
+    assert not Path(upload_dir, file_id).exists()
+
+    fake_path = str(Path(upload_dir) / "good.txt")
+    with patch(
+        "mmore.run_index_api.process_files_default",
+        return_value=[_fake_doc(fake_path, file_id)],
+    ):
+        response = tc.post(
+            "/v1/files",
+            data={"fileId": file_id},
+            files={"file": ("file.txt", b"good", "text/plain")},
+        )
+    assert response.status_code == 201
+    assert Path(upload_dir, file_id).read_bytes() == b"good"
+
+
 # ---------------------------------------------------------------------------
 # POST /v1/files/bulk
 # ---------------------------------------------------------------------------
@@ -450,6 +476,40 @@ def test_upload_bulk_mismatched_ids_returns_400(indexer_client):
     assert "doesn't match" in response.json()["detail"]
 
 
+def test_upload_bulk_failed_processing_does_not_consume_ids(indexer_client):
+    tc, upload_dir, _ = indexer_client
+    ids = ["id-1", "id-2"]
+
+    response = tc.post(
+        "/v1/files/bulk",
+        data={"listIds": ",".join(ids)},
+        files=[
+            ("files", ("file.xyz", b"bad A", "application/octet-stream")),
+            ("files", ("file.xyz", b"bad B", "application/octet-stream")),
+        ],
+    )
+    assert response.status_code == 422
+    for file_id in ids:
+        assert not Path(upload_dir, file_id).exists()
+
+    fake_paths = [str(Path(upload_dir) / f"{i}_file.txt") for i in ids]
+    with patch(
+        "mmore.run_index_api.process_files_default",
+        return_value=[_fake_doc(p, i) for p, i in zip(fake_paths, ids)],
+    ):
+        response = tc.post(
+            "/v1/files/bulk",
+            data={"listIds": ",".join(ids)},
+            files=[
+                ("files", ("file.txt", b"good A", "text/plain")),
+                ("files", ("file.txt", b"good B", "text/plain")),
+            ],
+        )
+    assert response.status_code == 201
+    assert Path(upload_dir, ids[0]).read_bytes() == b"good A"
+    assert Path(upload_dir, ids[1]).read_bytes() == b"good B"
+
+
 # ---------------------------------------------------------------------------
 # PUT /v1/files/{fileId}
 # ---------------------------------------------------------------------------

From ca412b1753061aaf6133fb76112c9745c8610756 Mon Sep 17 00:00:00 2001
From: fabnemEPFL <117652591+fabnemEPFL@users.noreply.github.com>
Date: Tue, 19 May 2026 17:53:20 +0200
Subject: [PATCH 20/24] Fix #288 (#307)

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Copilot <copilot@github.com>
---
 src/mmore/run_index_api.py       | 106 ++++++++++++----
 tests/test_live_retriever_api.py | 212 ++++++++++++++++++++++++++-----
 2 files changed, 262 insertions(+), 56 deletions(-)

diff --git a/src/mmore/run_index_api.py b/src/mmore/run_index_api.py
index 972e2d8a..a6dacf76 100644
--- a/src/mmore/run_index_api.py
+++ b/src/mmore/run_index_api.py
@@ -4,7 +4,7 @@
 import shutil
 import tempfile
 from pathlib import Path as FilePath
-from typing import List, cast
+from typing import List
 
 import uvicorn
 from fastapi import APIRouter, FastAPI, File, Form, HTTPException, Path, UploadFile
@@ -23,6 +23,7 @@
 
 from .process.processors import register_all_processors
 from .rag.retriever import RetrieverConfig
+from .type import MultimodalSample
 from .utils import get_indexer, load_config, process_files_default
 
 UPLOAD_DIR: str = "./uploads"
@@ -34,6 +35,18 @@
 logger = logging.getLogger(__name__)
 
 
+def _apply_uploaded_file_metadata(
+    documents: List[MultimodalSample], file_id: str, filename: str
+) -> None:
+    """Bind processed chunks to the API file ID and persist the original filename."""
+    for doc in documents:
+        chunk_id = doc.id.rsplit("+", 1)[1] if "+" in doc.id else None
+        doc.document_id = file_id
+        doc.id = f"{file_id}+{chunk_id}" if chunk_id else file_id
+
+        doc.metadata.extra["filename"] = filename
+
+
 def make_router(config_path: str) -> APIRouter:
     router = APIRouter()
 
@@ -109,10 +122,8 @@ async def upload_file(
                 os.makedirs(os.path.dirname(file_storage_path), exist_ok=True)
                 shutil.copy2(temp_file_path, file_storage_path)
 
-                for doc in documents:
-                    defDocId = doc.document_id
-                    doc.document_id = fileId
-                    doc.id = doc.id.replace(defDocId, fileId)
+                # Reuse the documents processed above; bind them to the client file ID
+                _apply_uploaded_file_metadata(documents, fileId, file.filename)
 
                 # Get indexer and index the document
                 try:
@@ -148,7 +164,12 @@ async def upload_files(
         Upload multiple files with custom IDs and index them.
         """
         try:
-            listIds = listIds[0].split(",")
+            listIds = [
+                file_id.strip()
+                for ids in listIds
+                for file_id in ids.split(",")
+                if file_id.strip()
+            ]
             # Check if IDs and files match in number
             if len(listIds) != len(files):
                 raise HTTPException(
@@ -159,13 +180,15 @@ async def upload_files(
             with tempfile.TemporaryDirectory() as temp_dir:
                 logging.info(f"Starting to process {len(files)} files with custom IDs")
 
-                temp_paths: List[FilePath] = []
-                for file, file_id in zip(files, listIds):
+                uploaded_files: list[dict[str, str]] = []
+                file_info_by_temp_path = {}
+                for index, (file, file_id) in enumerate(zip(files, listIds)):
                     if file.filename is None:
                         raise HTTPException(
                             status_code=422,
                             detail=f"File {file_id} does not have a filename",
                         )
+                    filename = file.filename
 
                     # Check if file with this ID already exists
                     file_storage_path = FilePath(UPLOAD_DIR) / file_id
@@ -176,10 +199,19 @@ async def upload_files(
                         )
 
                     # Save to temp directory
-                    file_name = FilePath(temp_dir) / f"{file_id}_{file.filename}"
-                    with file_name.open("wb") as buffer:
+                    temp_file_path = (
+                        FilePath(temp_dir) / f"{index}{FilePath(filename).suffix}"
+                    )
+                    file_info = {
+                        "fileId": file_id,
+                        "filename": filename,
+                        "temp_path": str(temp_file_path.resolve()),
+                    }
+                    uploaded_files.append(file_info)
+                    file_info_by_temp_path[file_info["temp_path"]] = file_info
+
+                    with temp_file_path.open("wb") as buffer:
                         shutil.copyfileobj(file.file, buffer)
-                    temp_paths.append(file_name)
 
                     # Close the file
                     await file.close()
@@ -188,7 +220,8 @@ async def upload_files(
 
                 # Process the documents
                 file_extensions = [
-                    FilePath(cast(str, file.filename)).suffix.lower() for file in files
+                    FilePath(file_info["temp_path"]).suffix.lower()
+                    for file_info in uploaded_files
                 ]
                 try:
                     documents = process_files_default(
@@ -206,16 +239,34 @@ async def upload_files(
                     ) from e
 
                 # Save permanent copies
-                for temp_path, file_id in zip(temp_paths, listIds):
-                    file_storage_path = FilePath(UPLOAD_DIR) / file_id
-                    shutil.copy2(temp_path, file_storage_path)
+                for file_info in uploaded_files:
+                    file_storage_path = FilePath(UPLOAD_DIR) / file_info["fileId"]
+                    shutil.copy2(file_info["temp_path"], file_storage_path)
 
                 # Change the IDs to match the ones from the client
                 modified_documents = []
-                for doc, docId in zip(documents, listIds):
-                    defDocId = doc.document_id
-                    doc.document_id = docId
-                    doc.id = doc.id.replace(defDocId, docId)
+                text_by_file_id = {}
+                chunks_by_file_id = {
+                    file_info["fileId"]: 0 for file_info in uploaded_files
+                }
+                for doc_index, doc in enumerate(documents):
+                    doc_temp_path = str(FilePath(doc.metadata.file_path).resolve())
+                    file_info = file_info_by_temp_path.get(doc_temp_path)
+                    if file_info is None:
+                        if doc_index >= len(uploaded_files):
+                            raise HTTPException(
+                                status_code=500,
+                                detail=(
+                                    "Could not match processed document "
+                                    f"{doc.metadata.file_path} to an uploaded file"
+                                ),
+                            )
+                        # Fallback for processors/tests that return file paths outside temp_dir.
+                        file_info = uploaded_files[doc_index]
+                    doc_id = file_info["fileId"]
+                    _apply_uploaded_file_metadata([doc], doc_id, file_info["filename"])
+                    text_by_file_id.setdefault(doc_id, doc.text)
+                    chunks_by_file_id[doc_id] += 1
                     modified_documents.append(doc)
 
                 logging.info("Indexing the files")
@@ -232,10 +283,16 @@ async def upload_files(
 
                 return {
                     "status": "success",
-                    "message": f"Successfully processed and indexed {len(modified_documents)} documents",
+                    "message": f"Successfully processed and indexed {len(uploaded_files)} files",
                     "documents": [
-                        {"fileId": doc.document_id, "text": doc.text[:50] + "..."}
-                        for doc in modified_documents
+                        {
+                            "fileId": file_info["fileId"],
+                            "filename": file_info["filename"],
+                            "text": text_by_file_id.get(file_info["fileId"], "")[:50]
+                            + "...",
+                            "chunks": chunks_by_file_id[file_info["fileId"]],
+                        }
+                        for file_info in uploaded_files
                     ],
                 }
 
@@ -284,9 +341,8 @@ async def update_file(
                     temp_dir, COLLECTION_NAME, [file_extension]
                 )
 
-                # Set the custom ID
-                for doc in documents:
-                    doc.id = fileId
+                # Set the custom ID and preserve the original upload filename
+                _apply_uploaded_file_metadata(documents, fileId, file.filename)
 
                 # Get indexer and reindex the document
                 try:
diff --git a/tests/test_live_retriever_api.py b/tests/test_live_retriever_api.py
index f779caf0..f812c0ee 100644
--- a/tests/test_live_retriever_api.py
+++ b/tests/test_live_retriever_api.py
@@ -18,9 +18,14 @@
 
 from mmore.index.indexer import Indexer
 from mmore.rag.model import DenseModelConfig, SparseModelConfig
-from mmore.run_index_api import make_router as make_index_router
+from mmore.run_index_api import (
+    _apply_uploaded_file_metadata,
+)
+from mmore.run_index_api import (
+    make_router as make_index_router,
+)
 from mmore.run_retriever import make_router, save_results
-from mmore.type import MultimodalSample
+from mmore.type import DocumentMetadata, MultimodalSample
 
 _COLLECTION = "my_docs"
 
@@ -216,13 +221,16 @@ def test_save_results_writes_valid_json(tmp_path):
     docs = [
         Document(
             page_content="Paris is the capital.",
-            metadata={
-                "rank": 1,
-                "similarity": 0.9,
-                "id": "1",
-                "page_numbers": [],
-                "paragraph_numbers": [],
-            },
+            metadata=DocumentMetadata(
+                file_path="paris.txt",
+                extra={
+                    "rank": 1,
+                    "similarity": 0.9,
+                    "id": "1",
+                    "page_numbers": [],
+                    "paragraph_numbers": [],
+                },
+            ).to_dict(),
         )
     ]
     results = [docs]
@@ -246,25 +254,31 @@ def test_save_results_multiple_queries(tmp_path):
         [
             Document(
                 page_content="doc A",
-                metadata={
-                    "rank": 1,
-                    "similarity": 0.8,
-                    "id": "a",
-                    "page_numbers": [],
-                    "paragraph_numbers": [],
-                },
+                metadata=DocumentMetadata(
+                    file_path="doc-a.txt",
+                    extra={
+                        "rank": 1,
+                        "similarity": 0.8,
+                        "id": "a",
+                        "page_numbers": [],
+                        "paragraph_numbers": [],
+                    },
+                ).to_dict(),
             )
         ],
         [
             Document(
                 page_content="doc B",
-                metadata={
-                    "rank": 1,
-                    "similarity": 0.7,
-                    "id": "b",
-                    "page_numbers": [],
-                    "paragraph_numbers": [],
-                },
+                metadata=DocumentMetadata(
+                    file_path="doc-b.txt",
+                    extra={
+                        "rank": 1,
+                        "similarity": 0.7,
+                        "id": "b",
+                        "page_numbers": [],
+                        "paragraph_numbers": [],
+                    },
+                ).to_dict(),
             )
         ],
     ]
@@ -294,10 +308,21 @@ def _fake_doc(file_path: str, document_id: str = "doc") -> MultimodalSample:
         document_id=document_id,
         text="Test document content.",
         modalities=[],
-        metadata={"file_path": file_path},
+        metadata=DocumentMetadata(file_path=file_path),
     )
 
 
+def test_apply_uploaded_file_metadata_preserves_chunk_suffix():
+    doc = _fake_doc("/tmp/original-name.txt", document_id="default-doc")
+    doc.id = "processor-generated-id+7"
+
+    _apply_uploaded_file_metadata([doc], "client-doc", "original-name.txt")
+
+    assert doc.document_id == "client-doc"
+    assert doc.id == "client-doc+7"
+    assert doc.metadata.extra["filename"] == "original-name.txt"
+
+
 @pytest.fixture(scope="module")
 def indexer_client(tmp_path_factory):
     """Builds the indexer FastAPI app."""
@@ -392,6 +417,81 @@ def test_upload_file_success(indexer_client):
     assert Path(upload_dir, "new-doc").exists()
 
 
+def test_uploaded_file_has_filename_in_list_files(tmp_path):
+    upload_dir = tmp_path / "uploads"
+    upload_dir.mkdir()
+    db_path = str(tmp_path / "uploaded_list_files.db")
+    config_file = tmp_path / "config.yaml"
+    cfg = {
+        "db": {"uri": db_path, "name": "my_db"},
+        "hybrid_search_weight": 0.5,
+        "k": 2,
+        "collection_name": _COLLECTION,
+        "use_web": False,
+        "reranker_model_name": None,
+    }
+    with open(config_file, "w") as f:
+        yaml.dump(cfg, f)
+
+    with ExitStack() as stack:
+        stack.enter_context(
+            patch(
+                "mmore.index.indexer.SparseModel.from_config",
+                return_value=FakeSparseEmbedding(),
+            )
+        )
+        milvus_client = MilvusClient(db_path, enable_sparse=True)
+        the_indexer = Indexer(
+            dense_model_config=DenseModelConfig(model_name="debug"),
+            sparse_model_config=SparseModelConfig(
+                model_name="naver/splade-cocondenser-selfdistil"
+            ),
+            client=milvus_client,
+        )
+        stack.enter_context(patch("mmore.run_index_api.UPLOAD_DIR", str(upload_dir)))
+        stack.enter_context(patch("mmore.run_index_api.register_all_processors"))
+        stack.enter_context(
+            patch("mmore.run_index_api.get_indexer", return_value=the_indexer)
+        )
+
+        index_app = FastAPI()
+        index_app.include_router(make_index_router(str(config_file)))
+        index_client = TestClient(index_app, raise_server_exceptions=False)
+
+        uploaded_path = str(upload_dir / "listed-doc.txt")
+        stack.enter_context(
+            patch(
+                "mmore.run_index_api.process_files_default",
+                return_value=[_fake_doc(uploaded_path)],
+            )
+        )
+        response = index_client.post(
+            "/v1/files",
+            data={"fileId": "listed-doc"},
+            files={"file": ("listed-doc.txt", b"Hello list files", "text/plain")},
+        )
+        assert response.status_code == 201
+
+        stack.enter_context(
+            patch(
+                "mmore.rag.retriever.SparseModel.from_config",
+                return_value=FakeSparseEmbedding(),
+            )
+        )
+        retriever_app = FastAPI()
+        retriever_app.include_router(make_router(str(config_file)))
+        retriever_client = TestClient(retriever_app)
+
+        response = retriever_client.get(
+            "/list_files", params={"collection_name": _COLLECTION}
+        )
+
+    assert response.status_code == 200
+    files_by_id = {file["id"]: file["filename"] for file in response.json()}
+    assert files_by_id["listed-doc"] == "listed-doc.txt"
+    assert files_by_id["listed-doc"] != "Unknown"
+
+
 def test_upload_duplicate_file_returns_400(indexer_client):
     tc, upload_dir, _ = indexer_client
     duplicate_id = "duplicate-doc"
@@ -438,16 +538,25 @@ def test_upload_failed_processing_does_not_consume_id(indexer_client):
 
 
 def test_upload_bulk_files_success(indexer_client):
-    tc, upload_dir, _ = indexer_client
-    fake_path_1 = str(Path(upload_dir) / "bulk-1.txt")
-    fake_path_2 = str(Path(upload_dir) / "bulk-2.txt")
+    tc, *_ = indexer_client
+
+    def fake_process(temp_dir, collection_name, extensions):
+        first_path, second_path = sorted(Path(temp_dir).iterdir())
+        return [
+            _fake_doc(str(first_path), "bulk-1"),
+            MultimodalSample(
+                id="bulk-1+1",
+                document_id="bulk-1",
+                text="Second chunk from the first bulk document.",
+                modalities=[],
+                metadata=DocumentMetadata(file_path=str(first_path)),
+            ),
+            _fake_doc(str(second_path), "bulk-2"),
+        ]
 
     with patch(
         "mmore.run_index_api.process_files_default",
-        return_value=[
-            _fake_doc(fake_path_1, "bulk-1"),
-            _fake_doc(fake_path_2, "bulk-2"),
-        ],
+        side_effect=fake_process,
     ):
         response = tc.post(
             "/v1/files/bulk",
@@ -459,6 +568,47 @@ def test_upload_bulk_files_success(indexer_client):
         )
 
     assert response.status_code == 201
+    data = response.json()
+    documents_by_id = {doc["fileId"]: doc for doc in data["documents"]}
+    assert set(documents_by_id) == {"bulk-1", "bulk-2"}
+    assert documents_by_id["bulk-1"]["filename"] == "bulk-1.txt"
+    assert documents_by_id["bulk-1"]["chunks"] == 2
+    assert documents_by_id["bulk-2"]["filename"] == "bulk-2.txt"
+    assert documents_by_id["bulk-2"]["chunks"] == 1
+
+
+def test_upload_bulk_files_allows_duplicate_uploaded_filenames(indexer_client):
+    tc, upload_dir, _ = indexer_client
+
+    def fake_process(temp_dir, collection_name, extensions):
+        return [
+            _fake_doc(str(path), f"processed-{path.stem}")
+            for path in sorted(Path(temp_dir).iterdir())
+        ]
+
+    with patch(
+        "mmore.run_index_api.process_files_default",
+        side_effect=fake_process,
+    ):
+        response = tc.post(
+            "/v1/files/bulk",
+            data={"listIds": "same-name-1,same-name-2"},
+            files=[
+                ("files", ("same.txt", b"First content", "text/plain")),
+                ("files", ("same.txt", b"Second content", "text/plain")),
+            ],
+        )
+
+    assert response.status_code == 201
+    data = response.json()
+    documents_by_id = {doc["fileId"]: doc for doc in data["documents"]}
+    assert set(documents_by_id) == {"same-name-1", "same-name-2"}
+    assert documents_by_id["same-name-1"]["filename"] == "same.txt"
+    assert documents_by_id["same-name-1"]["chunks"] == 1
+    assert documents_by_id["same-name-2"]["filename"] == "same.txt"
+    assert documents_by_id["same-name-2"]["chunks"] == 1
+    assert Path(upload_dir, "same-name-1").exists()
+    assert Path(upload_dir, "same-name-2").exists()
 
 
 def test_upload_bulk_mismatched_ids_returns_400(indexer_client):

From 48606246f814352a7264abd37835aa716e40daf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Chaverot?= <chaverotjrmy7@gmail.com>
Date: Wed, 20 May 2026 22:18:10 +0200
Subject: [PATCH 21/24] Add workflow for Pyright type check (#300)

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
Co-authored-by: fabnemEPFL <117652591+fabnemEPFL@users.noreply.github.com>
---
 .github/workflows/pyright.yml | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/pyright.yml

diff --git a/.github/workflows/pyright.yml b/.github/workflows/pyright.yml
new file mode 100644
index 00000000..a0a1902b
--- /dev/null
+++ b/.github/workflows/pyright.yml
@@ -0,0 +1,34 @@
+name: 📐 Pyright type checks
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  pyright:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Install uv and create venv
+        run: |
+          pipx install uv
+          uv venv .venv
+
+      - name: Install dependencies
+        run: |
+          source .venv/bin/activate
+          uv pip install -e ".[process,index,rag,api,cpu,dev,websearch]"
+
+      - name: Run Pyright
+        continue-on-error: true
+        run: |
+          source .venv/bin/activate
+          pyright

From 9f4a0bf82432c1c37f17c58d5a12870bedbcaff7 Mon Sep 17 00:00:00 2001
From: Arthur PERRIN <arthur.perrin@telecom-sudparis.eu>
Date: Thu, 28 May 2026 16:45:59 +0200
Subject: [PATCH 22/24] address JCHAVEROT review comments on TUI

- wire run_setup_wizard() in app.py dispatch (elif mode == "setup")
- add uv to tui extra in pyproject.toml so setup wizard can run uv sync
- replace .env file generation with export command hints (mmore does not
  use dotenv for secrets; exporting is simpler and safer)
- remove ({e.name}) from cli.py TUI missing-dep error message
- simplify README install snippet to uv sync --extra tui; remove
  implementation detail paragraph (already covered in for_devs.md)
---
 README.md              |  4 +-
 pyproject.toml         |  1 +
 src/mmore/cli.py       |  4 +-
 src/mmore/tui/app.py   |  4 ++
 src/mmore/tui/setup.py | 86 ++++++++++++------------------------------
 5 files changed, 32 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index ecf3c5d6..be8cd84f 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ uv pip install "mmore[process,cpu]"
 Prefer a guided experience over editing YAML by hand? Install the `tui` extra and launch the interactive Terminal UI:
 
 ```bash
-uv sync --extra tui --extra process --extra index --extra cpu
+uv sync --extra tui
 mmore tui
 ```
 
@@ -121,8 +121,6 @@ From the launcher you can:
 - generate stage YAML configs through a guided wizard,
 - pick from existing example configs without leaving the terminal.
 
-Generated configs land in `./tui-configs/` and are validated against the stage's dataclass before any run. Stages whose extras are missing are greyed out in the menu with the exact `uv sync --extra ...` command to enable them. Press `Ctrl-C` inside a sub-flow to cancel and return to the main menu; press it at the main menu to quit.
-
 ### Minimal Example
 
 You can use our predefined CLI commands to execute parts of the pipeline. Note that you might need to prepend `python -m` to the command if the package does not properly create bash aliases.
diff --git a/pyproject.toml b/pyproject.toml
index 414ef7d8..2a22fe4b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -132,6 +132,7 @@ tui = [
     # Interactive terminal launcher (`mmore tui`)
     "questionary>=2.0",
     "rich>=13",
+    "uv",
 ]
 
 all = [
diff --git a/src/mmore/cli.py b/src/mmore/cli.py
index d6333a69..1030a465 100644
--- a/src/mmore/cli.py
+++ b/src/mmore/cli.py
@@ -272,9 +272,7 @@ def tui():
         from .tui import run
     except ModuleNotFoundError as e:
         if e.name in ("questionary", "rich", "prompt_toolkit"):
-            click.echo(
-                f"TUI dependency missing ({e.name}). Install with: uv sync --extra tui"
-            )
+            click.echo("TUI dependency missing. Install with: uv sync --extra tui")
             raise SystemExit(1)
         raise
     run()
diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 4f0f286c..0621f163 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -287,6 +287,10 @@ def run() -> None:
                 _run_full_wizard()
             elif mode == "chat":
                 _chat_only()
+            elif mode == "setup":
+                from mmore.tui.setup import run_setup_wizard
+
+                run_setup_wizard()
         except (UserCancelledError, KeyboardInterrupt):
             console.print(f"[{ACCENT2}]cancelled — back to menu.[/]")
             continue
diff --git a/src/mmore/tui/setup.py b/src/mmore/tui/setup.py
index cffeb224..beea7848 100644
--- a/src/mmore/tui/setup.py
+++ b/src/mmore/tui/setup.py
@@ -1,11 +1,10 @@
-"""Setup wizard: install extras + generate .env in one guided flow."""
+"""Setup wizard: install extras + print export commands in one guided flow."""
 
 from __future__ import annotations
 
 import os
 import subprocess
 import sys
-from pathlib import Path
 
 import questionary
 from rich.panel import Panel
@@ -185,54 +184,18 @@ def _collect_env_vars(stages: list[str]) -> dict[str, str]:
     return env_vars
 
 
-def _write_dotenv(env_vars: dict[str, str], path: str = ".env") -> str:
-    """Write or merge env vars into a .env file.
+def _print_export_commands(env_vars: dict[str, str]) -> None:
+    """Print export commands for the collected env vars.
 
-    Existing variables in the file are preserved; new ones are appended.
+    Displays a table with masked values, then prints the shell commands
+    the user can copy-paste into their shell or profile file.
     """
-    existing: dict[str, str] = {}
-    lines: list[str] = []
-    env_path = Path(path)
-
-    if env_path.exists():
-        raw = env_path.read_text()
-        for line in raw.splitlines():
-            stripped = line.strip()
-            if stripped and not stripped.startswith("#") and "=" in stripped:
-                key = stripped.split("=", 1)[0].strip()
-                existing[key] = line
-            lines.append(line)
-
-    # Append new vars
-    added = []
-    for key, value in env_vars.items():
-        if key in existing:
-            continue  # don't overwrite existing
-        # Quote values that contain spaces
-        if " " in value:
-            entry = f'{key}="{value}"'
-        else:
-            entry = f"{key}={value}"
-        lines.append(entry)
-        added.append(key)
-
-    if lines and not lines[-1].endswith("\n"):
-        content = "\n".join(lines) + "\n"
-    else:
-        content = "\n".join(lines)
-
-    env_path.write_text(content)
-    return str(env_path)
-
-
-def _preview_dotenv(env_vars: dict[str, str]) -> None:
-    """Show what will be written to .env."""
     if not env_vars:
-        console.print("  [dim]No environment variables to write.[/dim]")
+        console.print("  [dim]No environment variables needed.[/dim]")
         return
 
     table = Table(
-        title="[bold].env preview[/bold]",
+        title="[bold]Environment variables[/bold]",
         title_style=ACCENT2,
         border_style=ACCENT,
         show_lines=False,
@@ -241,7 +204,7 @@ def _preview_dotenv(env_vars: dict[str, str]) -> None:
     table.add_column("Value", style=MUTED)
 
     for key, value in env_vars.items():
-        # Mask API keys
+        # Mask API keys and tokens
         if "KEY" in key or "TOKEN" in key:
             display = value[:4] + "…" + value[-4:] if len(value) > 8 else "****"
         else:
@@ -249,16 +212,28 @@ def _preview_dotenv(env_vars: dict[str, str]) -> None:
         table.add_row(key, display)
 
     console.print(table)
+    console.print()
+    console.print(
+        Panel(
+            "\n".join(
+                f'export {k}="{v}"' if " " in v else f"export {k}={v}"
+                for k, v in env_vars.items()
+            ),
+            title="[bold]Add to your shell profile (e.g. ~/.bashrc or ~/.zshrc)[/bold]",
+            border_style=ACCENT,
+            padding=(1, 2),
+        )
+    )
 
 
 def run_setup_wizard() -> None:
-    """Full setup wizard: pick stages → install deps → generate .env."""
+    """Full setup wizard: pick stages → install deps → print export commands."""
     console.print(
         Panel(
             Text(
                 "This wizard will:\n"
                 "  1. Install the right Python dependencies for your pipeline\n"
-                "  2. Generate a .env file with the required environment variables",
+                "  2. Show the environment variables you need to export",
             ),
             title="[bold]Setup wizard[/bold]",
             border_style=ACCENT2,
@@ -289,26 +264,15 @@ def run_setup_wizard() -> None:
     if _confirm("Install dependencies now?", default=True):
         if not _install_deps(stages, compute):
             if not _confirm(
-                "Continue to .env setup despite install failure?", default=False
+                "Continue to env var setup despite install failure?", default=False
             ):
                 return
 
     # Step 4: collect env vars
     env_vars = _collect_env_vars(stages)
 
-    # Step 5: preview and write .env
-    if env_vars:
-        _preview_dotenv(env_vars)
-        env_path = _prompt(".env file path", default=".env")
-        if _confirm(f"Write {len(env_vars)} variable(s) to {env_path}?", default=True):
-            written = _write_dotenv(env_vars, env_path)
-            console.print(f"  [{OK}]✓[/] Saved to {written}")
-        else:
-            console.print("  [dim]Skipped .env generation.[/dim]")
-    else:
-        console.print(
-            "  [dim]No environment variables needed for selected stages.[/dim]"
-        )
+    # Step 5: print export commands
+    _print_export_commands(env_vars)
 
     console.print(
         f"\n  [{OK}]✓ Setup complete![/] Run [bold]mmore tui[/bold] to start.\n"

From 1a68bd88eecbe9ebd3b37fcba3ed84618cffc69d Mon Sep 17 00:00:00 2001
From: ArthurPerrin <arthur.perrin2511@gmail.com>
Date: Thu, 28 May 2026 18:51:18 +0200
Subject: [PATCH 23/24] Update setup choice text in app.py

---
 src/mmore/tui/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index 0621f163..f97fa526 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -253,7 +253,7 @@ def _main_menu() -> str | None:
             chat_choice,
             questionary.Separator(),
             questionary.Choice(
-                "🔧 Setup (install deps + generate .env)", value="setup"
+                "🔧 Setup (install dependencies)", value="setup"
             ),
             questionary.Choice("✕  Quit", value="quit"),
         ],

From 25fcd5aa0e01bf19056d9cccb8a352e418b9df77 Mon Sep 17 00:00:00 2001
From: JCHAVEROT <chaverotjrmy7@gmail.com>
Date: Thu, 28 May 2026 19:01:46 +0200
Subject: [PATCH 24/24] chore: fix linter

---
 src/mmore/tui/app.py |  4 +---
 uv.lock              | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/mmore/tui/app.py b/src/mmore/tui/app.py
index f97fa526..37186654 100644
--- a/src/mmore/tui/app.py
+++ b/src/mmore/tui/app.py
@@ -252,9 +252,7 @@ def _main_menu() -> str | None:
             wizard_choice,
             chat_choice,
             questionary.Separator(),
-            questionary.Choice(
-                "🔧 Setup (install dependencies)", value="setup"
-            ),
+            questionary.Choice("🔧 Setup (install dependencies)", value="setup"),
             questionary.Choice("✕  Quit", value="quit"),
         ],
         style=QSTYLE,
diff --git a/uv.lock b/uv.lock
index 8f887fea..1e1ff7b4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3703,6 +3703,7 @@ all = [
     { name = "trafilatura" },
     { name = "transformers" },
     { name = "unidecode" },
+    { name = "uv" },
     { name = "uvicorn" },
     { name = "xlrd" },
 ]
@@ -3806,6 +3807,7 @@ rag = [
 tui = [
     { name = "questionary" },
     { name = "rich" },
+    { name = "uv" },
 ]
 websearch = [
     { name = "ddgs" },
@@ -3902,6 +3904,7 @@ requires-dist = [
     { name = "transformers", marker = "extra == 'process'", specifier = ">=4.44" },
     { name = "typing-extensions", specifier = ">=4.15.0,<5.0" },
     { name = "unidecode", marker = "extra == 'process'" },
+    { name = "uv", marker = "extra == 'tui'" },
     { name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.29" },
     { name = "validators", specifier = ">=0.28" },
     { name = "xlrd", marker = "extra == 'process'", specifier = ">=2.0.1" },
@@ -9145,6 +9148,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ed/d0/5bf7cbf1ac138c92b9ac21066d18faf4d7e7f651047b700eb192ca4b9fdb/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:258186964039a8e36db10810c1ece879d229b01331e09e9030bc5dcabe231bd2", size = 364700, upload-time = "2026-02-20T22:50:21.732Z" },
 ]
 
+[[package]]
+name = "uv"
+version = "0.11.16"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a1/99/025154611a4bd97a23851574c15d73bb71ada09d35f092d6972f9ac87f70/uv-0.11.16.tar.gz", hash = "sha256:4b435fcb0af8f34833dcc1903a8a223856437efd0d515c2160a2871def221238", size = 4177038, upload-time = "2026-05-21T22:10:01.009Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/55/e3/8b8cfc802bc476c67e31a39725538193265cf3a19585b4a60c232659f919/uv-0.11.16-py3-none-linux_armv6l.whl", hash = "sha256:c9e9d9cb73ee8cd2ad696dbf1bc3232abaac363270557684b6b85a2bdb8eb276", size = 23508087, upload-time = "2026-05-21T22:10:06.227Z" },
+    { url = "https://files.pythonhosted.org/packages/45/78/d5ca91c636ac88e902b6b3ff31ad32d2d02663232d844aff871467a323d2/uv-0.11.16-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:01172238a75e42a5a55d12555cd9ec98bee24249f3645b98a4b32eb5f1ff5e43", size = 23028989, upload-time = "2026-05-21T22:09:50.127Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/26/c84580dfec5a87c36fb1218eac17c5194fa3e58e2a9232cf085d69eb6bed/uv-0.11.16-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c75f9b5bac49b97131973910c220feac60fe47b10a333941b237ff0ae4b36721", size = 21572023, upload-time = "2026-05-21T22:09:58.703Z" },
+    { url = "https://files.pythonhosted.org/packages/84/68/ba2bdc64fea96ef8c9796a991f244541b65bb9d31c661b322cc724857a4e/uv-0.11.16-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:a801484f4507b6c2133e557350f3143b61b8f8b61dddb01ff7b84a74cdfab1fb", size = 23289936, upload-time = "2026-05-21T22:10:15.423Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/81/74922f693d5804a77d009338ca8dc709eff871fb60d9f2c263dede8d77d1/uv-0.11.16-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:eb538069e768b042cf870be700a210518ce628e36d99d9a83b85acaf484d7f6a", size = 23020906, upload-time = "2026-05-21T22:10:24.242Z" },
+    { url = "https://files.pythonhosted.org/packages/60/81/cda8886f5df4dd28854a9b97bcc3ee6a7d1b5b5b23aaaccfbf1ed3e5e2bf/uv-0.11.16-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d7cdb23457a4d1bc76bf1016638ea1d1ada0e8e032f656168e933d4d17c47e72", size = 23004220, upload-time = "2026-05-21T22:10:32.847Z" },
+    { url = "https://files.pythonhosted.org/packages/98/7c/65837e07de23f0a40ab860bc6601f7c022d4bcf4b97ca79b6c35a2e72e65/uv-0.11.16-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:451327388d59ac3041cbda474296f3ceeafac5b1f645476198e7b95f504fcfd5", size = 24319651, upload-time = "2026-05-21T22:10:21.492Z" },
+    { url = "https://files.pythonhosted.org/packages/85/70/9d364542bf118433b60ed71422e47d2c8c470aca7d3aef0df9449a5f726a/uv-0.11.16-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7992b8276149b3ffaf35ce9434702d3e16bae6ec393e99df209b870a7e19eb0", size = 25359517, upload-time = "2026-05-21T22:09:46.519Z" },
+    { url = "https://files.pythonhosted.org/packages/99/b4/650896e8cff5a3289cee860c41fd9876da83ca628c5871f9a61d5fc75c72/uv-0.11.16-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:83a8db9b3314d900e7a240105afce43f806c9e04c59ea10a40bdbdca84c6d0c5", size = 24563421, upload-time = "2026-05-21T22:10:35.82Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/7d/184711a8c02466e1486d57efdc9394ce09cbf43ee2c5794da70bd25db3fb/uv-0.11.16-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b10086165189c39c53142a0e2f34e0b8889ef681886f589ed17be45a1a774c7", size = 24676607, upload-time = "2026-05-21T22:10:39.784Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/3f/5b338df6505f77f73c20eae38cb29f57d14dba56dac835386e3dc6e2a5d6/uv-0.11.16-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:cfe1f06fb8f135a735a961065d5ee90f99cccf41749fb1f964edb5b3c3dae19b", size = 23401615, upload-time = "2026-05-21T22:10:30.124Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/f9/54bbcbc77443dc76468f09a49cc9f4f92ca49b4159a011c6010d223de4ea/uv-0.11.16-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:2454f80d8b548fb2e246151578809b14ad4395b3f357d738bae1af11918e91af", size = 24104468, upload-time = "2026-05-21T22:09:53.323Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/0a/b5f105514fddea5110fe3947cd18a9f199ff93dbad78e5e5a08e1b5d0ea2/uv-0.11.16-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:4249d57a563165d368050680deeb722f9c0053a0dbf3244b11cca3e6d85a3c7d", size = 24164861, upload-time = "2026-05-21T22:10:09.458Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/01/15d4ca2be7257862b077a9077ac31ce81c419f35ef7994e76356a317716b/uv-0.11.16-py3-none-musllinux_1_1_i686.whl", hash = "sha256:374c30126483ce95675c5de49e54c2454ddedb01c17b8321417fe4eb9da83406", size = 23644919, upload-time = "2026-05-21T22:10:03.129Z" },
+    { url = "https://files.pythonhosted.org/packages/49/bf/9de3e262e6ff93aec2e0a4c238857293fd2c616dd79f25bb440f126bf32c/uv-0.11.16-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:746edfc9d1d8cd03dd58739989f634d3580648048d09f81a9c68da74c4eb9d62", size = 24973746, upload-time = "2026-05-21T22:10:18.413Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/7d/f4126dce104f1b5d0b451ce3ca41c4db69b963c2e78c3465fcda6440de31/uv-0.11.16-py3-none-win32.whl", hash = "sha256:50299b20aab2d28c05ff27d781ce2af3f5af2102bc304dc07a4ad54b05e2af8a", size = 22400991, upload-time = "2026-05-21T22:10:27.119Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/38/99627cb995a03389b227ce4b12b08e770565d0aa7850cd0420973194a638/uv-0.11.16-py3-none-win_amd64.whl", hash = "sha256:e901aafa5007beffafe57bfa44e5e248d99fb5d97036a3718fd65cf9723c5cd3", size = 25067163, upload-time = "2026-05-21T22:10:12.317Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/68/3ed1c0bdfb4bec501e5cde73419b4f39c8a125ef905a85fc0f239f19eb9b/uv-0.11.16-py3-none-win_arm64.whl", hash = "sha256:d777cb29661cdfa7f90dae77406c85fb5b729bf8bc13941dc237958a1ea1ba00", size = 23502015, upload-time = "2026-05-21T22:09:56.014Z" },
+]
+
 [[package]]
 name = "uvicorn"
 version = "0.42.0"