diff --git a/README.md b/README.md index 77acee2..a606987 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Useful extras: pip install -e .[llm] # screening and other LLM-backed workflows pip install -e .[meta] # `autonima meta` pip install -e .[readability] # enhanced HTML extraction +pip install -e .[ui] # `autonima ui` local web app pip install -e .[docs] # local docs build ``` @@ -60,6 +61,12 @@ Run meta-analysis on the generated NiMADS outputs: autonima meta runs/my_review/outputs ``` +Launch the local web UI: + +```bash +autonima ui --workspace . +``` + ## Minimal Config Example ```yaml diff --git a/autonima.egg-info/PKG-INFO b/autonima.egg-info/PKG-INFO index be5d3e6..8e331bd 100644 --- a/autonima.egg-info/PKG-INFO +++ b/autonima.egg-info/PKG-INFO @@ -54,6 +54,9 @@ Requires-Dist: mkdocs>=1.6; extra == "docs" Requires-Dist: mkdocs-material>=9.5; extra == "docs" Requires-Dist: mkdocs-click>=0.8; extra == "docs" Requires-Dist: pymdown-extensions>=10.0; extra == "docs" +Provides-Extra: ui +Requires-Dist: fastapi>=0.115; extra == "ui" +Requires-Dist: uvicorn>=0.30; extra == "ui" Dynamic: author Dynamic: classifier Dynamic: description @@ -88,6 +91,7 @@ Useful extras: pip install -e .[llm] # screening and other LLM-backed workflows pip install -e .[meta] # `autonima meta` pip install -e .[readability] # enhanced HTML extraction +pip install -e .[ui] # `autonima ui` local web app pip install -e .[docs] # local docs build ``` @@ -128,6 +132,12 @@ Run meta-analysis on the generated NiMADS outputs: autonima meta runs/my_review/outputs ``` +Launch the local web UI: + +```bash +autonima ui --workspace . 
+``` + ## Minimal Config Example ```yaml diff --git a/autonima.egg-info/SOURCES.txt b/autonima.egg-info/SOURCES.txt index 2df692a..5d6ad61 100644 --- a/autonima.egg-info/SOURCES.txt +++ b/autonima.egg-info/SOURCES.txt @@ -45,6 +45,15 @@ autonima/templates/sample_config.yml autonima/utils/__init__.py autonima/utils/base.py autonima/utils/criteria.py +autonima/webui/__init__.py +autonima/webui/app.py +autonima/webui/progress.py +autonima/webui/runs.py +autonima/webui/secrets.py +autonima/webui/state.py +autonima/webui/static/app.jsx +autonima/webui/static/index.html +autonima/webui/static/styles.css tests/test_annotation_incremental_caching.py tests/test_annotation_multi_analysis_validation.py tests/test_annotation_retry.py @@ -56,7 +65,9 @@ tests/test_docs.py tests/test_fulltext_incomplete_outputs.py tests/test_fulltext_loading.py tests/test_fulltext_screening.py +tests/test_llm_client.py tests/test_multi_annotation.py +tests/test_nimads_annotations.py tests/test_objective_in_prompt.py tests/test_parallel_screening.py tests/test_pipeline_retrieval.py @@ -64,4 +75,6 @@ tests/test_pipeline_stage_cutoff.py tests/test_pubmed.py tests/test_retrieval.py tests/test_screening.py -tests/test_simplified_screening.py \ No newline at end of file +tests/test_simplified_screening.py +tests/test_webui.py +tests/test_webui_api.py \ No newline at end of file diff --git a/autonima.egg-info/requires.txt b/autonima.egg-info/requires.txt index d280576..ed1e89e 100644 --- a/autonima.egg-info/requires.txt +++ b/autonima.egg-info/requires.txt @@ -35,3 +35,7 @@ nimare>=0.1.0 [readability] readabilipy>=0.2.0 + +[ui] +fastapi>=0.115 +uvicorn>=0.30 diff --git a/autonima/annotation/processor.py b/autonima/annotation/processor.py index 2f839ed..97aa492 100644 --- a/autonima/annotation/processor.py +++ b/autonima/annotation/processor.py @@ -146,8 +146,15 @@ def process_studies( self._save_results_by_study(all_decisions, output_dir, existing_cached_results) self.annotation_results = all_decisions 
- # Return all cached results (existing + new) - return self._load_cached_results(output_dir) or [] + # Return only results eligible for this execution. Cached decisions + # for studies that no longer pass current screening can remain on disk + # for reuse, but must not flow into current outputs. + return self._filter_cached_results_for_current_run( + self._load_cached_results(output_dir) or [], + included_studies=included_studies or [], + all_studies=all_studies or [], + all_abstract_studies=all_abstract_studies or [], + ) def _create_all_analyses_annotations( self, @@ -402,6 +409,33 @@ def _get_studies_with_complete_results( studies_with_complete_results.add(study_id) return studies_with_complete_results + + def _filter_cached_results_for_current_run( + self, + results: List[AnnotationDecision], + included_studies: List[Study], + all_studies: List[Study], + all_abstract_studies: List[Study], + ) -> List[AnnotationDecision]: + """Filter cached annotation decisions to the current eligibility graph.""" + included_ids = {study.pmid for study in included_studies} + all_ids = {study.pmid for study in all_studies} + abstract_ids = {study.pmid for study in all_abstract_studies} + custom_names = {annotation.name for annotation in self.config.annotations} + + filtered: List[AnnotationDecision] = [] + for result in results: + name = result.annotation_name + if name == "all_studies" and result.study_id in all_ids: + filtered.append(result) + elif name == "all_abstract" and result.study_id in abstract_ids: + filtered.append(result) + elif name == "all_analyses" and result.study_id in included_ids: + filtered.append(result) + elif name in custom_names and result.study_id in included_ids: + filtered.append(result) + + return filtered def _get_annotations_with_complete_results_for_study( self, diff --git a/autonima/annotation/schema.py b/autonima/annotation/schema.py index 50bd87e..7d27b43 100644 --- a/autonima/annotation/schema.py +++ b/autonima/annotation/schema.py @@ -54,6 
+54,7 @@ class AnnotationDecision(BaseModel): # NEW: Track which criteria were applied inclusion_criteria_applied: List[str] = [] exclusion_criteria_applied: List[str] = [] + cache_signature: Optional[Dict[str, Any]] = None class TableMetadata(BaseModel): diff --git a/autonima/cli.py b/autonima/cli.py index 2be36b0..58284f0 100644 --- a/autonima/cli.py +++ b/autonima/cli.py @@ -1,6 +1,7 @@ """Command-line interface for Autonima.""" import asyncio +import inspect import logging import sys from pathlib import Path @@ -9,6 +10,7 @@ import click from .utils import set_debug_mode, log_error_with_debug +from .execution import CACHE_POLICIES, CLEAR_CACHE_STAGES # Set up logging logging.basicConfig( @@ -186,6 +188,9 @@ def _run_pipeline_command( num_workers: int, stop_after_stage: Literal["search", "abstract", "full"] = "full", force_reextract_incomplete_fulltext: bool = False, + cache_policy: str = "auto", + clear_cache: tuple[str, ...] = (), + copy_valid_cache_from: str | None = None, ) -> None: """Run pipeline commands that share config loading and execution logic.""" _configure_run_logging(verbose) @@ -218,14 +223,28 @@ def _run_pipeline_command( async def execute_pipeline(): run_pipeline_from_config = _get_run_pipeline_from_config() - results = await run_pipeline_from_config( - config=pipeline_config, - stop_after_stage=stop_after_stage, - num_workers=num_workers, - force_reextract_incomplete_fulltext=( + kwargs = { + "config": pipeline_config, + "stop_after_stage": stop_after_stage, + "num_workers": num_workers, + "force_reextract_incomplete_fulltext": ( force_reextract_incomplete_fulltext ), - ) + "cache_policy": cache_policy, + "clear_cache": list(clear_cache), + "copy_valid_cache_from": copy_valid_cache_from, + } + signature = inspect.signature(run_pipeline_from_config) + if not any( + parameter.kind == inspect.Parameter.VAR_KEYWORD + for parameter in signature.parameters.values() + ): + kwargs = { + key: value + for key, value in kwargs.items() + if key in 
signature.parameters + } + results = await run_pipeline_from_config(**kwargs) _print_pipeline_summary( results=results, pipeline_config=pipeline_config, @@ -268,6 +287,25 @@ async def execute_pipeline(): "fulltext_incomplete using current full-text files." ), ) +@click.option( + "--cache-policy", + type=click.Choice(sorted(CACHE_POLICIES)), + default="auto", + show_default=True, + help="How Autonima should treat existing cache artifacts.", +) +@click.option( + "--clear-cache", + type=click.Choice(sorted(CLEAR_CACHE_STAGES)), + multiple=True, + help="Delete selected stage cache before running. Repeat for multiple stages.", +) +@click.option( + "--copy-valid-cache-from", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + default=None, + help="Copy signature-matching cache artifacts from another output folder.", +) def run( config: str, output_folder: str | None, @@ -276,6 +314,9 @@ def run( debug: bool, num_workers: int, force_reextract_incomplete_fulltext: bool, + cache_policy: str, + clear_cache: tuple[str, ...], + copy_valid_cache_from: str | None, ): """ Run the Autonima systematic review pipeline. @@ -314,6 +355,9 @@ def run( force_reextract_incomplete_fulltext=( force_reextract_incomplete_fulltext ), + cache_policy=cache_policy, + clear_cache=clear_cache, + copy_valid_cache_from=copy_valid_cache_from, ) @@ -328,6 +372,25 @@ def run( help='Enable debug mode with post-mortem debugging on errors') @click.option('--num-workers', '-j', type=int, default=1, help='Number of parallel workers for screening (default: 1)') +@click.option( + "--cache-policy", + type=click.Choice(sorted(CACHE_POLICIES)), + default="auto", + show_default=True, + help="How Autonima should treat existing cache artifacts.", +) +@click.option( + "--clear-cache", + type=click.Choice(sorted(CLEAR_CACHE_STAGES)), + multiple=True, + help="Delete selected stage cache before running. 
Repeat for multiple stages.", +) +@click.option( + "--copy-valid-cache-from", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + default=None, + help="Copy signature-matching cache artifacts from another output folder.", +) def run_search( config: str, output_folder: str | None, @@ -335,6 +398,9 @@ def run_search( dry_run: bool, debug: bool, num_workers: int, + cache_policy: str, + clear_cache: tuple[str, ...], + copy_valid_cache_from: str | None, ): """ Run Autonima through the search stage only. @@ -352,6 +418,9 @@ def run_search( debug=debug, num_workers=num_workers, stop_after_stage="search", + cache_policy=cache_policy, + clear_cache=clear_cache, + copy_valid_cache_from=copy_valid_cache_from, ) @@ -366,6 +435,25 @@ def run_search( help='Enable debug mode with post-mortem debugging on errors') @click.option('--num-workers', '-j', type=int, default=1, help='Number of parallel workers for screening (default: 1)') +@click.option( + "--cache-policy", + type=click.Choice(sorted(CACHE_POLICIES)), + default="auto", + show_default=True, + help="How Autonima should treat existing cache artifacts.", +) +@click.option( + "--clear-cache", + type=click.Choice(sorted(CLEAR_CACHE_STAGES)), + multiple=True, + help="Delete selected stage cache before running. Repeat for multiple stages.", +) +@click.option( + "--copy-valid-cache-from", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + default=None, + help="Copy signature-matching cache artifacts from another output folder.", +) def run_abstract( config: str, output_folder: str | None, @@ -373,6 +461,9 @@ def run_abstract( dry_run: bool, debug: bool, num_workers: int, + cache_policy: str, + clear_cache: tuple[str, ...], + copy_valid_cache_from: str | None, ): """ Run Autonima through abstract screening. 
@@ -391,6 +482,9 @@ def run_abstract( debug=debug, num_workers=num_workers, stop_after_stage="abstract", + cache_policy=cache_policy, + clear_cache=clear_cache, + copy_valid_cache_from=copy_valid_cache_from, ) @@ -604,6 +698,69 @@ def create_sample_config(): log_error_with_debug(logger, f"Failed to create sample config: {e}") +@click.command() +@click.option( + "--workspace", + type=click.Path(file_okay=False, dir_okay=True, path_type=Path), + default=None, + help="Workspace root directory for .autonima-ui state", +) +@click.option( + "--host", + type=str, + default="127.0.0.1", + show_default=True, + help="Host interface to bind the web app", +) +@click.option( + "--port", + type=int, + default=8765, + show_default=True, + help="Port to bind the web app", +) +@click.option( + "--open/--no-open", + "open_browser", + default=True, + show_default=True, + help="Open the app in a browser after launch", +) +def ui( + workspace: Path | None, + host: str, + port: int, + open_browser: bool, +): + """ + Launch the local Autonima web UI. 
+ + This command starts a localhost FastAPI + React web app for: + - project management + - YAML spec creation and validation + - interactive run orchestration with live progress and logs + - secrets setup via ~/.autonima.env + """ + try: + from .webui import run_ui_server + + run_ui_server( + workspace=str(workspace) if workspace else None, + host=host, + port=port, + open_browser=open_browser, + ) + except ImportError as e: + log_error_with_debug( + logger, + str(e) + ) + sys.exit(1) + except Exception as e: + log_error_with_debug(logger, f"Failed to launch web UI: {e}") + sys.exit(1) + + # CLI group would be used with actual click @click.group() def cli(): @@ -617,6 +774,7 @@ def cli(): cli.add_command(validate) cli.add_command(create_sample_config) cli.add_command(meta) +cli.add_command(ui) def main(): diff --git a/autonima/config.py b/autonima/config.py index a43e464..9a906a4 100644 --- a/autonima/config.py +++ b/autonima/config.py @@ -1,5 +1,6 @@ """Configuration management and validation for Autonima.""" +from copy import deepcopy from importlib import resources import logging from pathlib import Path @@ -81,6 +82,8 @@ def load_from_dict(self, config_dict: Dict[str, Any]) -> PipelineConfig: ConfigurationError: If configuration is invalid """ try: + config_dict = self._apply_global_model_defaults(config_dict) + # Build nested configurations search_config = SearchConfig(**config_dict.get('search', {})) @@ -146,6 +149,73 @@ def load_from_dict(self, config_dict: Dict[str, Any]) -> PipelineConfig: except Exception as e: raise ConfigurationError(f"Error parsing configuration: {e}") + def _apply_global_model_defaults( + self, + config_dict: Dict[str, Any], + ) -> Dict[str, Any]: + """ + Apply defaults.model fallback to all model-related config fields. + + Behavior: + - If defaults.model is set, it is used as a fallback when stage-level + model fields are missing or blank. + - Explicit stage-level model values are preserved. 
+ """ + config_copy = deepcopy(config_dict) + defaults = config_copy.get("defaults") + + if defaults is None: + return config_copy + if not isinstance(defaults, dict): + raise ConfigurationError("defaults section must be a mapping") + + global_model = defaults.get("model") + if global_model is None: + return config_copy + if not isinstance(global_model, str) or not global_model.strip(): + raise ConfigurationError( + "defaults.model must be a non-empty string" + ) + global_model = global_model.strip() + + def _set_default_string( + section: Dict[str, Any], + key: str, + fallback_value: str, + ) -> None: + value = section.get(key) + if isinstance(value, str) and value.strip(): + return + section[key] = fallback_value + + screening = config_copy.setdefault("screening", {}) + if not isinstance(screening, dict): + raise ConfigurationError("screening section must be a mapping") + for stage in ("abstract", "fulltext"): + stage_config = screening.setdefault(stage, {}) + if not isinstance(stage_config, dict): + raise ConfigurationError( + f"screening.{stage} must be a mapping" + ) + _set_default_string(stage_config, "model", global_model) + + annotation = config_copy.setdefault("annotation", {}) + if not isinstance(annotation, dict): + raise ConfigurationError("annotation section must be a mapping") + _set_default_string(annotation, "model", global_model) + + parsing = config_copy.setdefault("parsing", {}) + if not isinstance(parsing, dict): + raise ConfigurationError("parsing section must be a mapping") + _set_default_string(parsing, "coordinate_model", global_model) + + retrieval = config_copy.setdefault("retrieval", {}) + if not isinstance(retrieval, dict): + raise ConfigurationError("retrieval section must be a mapping") + _set_default_string(retrieval, "coordinate_model", global_model) + + return config_copy + def _validate_config(self, config: PipelineConfig) -> None: """ Validate the loaded configuration. 
diff --git a/autonima/execution.py b/autonima/execution.py new file mode 100644 index 0000000..7292cf3 --- /dev/null +++ b/autonima/execution.py @@ -0,0 +1,750 @@ +"""Execution provenance and cache-signature helpers.""" + +from __future__ import annotations + +import hashlib +import json +import logging +import shutil +from dataclasses import asdict, is_dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional +from uuid import uuid4 + +import yaml + +logger = logging.getLogger(__name__) + +CACHE_POLICIES = {"auto", "ignore", "trust-legacy"} +CLEAR_CACHE_STAGES = { + "search", + "abstract", + "retrieval", + "fulltext", + "parsing", + "annotation", + "output", + "all", +} + +STAGE_ARTIFACTS: Dict[str, List[str]] = { + "search": ["search_results.json"], + "abstract": ["abstract_screening_results.json"], + "retrieval": ["fulltext_retrieval_results.json"], + "fulltext": ["fulltext_screening_results.json"], + "parsing": ["coordinate_parsing_results.json"], + "annotation": ["annotation_results.json"], + "output": [ + "final_results.json", + "nimads_studyset.json", + "missing_fulltexts.csv", + "missing_fulltexts.txt", + ], +} + +EXECUTION_PROGRESS_STATUSES = { + "pending", + "running", + "completed", + "skipped", + "failed", +} +EXECUTION_PROGRESS_SOURCES = { + "fresh", + "cache", + "not_applicable", + "unknown", +} + + +def utc_now_iso() -> str: + """Return a stable UTC timestamp string.""" + return datetime.now(timezone.utc).isoformat() + + +def _to_plain(value: Any) -> Any: + """Convert dataclasses/Pydantic models to JSON-compatible primitives.""" + if is_dataclass(value): + return _to_plain(asdict(value)) + if hasattr(value, "model_dump"): + return _to_plain(value.model_dump()) + if isinstance(value, dict): + return {str(k): _to_plain(v) for k, v in value.items()} + if isinstance(value, (list, tuple, set)): + return [_to_plain(v) for v in value] + if hasattr(value, "value"): + return 
value.value + return value + + +def _strip_noise(value: Any) -> Any: + """Remove volatile fields that should not affect semantic hashes.""" + if isinstance(value, dict): + return { + k: _strip_noise(v) + for k, v in sorted(value.items()) + if k + not in { + "timestamp", + "started_at", + "completed_at", + "retrieved_at", + "screened_at", + "criteria_mapping", + } + } + if isinstance(value, list): + return [_strip_noise(v) for v in value] + return value + + +def stable_hash(value: Any) -> str: + """Hash canonical JSON for deterministic config/input signatures.""" + plain = _strip_noise(_to_plain(value)) + encoded = json.dumps( + plain, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + ).encode("utf-8") + return hashlib.sha256(encoded).hexdigest() + + +def pipeline_config_to_dict(config: Any) -> Dict[str, Any]: + """Serialize a PipelineConfig-like object without requiring a hard type.""" + if hasattr(config, "to_dict"): + return config.to_dict() + return _to_plain(config) or {} + + +def _pick(mapping: Dict[str, Any], keys: Iterable[str]) -> Dict[str, Any]: + return {key: mapping.get(key) for key in keys if key in mapping} + + +def stage_signature_payloads(config_or_dict: Any) -> Dict[str, Any]: + """Build per-stage semantic payloads for cache validation.""" + config = pipeline_config_to_dict(config_or_dict) + search = config.get("search") or {} + screening = config.get("screening") or {} + retrieval = config.get("retrieval") or {} + parsing = config.get("parsing") or {} + annotation = config.get("annotation") or {} + output = config.get("output") or {} + + return { + "search": _pick( + search, + [ + "database", + "query", + "max_results", + "date_from", + "date_to", + "pmids_file", + "pmids_list", + ], + ), + "abstract": screening.get("abstract") or {}, + "retrieval": _pick( + retrieval, + [ + "sources", + "timeout", + "max_retries", + "download_directory", + "n_jobs", + "load_excluded", + "full_text_sources", + ], + ), + "fulltext": 
screening.get("fulltext") or {}, + "parsing": _pick( + parsing or retrieval, + ["parse_coordinates", "coordinate_model"], + ), + "annotation": _pick( + annotation, + [ + "model", + "create_all_included_annotations", + "metadata_fields", + "annotations", + "enabled", + "prompt_type", + "inclusion_criteria", + "exclusion_criteria", + ], + ), + "output": _pick( + output, + ["prisma_diagram", "formats", "nimads", "export_excluded_studies"], + ), + } + + +def stage_hashes(config_or_dict: Any) -> Dict[str, str]: + """Return semantic hashes for all pipeline stages.""" + return { + stage: stable_hash(payload) + for stage, payload in stage_signature_payloads(config_or_dict).items() + } + + +def manifest_path(output_dir: Path) -> Path: + return output_dir / "outputs" / "execution_manifest.json" + + +def execution_progress_path(output_dir: Path) -> Path: + return output_dir / "outputs" / "execution_progress.json" + + +def load_execution_progress(output_dir: Path) -> Optional[Dict[str, Any]]: + """Load the execution progress file if present.""" + path = execution_progress_path(output_dir) + if not path.exists(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except Exception as exc: + logger.warning("Failed to load execution progress %s: %s", path, exc) + return None + + +def load_execution_manifest(output_dir: Path) -> Optional[Dict[str, Any]]: + """Load the execution manifest if present.""" + path = manifest_path(output_dir) + if not path.exists(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except Exception as exc: + logger.warning("Failed to load execution manifest %s: %s", path, exc) + return None + + +def _load_legacy_config_hashes(output_dir: Path) -> Optional[Dict[str, str]]: + final_results = output_dir / "outputs" / "final_results.json" + if not final_results.exists(): + return None + try: + data = json.loads(final_results.read_text(encoding="utf-8")) + config = data.get("config") + if isinstance(config, 
dict): + return stage_hashes(config) + except Exception as exc: + logger.warning("Failed to inspect legacy final_results config: %s", exc) + return None + + +def _safe_read_json(path: Path) -> Any: + if not path.exists(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except Exception: + return None + + +def _count_screening_decisions(results: Any) -> Dict[str, int]: + screened = len(results) if isinstance(results, list) else 0 + included = 0 + excluded = 0 + incomplete = 0 + for item in results if isinstance(results, list) else []: + decision = str(item.get("decision", "")).lower() if isinstance(item, dict) else "" + if "included" in decision: + included += 1 + elif "incomplete" in decision: + incomplete += 1 + else: + excluded += 1 + payload = { + "screened": screened, + "included": included, + "excluded": excluded, + } + if incomplete: + payload["incomplete"] = incomplete + return payload + + +def _read_stage_counters(output_dir: Path, stage: str) -> Dict[str, Any]: + """Read lightweight summary counters from existing stage artifacts.""" + outputs_dir = output_dir / "outputs" + if stage == "search": + data = _safe_read_json(outputs_dir / "search_results.json") + if isinstance(data, dict): + studies = data.get("studies", []) + return {"studies_found": len(studies) if isinstance(studies, list) else 0} + if stage == "abstract": + data = _safe_read_json(outputs_dir / "abstract_screening_results.json") + if isinstance(data, dict): + return _count_screening_decisions(data.get("screening_results", [])) + if stage == "retrieval": + data = _safe_read_json(outputs_dir / "fulltext_retrieval_results.json") + if isinstance(data, dict): + rows = data.get("studies_with_fulltext", []) + available = [ + row for row in rows + if isinstance(row, dict) and row.get("fulltext_available") + ] if isinstance(rows, list) else [] + return { + "fulltext_candidates": len(rows) if isinstance(rows, list) else 0, + "available": len(available), + } + if stage == 
"fulltext": + data = _safe_read_json(outputs_dir / "fulltext_screening_results.json") + if isinstance(data, dict): + counters = _count_screening_decisions(data.get("screening_results", [])) + counters.setdefault("incomplete", 0) + return counters + if stage == "parsing": + data = _safe_read_json(outputs_dir / "coordinate_parsing_results.json") + if isinstance(data, dict): + studies = data.get("studies", []) + analyses_count = 0 + coordinates_count = 0 + for study in studies if isinstance(studies, list) else []: + analyses = study.get("analyses", []) if isinstance(study, dict) else [] + if not isinstance(analyses, list): + continue + analyses_count += len(analyses) + for analysis in analyses: + points = analysis.get("points", []) if isinstance(analysis, dict) else [] + if isinstance(points, list): + coordinates_count += len(points) + return { + "studies": len(studies) if isinstance(studies, list) else 0, + "analyses": analyses_count, + "coordinates": coordinates_count, + } + if stage == "annotation": + data = _safe_read_json(outputs_dir / "annotation_results.json") + if isinstance(data, list): + annotation_names = { + str(item.get("annotation_name", "")).strip() + for item in data + if isinstance(item, dict) and str(item.get("annotation_name", "")).strip() + } + return {"decisions": len(data), "annotations": len(annotation_names)} + if stage == "output": + data = _safe_read_json(outputs_dir / "final_results.json") + if isinstance(data, dict): + stats = data.get("execution_stats", {}) + counters = stats.get("prisma_stats", {}) + if isinstance(counters, dict): + payload = dict(counters) + payload["nimads_available"] = (outputs_dir / "nimads_studyset.json").exists() + return payload + return {} + + +def _expand_clear_stages(stages: Iterable[str]) -> List[str]: + normalized = [str(stage).strip().lower() for stage in stages if str(stage).strip()] + invalid = [stage for stage in normalized if stage not in CLEAR_CACHE_STAGES] + if invalid: + raise ValueError(f"Invalid 
clear-cache stage(s): {', '.join(invalid)}") + if "all" in normalized: + return [stage for stage in STAGE_ARTIFACTS] + return sorted(set(normalized), key=list(STAGE_ARTIFACTS).index) + + +def _delete_stage_artifacts(output_dir: Path, stages: Iterable[str]) -> List[Dict[str, Any]]: + outputs_dir = output_dir / "outputs" + removed: List[Dict[str, Any]] = [] + for stage in _expand_clear_stages(stages): + for filename in STAGE_ARTIFACTS.get(stage, []): + path = outputs_dir / filename + if path.exists(): + path.unlink() + removed.append({"stage": stage, "path": str(path)}) + return removed + + +def _stages_to_invalidate(changed: Iterable[str]) -> List[str]: + """Map changed semantic signatures to conservative cache invalidation.""" + stages = set() + changed_set = set(changed) + if "search" in changed_set: + stages.add("search") + stages.add("output") + if "abstract" in changed_set: + stages.add("abstract") + stages.add("output") + if "retrieval" in changed_set: + stages.update({"retrieval", "fulltext", "parsing", "annotation", "output"}) + if "fulltext" in changed_set: + stages.add("fulltext") + stages.add("output") + if "parsing" in changed_set: + stages.update({"parsing", "annotation", "output"}) + if "annotation" in changed_set: + stages.update({"annotation", "output"}) + if "output" in changed_set: + stages.add("output") + return sorted(stages, key=list(STAGE_ARTIFACTS).index) + + +def _copy_stage_artifacts( + source_output_dir: Path, + target_output_dir: Path, + stages: Iterable[str], +) -> List[Dict[str, Any]]: + copied: List[Dict[str, Any]] = [] + source_outputs = source_output_dir / "outputs" + target_outputs = target_output_dir / "outputs" + target_outputs.mkdir(parents=True, exist_ok=True) + for stage in _expand_clear_stages(stages): + for filename in STAGE_ARTIFACTS.get(stage, []): + src = source_outputs / filename + dst = target_outputs / filename + if src.exists(): + shutil.copy2(src, dst) + copied.append({"stage": stage, "from": str(src), "to": str(dst)}) 
+ retrieval_src = source_output_dir / "retrieval" + retrieval_dst = target_output_dir / "retrieval" + if retrieval_src.exists() and "retrieval" in set(stages): + if retrieval_dst.exists(): + shutil.rmtree(retrieval_dst) + shutil.copytree(retrieval_src, retrieval_dst) + copied.append({"stage": "retrieval", "from": str(retrieval_src), "to": str(retrieval_dst)}) + return copied + + +def write_executed_config(config: Any, output_dir: Path) -> Path: + """Write the exact runtime config snapshot used by this execution.""" + outputs_dir = output_dir / "outputs" + outputs_dir.mkdir(parents=True, exist_ok=True) + path = outputs_dir / "config.executed.yaml" + path.write_text( + yaml.safe_dump(pipeline_config_to_dict(config), sort_keys=False), + encoding="utf-8", + ) + return path + + +def prepare_execution( + config: Any, + output_dir: Path, + *, + cache_policy: str = "auto", + clear_cache: Optional[Iterable[str]] = None, + copy_valid_cache_from: Optional[str] = None, +) -> Dict[str, Any]: + """Prepare output provenance and invalidate stale stage caches.""" + cache_policy = (cache_policy or "auto").strip().lower() + if cache_policy not in CACHE_POLICIES: + raise ValueError( + f"Invalid cache policy '{cache_policy}'. 
Expected one of: " + f"{', '.join(sorted(CACHE_POLICIES))}" + ) + + output_dir = output_dir.expanduser().resolve() + outputs_dir = output_dir / "outputs" + outputs_dir.mkdir(parents=True, exist_ok=True) + + current_hashes = stage_hashes(config) + current_config_hash = stable_hash( + { + key: value + for key, value in pipeline_config_to_dict(config).items() + if key != "output" + } + ) + + previous_manifest = load_execution_manifest(output_dir) + previous_hashes = None + legacy_unverified = False + parent_execution_id = None + if previous_manifest: + previous_hashes = previous_manifest.get("stage_hashes") or {} + parent_execution_id = previous_manifest.get("execution_id") + else: + previous_hashes = _load_legacy_config_hashes(output_dir) + legacy_unverified = previous_hashes is None and any(outputs_dir.glob("*.json")) + + changed_stages = [] + if previous_hashes: + changed_stages = [ + stage + for stage, value in current_hashes.items() + if previous_hashes.get(stage) != value + ] + + copied = [] + if copy_valid_cache_from: + source_output_dir = Path(copy_valid_cache_from).expanduser().resolve() + source_hashes = {} + source_manifest = load_execution_manifest(source_output_dir) + if source_manifest: + source_hashes = source_manifest.get("stage_hashes") or {} + parent_execution_id = source_manifest.get("execution_id") + else: + source_hashes = _load_legacy_config_hashes(source_output_dir) or {} + copyable = [ + stage + for stage, value in current_hashes.items() + if source_hashes.get(stage) == value + ] + copied = _copy_stage_artifacts(source_output_dir, output_dir, copyable) + + invalidated = [] + if cache_policy == "ignore": + invalidated.extend(_delete_stage_artifacts(output_dir, ["all"])) + elif cache_policy == "auto" and previous_hashes: + invalidated.extend(_delete_stage_artifacts(output_dir, _stages_to_invalidate(changed_stages))) + + explicit_clear = _expand_clear_stages(clear_cache or []) + if explicit_clear: + 
invalidated.extend(_delete_stage_artifacts(output_dir, explicit_clear)) + + executed_config_path = write_executed_config(config, output_dir) + manifest = { + "schema_version": 1, + "execution_id": str(uuid4()), + "parent_execution_id": parent_execution_id, + "status": "running", + "cache_policy": cache_policy, + "created_at": utc_now_iso(), + "started_at": utc_now_iso(), + "completed_at": None, + "config_hash": current_config_hash, + "stage_hashes": current_hashes, + "changed_stages": changed_stages, + "invalidated": invalidated, + "copied_cache": copied, + "legacy_unverified": legacy_unverified, + "executed_config_path": str(executed_config_path), + } + write_execution_manifest(output_dir, manifest) + + if changed_stages: + logger.info( + "Execution signatures changed for stages: %s", + ", ".join(changed_stages), + ) + if invalidated: + logger.info("Invalidated %s stale cache artifact(s)", len(invalidated)) + if copied: + logger.info("Copied %s valid cache artifact(s)", len(copied)) + + return manifest + + +def preview_execution_changes(config: Any, output_dir: Path) -> Dict[str, Any]: + """Preview signature changes without modifying cache artifacts.""" + output_dir = output_dir.expanduser().resolve() + current_hashes = stage_hashes(config) + previous_manifest = load_execution_manifest(output_dir) + previous_hashes = None + legacy_unverified = False + if previous_manifest: + previous_hashes = previous_manifest.get("stage_hashes") or {} + else: + previous_hashes = _load_legacy_config_hashes(output_dir) + legacy_unverified = ( + previous_hashes is None + and (output_dir / "outputs").exists() + and any((output_dir / "outputs").glob("*.json")) + ) + + changed_stages = [] + if previous_hashes: + changed_stages = [ + stage + for stage, value in current_hashes.items() + if previous_hashes.get(stage) != value + ] + + return { + "has_previous_execution": bool(previous_manifest or previous_hashes), + "legacy_unverified": legacy_unverified, + "changed_stages": 
def _invalidated_stage_names(manifest: Dict[str, Any]) -> List[str]:
    """Return the stages a manifest marks as invalidated, in pipeline order.

    Two manifest fields contribute names:

    * ``invalidated`` -- a list of dict entries; the ``stage`` value of each
      entry is used (stringified and stripped).
    * ``changed_stages`` -- expanded through ``_stages_to_invalidate``.

    The result follows the iteration order of ``STAGE_ARTIFACTS``. Names that
    are not keys of ``STAGE_ARTIFACTS`` (blank strings, stages written by a
    different version of the tool) are dropped instead of raising
    ``ValueError`` from an index lookup, so a slightly malformed manifest
    cannot abort progress initialisation.
    """
    names = {
        str(entry.get("stage", "")).strip()
        # ``or []`` also tolerates an explicit ``null`` stored in the manifest.
        for entry in manifest.get("invalidated") or []
        if isinstance(entry, dict) and entry.get("stage")
    }
    names.update(_stages_to_invalidate(manifest.get("changed_stages") or []))
    # Walking STAGE_ARTIFACTS yields pipeline order and filters unknown names
    # in a single pass.
    return [stage for stage in STAGE_ARTIFACTS if stage in names]
def update_execution_progress_stage(
    output_dir: Path,
    stage: str,
    *,
    status: str,
    source: str = "fresh",
    counters: Optional[Dict[str, Any]] = None,
    error: Optional[str] = None,
) -> Dict[str, Any]:
    """Update one stage in the authoritative execution progress file.

    Args:
        output_dir: Run output directory; expanded and resolved before use.
        stage: Stage name. It is stripped and lower-cased, then must be a key
            of ``STAGE_ARTIFACTS``.
        status: New stage status; must be in ``EXECUTION_PROGRESS_STATUSES``.
        source: Origin of the stage result; must be in
            ``EXECUTION_PROGRESS_SOURCES``.
        counters: Counters to store for the stage. When omitted and the stage
            is completed or skipped without stored counters, they are read
            via ``_read_stage_counters``.
        error: Error text stored on the stage (``None`` clears it).

    Returns:
        The full progress dict that was written to disk.

    Raises:
        ValueError: If ``stage``, ``status`` or ``source`` is not recognised.
    """
    stage = str(stage).strip().lower()
    if stage not in STAGE_ARTIFACTS:
        raise ValueError(f"Unknown execution stage: {stage}")
    if status not in EXECUTION_PROGRESS_STATUSES:
        raise ValueError(f"Unknown execution progress status: {status}")
    if source not in EXECUTION_PROGRESS_SOURCES:
        raise ValueError(f"Unknown execution progress source: {source}")

    output_dir = output_dir.expanduser().resolve()
    progress = load_execution_progress(output_dir)
    if not progress:
        # No progress file yet: build one from whatever manifest exists.
        manifest = load_execution_manifest(output_dir) or {}
        progress = initialize_execution_progress(output_dir, manifest)

    # One timestamp for every field touched by this update.
    now = utc_now_iso()
    stage_items = progress.setdefault(
        "stages",
        [_stage_status_template(name) for name in STAGE_ARTIFACTS],
    )
    by_stage = {
        str(entry.get("stage")): entry
        for entry in stage_items
        if isinstance(entry, dict) and entry.get("stage")
    }
    item = by_stage.get(stage)
    if item is None:
        item = _stage_status_template(stage)
        stage_items.append(item)

    item["status"] = status
    item["source"] = source
    item["error"] = error
    if status == "running":
        if not item.get("started_at"):
            item["started_at"] = now
        # A stage can re-enter "running" after it was pre-marked completed
        # (for example from cache at initialisation). Clear the old completion
        # time so the file never shows a running stage as already finished.
        item["completed_at"] = None
    if status in {"completed", "skipped", "failed"}:
        item["completed_at"] = now
    if counters is not None:
        item["counters"] = counters
    elif status in {"completed", "skipped"} and not item.get("counters"):
        item["counters"] = _read_stage_counters(output_dir, stage)

    # The overall run stays "running" until it is completed elsewhere; only a
    # stage failure flips it here.
    progress["status"] = "failed" if status == "failed" else "running"
    progress["current_stage"] = stage if status == "running" else None
    progress["updated_at"] = now
    write_execution_progress(output_dir, progress)
    return progress
11e76da..f22d9f3 100644 --- a/autonima/models/types.py +++ b/autonima/models/types.py @@ -387,6 +387,9 @@ def serialize_screening_dict( self.annotation.create_all_included_annotations ), "metadata_fields": self.annotation.metadata_fields, + "prompt_type": self.annotation.prompt_type, + "inclusion_criteria": self.annotation.inclusion_criteria, + "exclusion_criteria": self.annotation.exclusion_criteria, "annotations": [ { "name": criteria.name, @@ -420,6 +423,7 @@ class ScreeningResult: timestamp: datetime = field(default_factory=datetime.now) inclusion_criteria_applied: List[str] = field(default_factory=list) exclusion_criteria_applied: List[str] = field(default_factory=list) + cache_signature: Optional[Dict[str, Any]] = None def to_dict(self) -> Dict[str, Any]: """Convert screening result to dictionary.""" @@ -433,6 +437,7 @@ def to_dict(self) -> Dict[str, Any]: "timestamp": self.timestamp.isoformat(), "inclusion_criteria_applied": self.inclusion_criteria_applied, "exclusion_criteria_applied": self.exclusion_criteria_applied, + "cache_signature": self.cache_signature, } diff --git a/autonima/pipeline.py b/autonima/pipeline.py index bb63806..14cd222 100644 --- a/autonima/pipeline.py +++ b/autonima/pipeline.py @@ -26,6 +26,13 @@ ) from .utils import log_error_with_debug from .annotation.processor import AnnotationProcessor +from .execution import ( + complete_execution_manifest, + complete_execution_progress, + initialize_execution_progress, + prepare_execution, + update_execution_progress_stage, +) logger = logging.getLogger(__name__) @@ -58,7 +65,10 @@ def __init__( self, config: PipelineConfig, num_workers: int = 1, - force_reextract_incomplete_fulltext: bool = False + force_reextract_incomplete_fulltext: bool = False, + cache_policy: str = "auto", + clear_cache: List[str] | None = None, + copy_valid_cache_from: str | None = None, ): """ Initialize the pipeline with configuration. 
@@ -71,6 +81,9 @@ def __init__( self.force_reextract_incomplete_fulltext = ( force_reextract_incomplete_fulltext ) + self.cache_policy = cache_policy + self.clear_cache = clear_cache or [] + self.copy_valid_cache_from = copy_valid_cache_from self.results = PipelineResult( config=config, started_at=datetime.now() @@ -84,9 +97,168 @@ def __init__( output_dir = Path(self.config.output.directory) output_dir.mkdir(parents=True, exist_ok=True) + self.execution_manifest = prepare_execution( + self.config, + output_dir, + cache_policy=self.cache_policy, + clear_cache=self.clear_cache, + copy_valid_cache_from=self.copy_valid_cache_from, + ) + self.stage_hashes = self.execution_manifest.get("stage_hashes", {}) + initialize_execution_progress(output_dir, self.execution_manifest) + # Initialize components self._setup_components() + @property + def _output_dir(self) -> Path: + return Path(self.config.output.directory) + + def _update_progress_stage( + self, + stage: str, + *, + status: str, + source: str = "fresh", + counters: Dict[str, Any] | None = None, + error: str | None = None, + ) -> None: + update_execution_progress_stage( + self._output_dir, + stage, + status=status, + source=source, + counters=counters, + error=error, + ) + + async def _run_progress_stage(self, stage: str, func) -> None: + self._update_progress_stage(stage, status="running", source="fresh") + try: + await func() + except Exception as exc: + self._update_progress_stage( + stage, + status="failed", + source="fresh", + counters=self._stage_counters(stage), + error=str(exc), + ) + raise + + counters = self._stage_counters(stage) + source = "not_applicable" if counters.get("status") in {"Off", "Skipped"} else "fresh" + status = "skipped" if counters.get("status") in {"Off", "Skipped"} else "completed" + self._update_progress_stage( + stage, + status=status, + source=source, + counters=counters, + ) + + def _stage_counters(self, stage: str) -> Dict[str, Any]: + if stage == "search": + return { + 
"studies_found": len(self.results.studies), + } + if stage == "abstract": + abstract_results = self.results.abstract_screening_results + if abstract_results: + included = sum( + 1 for result in abstract_results + if "included" in str(result.decision).lower() + ) + excluded = sum( + 1 for result in abstract_results + if "excluded" in str(result.decision).lower() + ) + return { + "screened": len(abstract_results), + "included": included, + "excluded": excluded, + } + return { + "screened": len([s for s in self.results.studies if s.status != StudyStatus.PENDING]), + "included": len([s for s in self.results.studies if s.status == StudyStatus.INCLUDED_ABSTRACT]), + "excluded": len([s for s in self.results.studies if s.status == StudyStatus.EXCLUDED_ABSTRACT]), + } + if stage == "retrieval": + stats = self.results.execution_stats.get("retrieval", {}) + if isinstance(stats, dict) and stats: + return { + "fulltext_candidates": stats.get("total_considered", 0), + "available": stats.get("retrieved_or_cached", 0), + "missing": stats.get("missing_full_text", 0), + } + return {} + if stage == "fulltext": + fulltext_results = self.results.fulltext_screening_results + included = sum( + 1 for result in fulltext_results + if "included" in str(result.decision).lower() + ) + excluded = sum( + 1 for result in fulltext_results + if "excluded" in str(result.decision).lower() + ) + incomplete = sum( + 1 for result in fulltext_results + if "incomplete" in str(result.decision).lower() + ) + return { + "screened": len(fulltext_results), + "included": included, + "excluded": excluded, + "incomplete": incomplete, + } + if stage == "parsing": + stats = self.results.execution_stats.get("coordinate_parsing", {}) + if isinstance(stats, dict) and stats.get("enabled") is False: + return {"status": "Off"} + studies_with_analyses = [ + study for study in self.results.studies + if study.status == StudyStatus.INCLUDED_FULLTEXT and study.analyses + ] + analyses = sum(len(study.analyses or []) for study 
in studies_with_analyses) + coordinates = sum( + len(getattr(analysis, "points", []) or []) + for study in studies_with_analyses + for analysis in study.analyses or [] + ) + return { + "studies": len(studies_with_analyses), + "analyses": analyses, + "coordinates": coordinates, + } + if stage == "annotation": + stats = self.results.execution_stats.get("annotation", {}) + if isinstance(stats, dict) and stats.get("enabled") is False: + return {"status": "Off"} + output_file = self._output_dir / "outputs" / "annotation_results.json" + annotation_count = 0 + if output_file.exists(): + try: + rows = json.loads(output_file.read_text(encoding="utf-8")) + annotation_count = len({ + str(row.get("annotation_name", "")).strip() + for row in rows + if isinstance(row, dict) and str(row.get("annotation_name", "")).strip() + }) if isinstance(rows, list) else 0 + except Exception: + annotation_count = 0 + return { + "decisions": stats.get("decisions", 0) if isinstance(stats, dict) else 0, + "annotations": annotation_count, + } + if stage == "output": + stats = self.results.execution_stats.get("prisma_stats", {}) + counters = dict(stats) if isinstance(stats, dict) else {} + counters["nimads_available"] = ( + self._output_dir / "outputs" / "nimads_studyset.json" + ).exists() + return counters + return {} + def _setup_components(self): """Initialize pipeline components based on configuration.""" # Initialize search engine @@ -130,6 +302,16 @@ def _complete_run(self, completed_stage: RunStopStage) -> PipelineResult: completed_stage, duration, ) + complete_execution_manifest( + Path(self.config.output.directory), + status="completed", + completed_stage=completed_stage, + errors=self.results.errors, + ) + complete_execution_progress( + Path(self.config.output.directory), + status="completed", + ) return self.results async def run(self, stop_after_stage: RunStopStage = "full") -> PipelineResult: @@ -160,29 +342,29 @@ async def run(self, stop_after_stage: RunStopStage = "full") -> 
PipelineResult: try: # Phase 1: Literature Search - await self._execute_search_phase() + await self._run_progress_stage("search", self._execute_search_phase) if stop_stage == "search": return self._complete_run("search") # Phase 2: Abstract Screening - await self._execute_abstract_screening() + await self._run_progress_stage("abstract", self._execute_abstract_screening) if stop_stage == "abstract": return self._complete_run("abstract") # Phase 3: Full-text Retrieval - await self._execute_retrieval_phase() + await self._run_progress_stage("retrieval", self._execute_retrieval_phase) # Phase 4: Full-text Screening - await self._execute_fulltext_screening() + await self._run_progress_stage("fulltext", self._execute_fulltext_screening) # Phase 5: Coordinate Parsing - await self._execute_coordinate_parsing() + await self._run_progress_stage("parsing", self._execute_coordinate_parsing) # Phase 6: Analysis Annotation - await self._execute_annotation_phase() + await self._run_progress_stage("annotation", self._execute_annotation_phase) # Phase 7: Generate Outputs - await self._execute_output_phase() + await self._run_progress_stage("output", self._execute_output_phase) # Complete pipeline return self._complete_run("full") @@ -190,6 +372,16 @@ async def run(self, stop_after_stage: RunStopStage = "full") -> PipelineResult: except Exception as e: log_error_with_debug(logger, f"Pipeline failed: {e}") self.results.errors.append(str(e)) + complete_execution_manifest( + Path(self.config.output.directory), + status="failed", + errors=self.results.errors, + ) + complete_execution_progress( + Path(self.config.output.directory), + status="failed", + error=str(e), + ) raise async def _execute_search_phase(self): @@ -220,7 +412,12 @@ async def _execute_search_phase(self): search_results_file = output_dir / "search_results.json" search_data = { "studies": [study.to_dict() for study in studies], - "timestamp": datetime.now().isoformat() + "timestamp": datetime.now().isoformat(), + 
"cache_signature": { + "schema_version": 1, + "stage": "search", + "stage_hash": self.stage_hashes.get("search"), + }, } with open(search_results_file, 'w') as f: import json @@ -277,7 +474,12 @@ async def _execute_abstract_screening(self): "screening_results": [ result.to_dict() for result in screening_results ], - "timestamp": datetime.now().isoformat() + "timestamp": datetime.now().isoformat(), + "cache_signature": { + "schema_version": 1, + "stage": "abstract", + "stage_hash": self.stage_hashes.get("abstract"), + }, } _atomic_write_json(screening_results_file, screening_data) @@ -496,7 +698,12 @@ def _study_has_coordinates(study) -> bool: for study in self.results.studies if study.fulltext_available or study.pmcid ], - "timestamp": datetime.now().isoformat() + "timestamp": datetime.now().isoformat(), + "cache_signature": { + "schema_version": 1, + "stage": "retrieval", + "stage_hash": self.stage_hashes.get("retrieval"), + }, } with open(retrieval_results_file, 'w') as f: json.dump(retrieval_data, f, indent=2) @@ -579,7 +786,12 @@ async def _execute_fulltext_screening(self): "screening_results": [ result.to_dict() for result in screening_results ], - "timestamp": datetime.now().isoformat() + "timestamp": datetime.now().isoformat(), + "cache_signature": { + "schema_version": 1, + "stage": "fulltext", + "stage_hash": self.stage_hashes.get("fulltext"), + }, } _atomic_write_json( fulltext_screening_results_file, fulltext_screening_data @@ -742,9 +954,10 @@ async def _execute_annotation_phase(self): # Get studies for system-wide annotations when enabled. 
all_studies = None all_abstract_studies = None + load_excluded = getattr(self.config.retrieval, 'load_excluded', False) if getattr( self.config.annotation, 'create_all_included_annotations', True - ): + ) and load_excluded: all_studies = [ s for s in self.results.studies if s.analyses @@ -754,6 +967,13 @@ async def _execute_annotation_phase(self): s for s in all_studies if s.status != StudyStatus.EXCLUDED_ABSTRACT ] + elif getattr( + self.config.annotation, 'create_all_included_annotations', True + ): + logger.info( + "Skipping 'all_studies' and 'all_abstract' annotations " + "because retrieval.load_excluded is false" + ) if ( not included_studies @@ -853,6 +1073,17 @@ async def _load_cached_coordinate_results(self): import json with open(coordinate_cache_file, 'r') as f: cached_data = json.load(f) + + cache_signature = cached_data.get("cache_signature") or {} + cached_stage_hash = cache_signature.get("stage_hash") + if ( + cached_stage_hash + and cached_stage_hash != self.stage_hashes.get("parsing") + ): + logger.info( + "Skipping stale coordinate parsing cache for current parsing signature" + ) + return # Apply cached results to studies cached_studies = {study_data['pmid']: study_data for study_data in cached_data.get('studies', [])} @@ -937,7 +1168,12 @@ async def _save_coordinate_parsing_results(self): } for study in studies_with_analyses ], - "timestamp": datetime.now().isoformat() + "timestamp": datetime.now().isoformat(), + "cache_signature": { + "schema_version": 1, + "stage": "parsing", + "stage_hash": self.stage_hashes.get("parsing"), + }, } # Save to file @@ -1271,7 +1507,10 @@ async def run_pipeline_from_config( config: PipelineConfig = None, stop_after_stage: RunStopStage = "full", num_workers: int = 1, - force_reextract_incomplete_fulltext: bool = False + force_reextract_incomplete_fulltext: bool = False, + cache_policy: str = "auto", + clear_cache: List[str] | None = None, + copy_valid_cache_from: str | None = None, ) -> PipelineResult: """ Run 
def _study_input_hash(self, study: Study, screening_type: str) -> str:
    """Return a hash of the study fields that feed a screening decision.

    For ``"abstract"`` screening the hash covers pmid, title, abstract,
    journal, publication date and DOI. For any other screening type it covers
    pmid, title, pmcid, the full-text availability flag and a hash of
    ``study.full_text`` (``None`` when reading the full text fails).

    Side effect: on the non-abstract path, an unset
    ``study.full_text_output_dir`` is filled in with ``self.result_dir``
    before ``study.full_text`` is read.
    """
    if screening_type == "abstract":
        return stable_hash(
            {
                "pmid": study.pmid,
                "title": study.title,
                "abstract": study.abstract,
                "journal": study.journal,
                "publication_date": study.publication_date,
                "doi": study.doi,
            }
        )

    fields: Dict[str, Any] = {
        "pmid": study.pmid,
        "title": study.title,
        "pmcid": study.pmcid,
        "fulltext_available": study.fulltext_available,
    }
    try:
        # The output dir is set before touching ``full_text``.
        # NOTE(review): presumably ``full_text`` loads lazily from that
        # directory -- confirm against the Study model.
        if not study.full_text_output_dir:
            study.full_text_output_dir = str(self.result_dir)
        text_digest = stable_hash(study.full_text)
    except Exception:
        # Best effort: a missing or unreadable full text hashes as None.
        text_digest = None
    fields["full_text_hash"] = text_digest
    return stable_hash(fields)
"schema_version": 1, + "stage": screening_type, + "stage_hash": stable_hash(config), + "study_input_hash": self._study_input_hash(study, screening_type), + "model": model, + } + + def _cached_signature_matches( + self, + existing_result: Dict[str, Any], + expected_signature: Dict[str, Any], + ) -> bool: + """Return True for valid signed caches; allow legacy unsigned caches.""" + cached_signature = existing_result.get("cache_signature") + if not cached_signature: + return True + return cached_signature == expected_signature + def _get_status_for_decision( self, screening_type: str, @@ -343,7 +404,12 @@ def _create_screening_result( model_used=model, screening_type=screening_type, inclusion_criteria_applied=inclusion_criteria_applied or [], - exclusion_criteria_applied=exclusion_criteria_applied or [] + exclusion_criteria_applied=exclusion_criteria_applied or [], + cache_signature=self._screening_cache_signature( + study, + screening_type, + self._get_screening_config(screening_type), + ), ) def _screen_single_study( @@ -536,6 +602,17 @@ async def screen_studies( for study in screenable_studies: if study.pmid in existing_results_dict: existing_result = existing_results_dict[study.pmid] + expected_signature = self._screening_cache_signature( + study, + screening_type, + config, + ) + if not self._cached_signature_matches( + existing_result, + expected_signature, + ): + studies_to_screen.append(study) + continue # Normalize cached decisions to the current screening stage. 
old_decision = str(existing_result["decision"]).strip().lower() diff --git a/autonima/search/pubmed.py b/autonima/search/pubmed.py index ba729fc..453fa56 100644 --- a/autonima/search/pubmed.py +++ b/autonima/search/pubmed.py @@ -81,7 +81,8 @@ async def search(self, query: str) -> List[Study]: # Load existing search results for caching cached_studies = self._load_cached_search_results() - cached_pmids = {study.pmid for study in cached_studies} + cached_by_pmid = {study.pmid: study for study in cached_studies} + cached_pmids = set(cached_by_pmid) # Identify PMIDs that need to be fetched new_pmids = [pmid for pmid in pmids if pmid not in cached_pmids] @@ -99,8 +100,15 @@ async def search(self, query: str) -> List[Study]: f"Successfully retrieved {len(new_studies)} new studies" ) - # Combine cached and new studies - studies = cached_studies + new_studies + # Combine only records requested by the current search/list. This + # preserves metadata caching without letting stale studies from an + # older broader query flow into the current execution. + new_by_pmid = {study.pmid: study for study in new_studies} + studies = [ + cached_by_pmid.get(pmid) or new_by_pmid[pmid] + for pmid in pmids + if pmid in cached_by_pmid or pmid in new_by_pmid + ] return studies diff --git a/autonima/templates/sample_config.yml b/autonima/templates/sample_config.yml index cc26be4..be4693d 100644 --- a/autonima/templates/sample_config.yml +++ b/autonima/templates/sample_config.yml @@ -6,6 +6,10 @@ # - the repository example config # - the user documentation +# Optional global defaults applied when stage-level values are omitted. +# defaults: +# model: "gpt-5-mini-2025-08-07" + search: database: "pubmed" # Option 1: Specify a search query. 
diff --git a/autonima/webui/__init__.py b/autonima/webui/__init__.py new file mode 100644 index 0000000..1358d22 --- /dev/null +++ b/autonima/webui/__init__.py @@ -0,0 +1,5 @@ +"""Local web UI for Autonima.""" + +from .app import create_app, run_ui_server + +__all__ = ["create_app", "run_ui_server"] diff --git a/autonima/webui/app.py b/autonima/webui/app.py new file mode 100644 index 0000000..25f785b --- /dev/null +++ b/autonima/webui/app.py @@ -0,0 +1,613 @@ +"""FastAPI app for Autonima local web UI.""" + +import os +import webbrowser +from pathlib import Path, PurePosixPath +from typing import Any, Dict, Optional + +import yaml + +from autonima.config import ConfigManager, ConfigurationError + +from .preferences import PreferencesManager +from .runs import RunManager +from .secrets import SECRETS_KEYS, SecretsManager +from .state import WorkspaceState + + +def _ensure_fastapi_imports(): + try: + from fastapi import FastAPI, HTTPException, Query + from fastapi.middleware.cors import CORSMiddleware + from fastapi.responses import FileResponse + from fastapi.staticfiles import StaticFiles + from pydantic import BaseModel, Field + except ImportError as exc: + raise ImportError( + "FastAPI UI dependencies are missing. Install with `pip install -e .[ui]`." 
+ ) from exc + + return { + "FastAPI": FastAPI, + "HTTPException": HTTPException, + "Query": Query, + "CORSMiddleware": CORSMiddleware, + "FileResponse": FileResponse, + "StaticFiles": StaticFiles, + "BaseModel": BaseModel, + "Field": Field, + } + + +def _config_to_form(config: Dict[str, Any]) -> Dict[str, Any]: + """Config -> form adapter for wizard UI without synthetic defaults.""" + return dict(config or {}) + + +def _form_to_yaml_text(form: Dict[str, Any]) -> str: + """Form payload -> YAML text.""" + return yaml.safe_dump(form, sort_keys=False) + + +def create_app( + workspace_root: Path, + env_path: Optional[Path] = None, + preferences_path: Optional[Path] = None, +): + """Create FastAPI app instance.""" + deps = _ensure_fastapi_imports() + FastAPI = deps["FastAPI"] + HTTPException = deps["HTTPException"] + Query = deps["Query"] + CORSMiddleware = deps["CORSMiddleware"] + FileResponse = deps["FileResponse"] + StaticFiles = deps["StaticFiles"] + BaseModel = deps["BaseModel"] + Field = deps["Field"] + + class WorkspaceUpdate(BaseModel): + workspace_root: str + + class ProjectCreate(BaseModel): + name: Optional[str] = None + config_path: Optional[str] = None + description: Optional[str] = None + + class ProjectImport(BaseModel): + config_path: str + name: Optional[str] = None + description: Optional[str] = None + + class ProjectUpdate(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + + class ProjectCloneRequest(BaseModel): + mode: str = "schema_only" + name: Optional[str] = None + description: Optional[str] = None + + class SpecUpdate(BaseModel): + yaml_text: Optional[str] = None + form: Optional[Dict[str, Any]] = None + + class PipelineRunRequest(BaseModel): + mode: str = Field(default="run") + output_folder: Optional[str] = None + verbose: bool = False + dry_run: bool = False + debug: bool = False + num_workers: int = 1 + force_reextract_incomplete_fulltext: bool = False + apply_default_email: bool = True + cache_policy: str = 
"auto" + clear_cache: list[str] = Field(default_factory=list) + copy_valid_cache_from: Optional[str] = None + execution_mode: str = "auto_new_on_change" + + class MetaRunRequest(BaseModel): + output_folder: str + source_run_id: Optional[str] = None + estimator: str = "mkdadensity" + estimator_args: str = "{}" + corrector: str = "fdr" + corrector_args: str = "{}" + include_ids: Optional[str] = None + run_reports: bool = False + fail_fast: bool = False + debug: bool = False + + class SecretsUpdate(BaseModel): + OPENAI_API_KEY: Optional[str] = None + OPENAI_API_GATEWAY: Optional[str] = None + PUBGET_API_KEY: Optional[str] = None + NCBI_EMAIL: Optional[str] = None + + class PreferencesUpdate(BaseModel): + preferred_models: Optional[list[str]] = None + default_model: Optional[str] = None + + class ProjectDeleteRequest(BaseModel): + mode: str + + state = WorkspaceState(workspace_root) + secrets = SecretsManager(env_path=env_path) + preferences = PreferencesManager(preferences_path=preferences_path) + run_manager = RunManager(state=state, secrets_provider=secrets.load) + + app = FastAPI(title="Autonima UI", version="0.1.0") + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + static_dir = Path(__file__).parent / "static" + app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") + + def _is_supported_meta_artifact(path: Path) -> bool: + lower_name = path.name.lower() + return lower_name.endswith(".nii") or lower_name.endswith(".nii.gz") + + def _resolve_run_output_folder(run_id: str) -> tuple[Dict[str, Any], Path]: + metadata = state.load_run_metadata(run_id) + if not metadata: + raise HTTPException(status_code=404, detail=f"Run not found: {run_id}") + + output_folder_raw = str(metadata.get("output_folder") or "").strip() + if not output_folder_raw: + raise HTTPException( + status_code=400, + detail="Run does not declare an output_folder", + ) + + output_folder = 
Path(output_folder_raw).expanduser().resolve(strict=False) + if not output_folder.exists() or not output_folder.is_dir(): + raise HTTPException( + status_code=404, + detail=f"Output folder not found: {output_folder}", + ) + + return metadata, output_folder + + def _resolve_meta_results_root(output_folder: Path) -> Path: + candidates = [ + output_folder / "outputs" / "meta_analysis_results", + output_folder / "meta_analysis_results", + output_folder, + ] + for candidate in candidates: + if candidate.exists() and candidate.is_dir(): + return candidate + return output_folder + + def _collect_meta_artifacts(artifacts_root: Path) -> list[Dict[str, Any]]: + files: list[Dict[str, Any]] = [] + for path in artifacts_root.rglob("*"): + if not path.is_file() or not _is_supported_meta_artifact(path): + continue + relative_path = path.relative_to(artifacts_root).as_posix() + files.append( + { + "name": path.name, + "relative_path": relative_path, + "size_bytes": path.stat().st_size, + "absolute_path": path, + } + ) + files.sort(key=lambda item: item.get("relative_path", "")) + return files + + def _normalize_relative_artifact_path(raw_path: str) -> str: + text = str(raw_path or "").strip() + if not text: + raise HTTPException(status_code=400, detail="Artifact path is required") + cleaned = PurePosixPath(text).as_posix().lstrip("/") + if cleaned in {"", "."}: + raise HTTPException(status_code=400, detail="Artifact path is required") + if cleaned.startswith("../") or "/../" in cleaned or cleaned == "..": + raise HTTPException(status_code=400, detail="Invalid artifact path") + return cleaned + + def _find_meta_artifact_path(run_id: str, relative_path: str) -> tuple[Dict[str, Any], Path, Path, Dict[str, Any]]: + metadata, output_folder = _resolve_run_output_folder(run_id) + artifacts_root = _resolve_meta_results_root(output_folder) + normalized_relative_path = _normalize_relative_artifact_path(relative_path) + files = _collect_meta_artifacts(artifacts_root) + artifact = next( + 
def _resolve_missing_fulltexts_artifact_path(run_id: str, filename: str) -> tuple[Dict[str, Any], Path]:
    """Locate a run's missing-fulltexts report under ``<output folder>/outputs``.

    Only ``missing_fulltexts.txt`` and ``missing_fulltexts.csv`` are accepted.

    Returns:
        The run metadata and the path of the existing report file.

    Raises:
        HTTPException: 400 for any other filename; 404 when the file is
            absent or is not a regular file. Errors raised by
            ``_resolve_run_output_folder`` propagate unchanged.
    """
    supported = {"missing_fulltexts.txt", "missing_fulltexts.csv"}
    if filename not in supported:
        raise HTTPException(status_code=400, detail="Unsupported artifact")

    metadata, run_folder = _resolve_run_output_folder(run_id)
    report_path = run_folder / "outputs" / filename
    if not (report_path.exists() and report_path.is_file()):
        raise HTTPException(status_code=404, detail="Artifact not found")
    return metadata, report_path
@app.put("/api/projects/{project_id}")
async def update_project(project_id: str, payload: ProjectUpdate):
    """Apply a partial name/description update to a project.

    Only the fields present on the payload are changed; text values are
    stripped first. A ``KeyError`` raised while updating is reported as 404,
    any other failure (including an empty name or an empty update) as 400.
    """
    try:
        changes: Dict[str, Any] = {}

        requested_name = payload.name
        if requested_name is not None:
            trimmed_name = requested_name.strip()
            if not trimmed_name:
                raise ValueError("Project name cannot be empty")
            changes["name"] = trimmed_name

        requested_description = payload.description
        if requested_description is not None:
            changes["description"] = requested_description.strip()

        if not changes:
            raise ValueError("No updates provided")
        updated_project = state.update_project(project_id, changes)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return updated_project
@app.get("/api/projects/{project_id}/spec")
async def get_project_spec(project_id: str):
    """Return a project's stored YAML spec together with its form projection.

    The response carries the project id, the config path, the raw YAML text
    and the form representation built from the parsed YAML (an empty mapping
    is used when the document is empty).

    Raises:
        HTTPException: 404 when a ``KeyError`` is raised while loading the
            spec; 400 when the stored text is not parseable YAML.
    """
    try:
        spec_payload = state.get_project_spec(project_id)
        yaml_text = spec_payload["yaml_text"]
        config_dict = yaml.safe_load(yaml_text) or {}
        return {
            "project_id": project_id,
            "config_path": spec_payload["config_path"],
            "yaml_text": yaml_text,
            "form": _config_to_form(config_dict),
        }
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except yaml.YAMLError as exc:
        # A hand-edited spec can be malformed; report it as a client-visible
        # error instead of letting the parser exception surface as a 500.
        raise HTTPException(
            status_code=400,
            detail=f"Project spec is not valid YAML: {exc}",
        ) from exc
@app.post("/api/projects/{project_id}/validate")
async def validate_spec(project_id: str):
    """Validate a project's stored spec and report the outcome as JSON.

    A ``KeyError`` raised while loading the spec yields a 404. Validation
    problems are never raised: they are returned as ``{"ok": False, ...}``
    with the error text in ``message``.
    """
    try:
        stored_spec = state.get_project_spec(project_id)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    raw_yaml = stored_spec["yaml_text"]
    try:
        parsed = yaml.safe_load(raw_yaml)
        if parsed is None:
            raise ConfigurationError("Configuration file is empty")
        validated_config = ConfigManager().load_from_dict(parsed).to_dict()
    except ConfigurationError as exc:
        return {"ok": False, "message": str(exc)}
    except Exception as exc:
        return {"ok": False, "message": f"Unexpected validation error: {exc}"}

    return {
        "ok": True,
        "message": "Configuration is valid",
        "config": validated_config,
    }
@app.get("/api/runs")
async def list_runs(project_id: Optional[str] = None):
    """List runs, optionally filtered by project.

    Each stored run record is replaced by the run manager's view of that run;
    when the run manager raises for a record, the stored record is returned
    unchanged.
    """

    def _with_manager_view(record):
        # Fall back to the persisted record when the manager cannot serve it.
        try:
            return run_manager.get_run(record["id"])
        except Exception:
            return record

    stored_runs = state.list_runs(project_id=project_id)
    return {"runs": [_with_manager_view(record) for record in stored_runs]}
@app.get("/api/runs/{run_id}/meta-artifacts")
async def list_meta_artifacts(run_id: str):
    """List the meta-analysis artifacts available for a run.

    The response names the run, its kind and status, the directory the
    artifacts were collected from, and one entry per file with its name,
    relative path and size. Absolute paths are not included in the entries.
    """
    run_metadata, run_output = _resolve_run_output_folder(run_id)
    root = _resolve_meta_results_root(run_output)

    exposed_fields = ("name", "relative_path", "size_bytes")
    listing = []
    for entry in _collect_meta_artifacts(root):
        listing.append({field: entry.get(field) for field in exposed_fields})

    return {
        "run_id": run_id,
        "run_kind": run_metadata.get("kind"),
        "run_status": run_metadata.get("status"),
        "artifacts_root": str(root),
        "files": listing,
    }
"missing_fulltexts.csv") + return FileResponse(artifact_path, filename="missing_fulltexts.csv") + + @app.post("/api/runs/{run_id}/cancel") + async def cancel_run(run_id: str): + try: + return run_manager.cancel_run(run_id) + except KeyError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + @app.get("/api/settings/secrets") + async def get_secrets(): + values = secrets.load() + masked = secrets.load_masked() + return { + "masked": masked, + "values": {key: values.get(key, "") for key in SECRETS_KEYS}, + } + + @app.put("/api/settings/secrets") + async def put_secrets(payload: SecretsUpdate): + updates = payload.model_dump() + saved = secrets.save(updates) + return { + "saved": {key: saved.get(key, "") for key in SECRETS_KEYS}, + "masked": secrets.load_masked(), + } + + @app.get("/api/settings/preferences") + async def get_preferences(): + return preferences.load() + + @app.put("/api/settings/preferences") + async def put_preferences(payload: PreferencesUpdate): + updates = payload.model_dump(exclude_unset=True) + return preferences.save(updates) + + @app.get("/{full_path:path}") + async def spa_fallback(full_path: str): + if full_path.startswith("api/"): + raise HTTPException(status_code=404, detail="Not found") + return FileResponse(static_dir / "index.html") + + return app + + +def run_ui_server( + workspace: Optional[str], + host: str, + port: int, + open_browser: bool, +) -> None: + """Run the local FastAPI UI server.""" + try: + import uvicorn + except ImportError as exc: + raise ImportError( + "Uvicorn is required for `autonima ui`. Install with `pip install -e .[ui]`." 
PREFERENCES_KEY_MODELS = "preferred_models"
PREFERENCES_KEY_DEFAULT_MODEL = "default_model"


class PreferencesManager:
    """Manage UI preferences stored in ~/.autonima-ui.json.

    The file holds two values: a de-duplicated list of preferred model names
    and a default model, which must be one of the preferred models (otherwise
    it is stored as an empty string).
    """

    def __init__(self, preferences_path: Path | None = None):
        """Use ``preferences_path`` or fall back to ``~/.autonima-ui.json``."""
        self.preferences_path = preferences_path or (Path.home() / ".autonima-ui.json")

    @staticmethod
    def _defaults() -> Dict[str, Any]:
        """Return a fresh copy of the empty preferences payload."""
        return {
            PREFERENCES_KEY_MODELS: [],
            PREFERENCES_KEY_DEFAULT_MODEL: "",
        }

    def _normalize_models(self, values: Any) -> List[str]:
        """Return the stripped, non-empty, unique strings of ``values`` in order."""
        if not isinstance(values, list):
            return []

        cleaned: List[str] = []
        seen = set()
        for value in values:
            if not isinstance(value, str):
                continue
            model = value.strip()
            if not model or model in seen:
                continue
            seen.add(model)
            cleaned.append(model)
        return cleaned

    def _normalize_default_model(self, value: Any, allowed_models: List[str]) -> str:
        """Return the stripped ``value`` if it is an allowed model, else ``""``."""
        if not isinstance(value, str):
            return ""
        model = value.strip()
        if not model or model not in allowed_models:
            return ""
        return model

    def _normalize(self, raw: Dict[str, Any]) -> Dict[str, Any]:
        """Build a clean preferences payload from a raw mapping."""
        models = self._normalize_models(raw.get(PREFERENCES_KEY_MODELS, []))
        return {
            PREFERENCES_KEY_MODELS: models,
            PREFERENCES_KEY_DEFAULT_MODEL: self._normalize_default_model(
                raw.get(PREFERENCES_KEY_DEFAULT_MODEL, ""),
                models,
            ),
        }

    def load(self) -> Dict[str, Any]:
        """Read and normalise the preferences file.

        Returns the default (empty) payload when the file is missing,
        unreadable, not valid UTF-8, not valid JSON, or not a JSON object.
        """
        try:
            payload = json.loads(self.preferences_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError (non-UTF-8 bytes).
            return self._defaults()

        if not isinstance(payload, dict):
            return self._defaults()

        return self._normalize(payload)

    def save(self, updates: Dict[str, Any]) -> Dict[str, Any]:
        """Merge ``updates`` into the stored preferences and write them back.

        Only the two known keys are taken from ``updates``; the merged result
        is normalised before being written. Returns the normalised payload.
        """
        merged = dict(self.load())
        for key in (PREFERENCES_KEY_MODELS, PREFERENCES_KEY_DEFAULT_MODEL):
            if key in updates:
                merged[key] = updates.get(key)

        normalized = self._normalize(merged)
        self.preferences_path.write_text(
            json.dumps(normalized, indent=2) + "\n",
            encoding="utf-8",
        )
        try:
            self.preferences_path.chmod(0o600)
        except OSError:
            # Best-effort permissions for cross-platform compatibility.
            pass

        return normalized
def extract_live_progress(log_lines: List[str], current_stage: str | None) -> Dict[str, Any] | None:
    """Parse the latest tqdm-style progress sample from captured subprocess output.

    A line counts as a progress sample when it contains a ``|`` and a
    ``current/total`` pair of integers with a positive total. The stage
    attached to a sample is ``current_stage`` until a log line from which
    ``infer_stage_from_logs`` can infer a stage is seen, after which the most
    recently inferred stage is used.

    Returns:
        A dict with ``stage``, ``label`` (the text before the first ``|``,
        falling back to the stage or ``"running"``), ``current`` (clamped to
        ``total``), ``total`` and ``percent`` (one decimal) for the last
        sample found, or ``None`` when no line qualifies.
    """
    # The groups must be named: the values are read back by name below.
    fraction_re = re.compile(r"(?P<current>\d+)\s*/\s*(?P<total>\d+)")

    stage = current_stage
    latest: Dict[str, Any] | None = None

    for raw_line in log_lines:
        line = str(raw_line or "").strip()
        if not line:
            continue

        inferred_stage = infer_stage_from_logs([line])
        if inferred_stage:
            stage = inferred_stage

        if "|" not in line:
            continue
        match = fraction_re.search(line)
        if not match:
            continue

        total = int(match.group("total"))
        if total <= 0:
            continue
        current = min(int(match.group("current")), total)
        percent = round((current / total) * 100, 1)
        label = line.split("|", 1)[0].strip() or (stage or "running")
        latest = {
            "stage": stage,
            "label": label,
            "current": current,
            "total": total,
            "percent": percent,
        }

    return latest
+ retrieval = config.get("retrieval") + if isinstance(retrieval, dict) and isinstance(retrieval.get("parse_coordinates"), bool): + return bool(retrieval.get("parse_coordinates")) + + return True + + +def _read_missing_fulltexts(outputs_dir: Path | None) -> Dict[str, Any]: + missing_payload: Dict[str, Any] = { + "available": False, + "count": 0, + "txt_path": None, + "csv_path": None, + "preview_pmids": [], + "preview_rows": [], + } + if not outputs_dir or not outputs_dir.exists(): + return missing_payload + + txt_path = outputs_dir / "missing_fulltexts.txt" + csv_path = outputs_dir / "missing_fulltexts.csv" + + pmids: List[str] = [] + if txt_path.exists() and txt_path.is_file(): + try: + with txt_path.open("r", encoding="utf-8") as f: + pmids = [line.strip() for line in f if line.strip()] + missing_payload["txt_path"] = str(txt_path) + except Exception: + pmids = [] + + preview_rows: List[Dict[str, Any]] = [] + csv_count = 0 + if csv_path.exists() and csv_path.is_file(): + try: + with csv_path.open("r", encoding="utf-8", newline="") as f: + rows = list(csv.DictReader(f)) + csv_count = len(rows) + preview_rows = rows[:5] + missing_payload["csv_path"] = str(csv_path) + except Exception: + csv_count = 0 + preview_rows = [] + + count = len(pmids) if pmids else csv_count + if count > 0: + missing_payload["available"] = True + missing_payload["count"] = count + missing_payload["preview_pmids"] = pmids[:10] + missing_payload["preview_rows"] = preview_rows + + return missing_payload + + +def _progress_from_execution_file( + progress_data: Dict[str, Any], + *, + run_status: str, + log_lines: List[str], + nimads_available: bool, + nimads_export_logged: bool, + nimads_studyset_path: Path | None, + missing_fulltexts: Dict[str, Any], +) -> Dict[str, Any]: + """Build UI progress payload from authoritative execution_progress.json.""" + stage_items = progress_data.get("stages", []) + by_stage = { + str(item.get("stage")): item + for item in stage_items + if isinstance(item, dict) 
def _tally_screening_decisions(results: Any) -> Dict[str, int]:
    """Count included/excluded/incomplete decisions in a screening result list."""
    rows = results if isinstance(results, list) else []
    tally = {"screened": len(rows), "included": 0, "excluded": 0, "incomplete": 0}
    for row in rows:
        if not isinstance(row, dict):
            # A malformed row still counts as screened but has no decision to
            # classify; skipping it keeps one bad entry from breaking the view.
            continue
        decision = str(row.get("decision", "")).lower()
        if "included" in decision:
            tally["included"] += 1
        elif "incomplete" in decision:
            tally["incomplete"] += 1
        else:
            tally["excluded"] += 1
    return tally


def _summarize_parsed_coordinates(parsing_data: Dict[str, Any]) -> Dict[str, int]:
    """Count studies, analyses and coordinate points in parsing results."""
    studies = parsing_data.get("studies", [])
    study_rows = studies if isinstance(studies, list) else []
    analyses_count = 0
    coordinates_count = 0
    for study in study_rows:
        analyses = study.get("analyses", []) if isinstance(study, dict) else []
        if not isinstance(analyses, list):
            continue
        analyses_count += len(analyses)
        for analysis in analyses:
            points = analysis.get("points", []) if isinstance(analysis, dict) else []
            if isinstance(points, list):
                coordinates_count += len(points)
    return {
        "studies": len(study_rows),
        "analyses": analyses_count,
        "coordinates": coordinates_count,
    }


def _mark_active_stage(
    stages: Dict[str, Dict[str, Any]],
    current_stage: str | None,
    status: str,
) -> None:
    """Set ``status`` on the stage the run is (or was last) working on.

    That is the log-inferred ``current_stage`` if it is not completed, or the
    first non-completed stage when no stage could be inferred.
    """
    for stage in STAGES:
        if stages[stage]["status"] == "completed":
            continue
        if current_stage is None or current_stage == stage:
            stages[stage]["status"] = status
            break


def build_stage_status(
    run_status: str,
    output_folder: str | None,
    log_lines: List[str],
) -> Dict[str, Any]:
    """Build stage timeline and counters from outputs and logs.

    When ``<output_folder>/outputs/execution_progress.json`` holds a JSON
    object, the payload is built from it by ``_progress_from_execution_file``.
    Otherwise each stage is marked completed when its result file is present
    in the outputs directory, and the active stage is derived from the logs:
    it is reported as ``running`` for running/canceling runs and as
    ``failed``/``canceled`` for runs that ended that way.

    Args:
        run_status: Status of the run (e.g. ``running``, ``completed``,
            ``failed``, ``canceled``, ``canceling``).
        output_folder: Run output folder, or ``None`` when unknown.
        log_lines: Captured log lines of the run process.

    Returns:
        A dict with the stage ``timeline``, per-stage ``counters``, the
        log-inferred stage information, live progress (only while running or
        canceling), extracted log issues, and NiMADS / missing-fulltext
        availability details.
    """
    stages: Dict[str, Dict[str, Any]] = {stage: {"status": "pending"} for stage in STAGES}
    counters: Dict[str, Any] = {}
    parsing_enabled = True

    output_dir = Path(output_folder).expanduser().resolve() if output_folder else None
    outputs_dir = output_dir / "outputs" if output_dir else None
    nimads_studyset_path = outputs_dir / "nimads_studyset.json" if outputs_dir else None
    nimads_available = bool(nimads_studyset_path and nimads_studyset_path.exists())
    missing_fulltexts = _read_missing_fulltexts(outputs_dir)
    nimads_export_logged = any(
        "nimads export completed" in str(line or "").lower()
        for line in log_lines
    )
    outputs_present = bool(outputs_dir and outputs_dir.exists())

    progress_data = (
        _safe_read_json(outputs_dir / "execution_progress.json")
        if outputs_present
        else None
    )
    if isinstance(progress_data, dict):
        return _progress_from_execution_file(
            progress_data,
            run_status=run_status,
            log_lines=log_lines,
            nimads_available=nimads_available,
            nimads_export_logged=nimads_export_logged,
            nimads_studyset_path=nimads_studyset_path,
            missing_fulltexts=missing_fulltexts,
        )

    if outputs_present:
        search_data = _safe_read_json(outputs_dir / "search_results.json")
        if isinstance(search_data, dict):
            stages["search"]["status"] = "completed"
            studies = search_data.get("studies", [])
            counters["search"] = {
                "studies_found": len(studies) if isinstance(studies, list) else 0
            }

        abstract_data = _safe_read_json(outputs_dir / "abstract_screening_results.json")
        if isinstance(abstract_data, dict):
            stages["abstract"]["status"] = "completed"
            counters["abstract"] = _tally_screening_decisions(
                abstract_data.get("screening_results", [])
            )

        retrieval_data = _safe_read_json(outputs_dir / "fulltext_retrieval_results.json")
        if isinstance(retrieval_data, dict):
            stages["retrieval"]["status"] = "completed"
            rows = retrieval_data.get("studies_with_fulltext", [])
            counters["retrieval"] = {
                "fulltext_candidates": len(rows) if isinstance(rows, list) else 0
            }

        fulltext_data = _safe_read_json(outputs_dir / "fulltext_screening_results.json")
        if isinstance(fulltext_data, dict):
            stages["fulltext"]["status"] = "completed"
            counters["fulltext"] = _tally_screening_decisions(
                fulltext_data.get("screening_results", [])
            )

        parsing_data = _safe_read_json(outputs_dir / "coordinate_parsing_results.json")
        if parsing_data is not None:
            stages["parsing"]["status"] = "completed"
            if isinstance(parsing_data, dict):
                counters["parsing"] = _summarize_parsed_coordinates(parsing_data)
            else:
                counters["parsing"] = {"status": "Done"}

        annotation_data = _safe_read_json(outputs_dir / "annotation_results.json")
        if isinstance(annotation_data, list):
            stages["annotation"]["status"] = "completed"
            annotation_names = {
                str(item.get("annotation_name", "")).strip()
                for item in annotation_data
                if isinstance(item, dict) and str(item.get("annotation_name", "")).strip()
            }
            counters["annotation"] = {
                "decisions": len(annotation_data),
                "annotations": len(annotation_names),
            }

        final_data = _safe_read_json(outputs_dir / "final_results.json")
        if isinstance(final_data, dict):
            parsing_enabled = _is_parsing_enabled_from_config(final_data.get("config", {}))
            stages["output"]["status"] = "completed"
            execution_stats = final_data.get("execution_stats", {})
            # Tolerate a null / non-mapping execution_stats in the results file.
            prisma = (
                execution_stats.get("prisma_stats", {})
                if isinstance(execution_stats, dict)
                else {}
            )
            if prisma:
                counters["output"] = prisma

    if stages["parsing"]["status"] != "completed":
        if not parsing_enabled:
            stages["parsing"]["status"] = "completed"
            counters["parsing"] = {"status": "Off"}
        elif run_status == "completed":
            # The run reached final outputs; parsing did not produce a results file
            # (e.g., no parseable tables). Treat this stage as complete rather than
            # leaving a misleading pending state.
            stages["parsing"]["status"] = "completed"

    current_stage = infer_stage_from_logs(log_lines)
    stage_flow = extract_log_stage_flow(log_lines)
    live_progress = (
        extract_live_progress(log_lines, current_stage)
        if run_status in {"running", "canceling"}
        else None
    )

    # Previously the failed/canceled handling looked for a stage already marked
    # "running", which can never exist for a run in a terminal state, so the
    # interrupted stage stayed "pending". Select the active stage the same way
    # for every non-completed run status and give it the matching label.
    active_status = {
        "running": "running",
        "canceling": "running",
        "failed": "failed",
        "canceled": "canceled",
    }.get(run_status)
    if active_status:
        _mark_active_stage(stages, current_stage, active_status)

    return {
        "timeline": [{"stage": stage, **stages[stage]} for stage in STAGES],
        "counters": counters,
        "current_stage": current_stage,
        "reached_stages": stage_flow["reached_stages"],
        "completed_stages": stage_flow["completed_stages"],
        "live_progress": live_progress,
        "log_issues": extract_log_issues(log_lines),
        "nimads_available": nimads_available,
        "nimads_export_logged": nimads_export_logged,
        "nimads_studyset_path": str(nimads_studyset_path) if nimads_available else None,
        "missing_fulltexts": missing_fulltexts,
    }
updated.get("completed_at") or now + updated["stale_recovered_at"] = now + updated["status_message"] = ( + "Run process is no longer active; marked canceled after app restart." + ) + self.state.save_run_metadata(str(updated["id"]), updated) + + output_folder = updated.get("output_folder") + if output_folder: + try: + complete_execution_progress( + Path(output_folder), + status="canceled", + error=updated["status_message"], + ) + except Exception: + pass + + return updated + + def _resolve_output_folder(self, config_path: Path, output_folder: Optional[str]) -> Path: + if output_folder: + return Path(output_folder).expanduser().resolve() + return config_path.with_suffix("").resolve() + + def _load_spec_for_run( + self, + project: Dict[str, Any], + run_id: str, + apply_default_email: bool, + default_email: Optional[str], + ) -> Path: + config_path = Path(project["config_path"]).expanduser().resolve() + if not apply_default_email or not default_email: + return config_path + + try: + data = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + except Exception: + return config_path + + search = data.get("search") + if not isinstance(search, dict): + return config_path + + existing_email = (search.get("email") or "").strip() + if existing_email: + return config_path + + search["email"] = default_email + data["search"] = search + + temp_config_dir = self.state.paths.state_dir / "temp-configs" + temp_config_dir.mkdir(parents=True, exist_ok=True) + temp_config_path = temp_config_dir / f"{run_id}.yaml" + temp_config_path.write_text( + yaml.safe_dump(data, sort_keys=False), + encoding="utf-8", + ) + return temp_config_path + + def _build_pipeline_command( + self, + mode: str, + config_path: Path, + output_folder: Optional[str], + verbose: bool, + dry_run: bool, + debug: bool, + num_workers: int, + force_reextract_incomplete_fulltext: bool, + cache_policy: str, + clear_cache: List[str], + copy_valid_cache_from: Optional[str], + ) -> List[str]: + cmd = 
[sys.executable, "-m", "autonima", mode, str(config_path)] + if output_folder: + cmd.append(output_folder) + if verbose: + cmd.append("--verbose") + if dry_run: + cmd.append("--dry-run") + if debug: + cmd.append("--debug") + if num_workers and num_workers > 0: + cmd.extend(["--num-workers", str(num_workers)]) + if force_reextract_incomplete_fulltext and mode == "run": + cmd.append("--force-reextract-incomplete-fulltext") + if cache_policy: + cmd.extend(["--cache-policy", cache_policy]) + for stage in clear_cache or []: + cmd.extend(["--clear-cache", stage]) + if copy_valid_cache_from: + cmd.extend(["--copy-valid-cache-from", copy_valid_cache_from]) + return cmd + + def _maybe_create_execution_output( + self, + runtime_config_path: Path, + resolved_output: Path, + execution_mode: str, + cache_source_output: Optional[Path] = None, + ) -> tuple[Path, Optional[str], Dict[str, Any]]: + """Default UI behavior: branch to a new output when signatures changed.""" + if execution_mode != "auto_new_on_change": + return resolved_output, None, {} + source_output = cache_source_output or resolved_output + try: + config = ConfigManager().load_from_file(runtime_config_path) + config.output.directory = str(source_output) + preview = preview_execution_changes(config, source_output) + except Exception: + return resolved_output, None, {} + + if not preview.get("changed_stages"): + return source_output, None, preview + + execution_name = ( + time.strftime("%Y%m%d-%H%M%S") + + "-" + + str(preview.get("stage_hashes", {}).get("output", ""))[:8] + ) + branched_output = resolved_output / "executions" / execution_name + return branched_output, str(source_output), preview + + def _build_meta_command( + self, + output_folder: str, + estimator: str, + estimator_args: str, + corrector: str, + corrector_args: str, + include_ids: Optional[str], + run_reports: bool, + fail_fast: bool, + debug: bool, + ) -> List[str]: + cmd = [ + sys.executable, + "-m", + "autonima", + "meta", + output_folder, + 
"--estimator", + estimator, + "--estimator-args", + estimator_args, + "--corrector", + corrector, + "--corrector-args", + corrector_args, + ] + if include_ids: + cmd.extend(["--include-ids", include_ids]) + if run_reports: + cmd.append("--run-reports") + if fail_fast: + cmd.append("--fail-fast") + if debug: + cmd.append("--debug") + return cmd + + def _append_log(self, managed: ManagedRun, line: str) -> None: + cleaned = line.rstrip("\n") + with managed.lock: + managed.log_lines.append(cleaned) + if len(managed.log_lines) > 8000: + managed.log_lines = managed.log_lines[-8000:] + + def _reader_worker(self, managed: ManagedRun) -> None: + assert managed.process.stdout is not None + buffer = "" + while True: + chunk = managed.process.stdout.read(1) + if chunk == "": + break + if chunk in {"\n", "\r"}: + if buffer: + self._append_log(managed, buffer) + buffer = "" + continue + buffer += chunk + if buffer: + self._append_log(managed, buffer) + + def _watcher_worker(self, managed: ManagedRun) -> None: + return_code = managed.process.wait() + with managed.lock: + metadata = managed.metadata + metadata["return_code"] = return_code + metadata["completed_at"] = utc_now_iso() + if managed.cancel_requested: + metadata["status"] = "canceled" + elif return_code == 0: + metadata["status"] = "completed" + else: + metadata["status"] = "failed" + self.state.save_run_metadata(managed.run_id, managed.metadata) + + def _start_process( + self, + run_id: str, + metadata: Dict[str, Any], + cmd: List[str], + env: Dict[str, str], + ) -> ManagedRun: + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + cwd=str(self.state.paths.root), + env=env, + ) + + metadata["status"] = "running" + metadata["started_at"] = utc_now_iso() + metadata["command"] = cmd + + managed = ManagedRun(run_id=run_id, metadata=metadata, process=process) + self.state.save_run_metadata(run_id, metadata) + + reader_thread = threading.Thread( + 
target=self._reader_worker, + args=(managed,), + daemon=True, + ) + watcher_thread = threading.Thread( + target=self._watcher_worker, + args=(managed,), + daemon=True, + ) + reader_thread.start() + watcher_thread.start() + + with self._lock: + self._managed[run_id] = managed + + return managed + + def _base_metadata( + self, + run_id: str, + project_id: str, + run_kind: str, + mode: str, + output_folder: Optional[str], + ) -> Dict[str, Any]: + return { + "id": run_id, + "project_id": project_id, + "kind": run_kind, + "mode": mode, + "status": "queued", + "created_at": utc_now_iso(), + "started_at": None, + "completed_at": None, + "return_code": None, + "output_folder": output_folder, + "command": [], + } + + def start_pipeline_run( + self, + project: Dict[str, Any], + mode: str, + output_folder: Optional[str], + verbose: bool, + dry_run: bool, + debug: bool, + num_workers: int, + force_reextract_incomplete_fulltext: bool, + apply_default_email: bool, + cache_policy: str = "auto", + clear_cache: Optional[List[str]] = None, + copy_valid_cache_from: Optional[str] = None, + execution_mode: str = "auto_new_on_change", + ) -> Dict[str, Any]: + run_id = str(uuid4()) + config_path = Path(project["config_path"]).expanduser().resolve() + + secrets = self.secrets_provider() + runtime_config_path = self._load_spec_for_run( + project=project, + run_id=run_id, + apply_default_email=apply_default_email, + default_email=secrets.get("NCBI_EMAIL"), + ) + + resolved_output = self._resolve_output_folder(config_path, output_folder) + cache_preview: Dict[str, Any] = {} + branched_from: Optional[str] = None + if not output_folder: + previous_output_raw = str(project.get("last_output_folder") or "").strip() + previous_output = ( + Path(previous_output_raw).expanduser().resolve() + if previous_output_raw + else None + ) + if previous_output and not previous_output.exists(): + previous_output = None + resolved_output, branched_from, cache_preview = ( + self._maybe_create_execution_output( + 
runtime_config_path, + resolved_output, + execution_mode, + cache_source_output=previous_output, + ) + ) + if branched_from and not copy_valid_cache_from: + copy_valid_cache_from = branched_from + metadata = self._base_metadata( + run_id=run_id, + project_id=project["id"], + run_kind="pipeline", + mode=mode, + output_folder=str(resolved_output), + ) + + cmd = self._build_pipeline_command( + mode=mode, + config_path=runtime_config_path, + output_folder=str(resolved_output), + verbose=verbose, + dry_run=dry_run, + debug=debug, + num_workers=num_workers, + force_reextract_incomplete_fulltext=force_reextract_incomplete_fulltext, + cache_policy=cache_policy, + clear_cache=clear_cache or [], + copy_valid_cache_from=copy_valid_cache_from, + ) + metadata["cache_preview"] = cache_preview + metadata["branched_from_output_folder"] = branched_from + metadata["execution_mode"] = execution_mode + + env = os.environ.copy() + env.update({k: v for k, v in secrets.items() if v}) + managed = self._start_process(run_id, metadata, cmd, env) + + run_ids = list(project.get("run_ids", [])) + run_ids.append(run_id) + self.state.update_project( + project["id"], + { + "run_ids": run_ids, + "last_output_folder": str(resolved_output), + }, + ) + + return managed.metadata + + def start_meta_run( + self, + project: Dict[str, Any], + output_folder: str, + source_run_id: Optional[str], + estimator: str, + estimator_args: str, + corrector: str, + corrector_args: str, + include_ids: Optional[str], + run_reports: bool, + fail_fast: bool, + debug: bool, + ) -> Dict[str, Any]: + run_id = str(uuid4()) + output_path = str(Path(output_folder).expanduser().resolve()) + metadata = self._base_metadata( + run_id=run_id, + project_id=project["id"], + run_kind="meta", + mode="meta", + output_folder=output_path, + ) + metadata["source_run_id"] = source_run_id + metadata["source_output_folder"] = output_path + + cmd = self._build_meta_command( + output_folder=output_path, + estimator=estimator, + 
estimator_args=estimator_args, + corrector=corrector, + corrector_args=corrector_args, + include_ids=include_ids, + run_reports=run_reports, + fail_fast=fail_fast, + debug=debug, + ) + env = os.environ.copy() + env.update({k: v for k, v in self.secrets_provider().items() if v}) + + managed = self._start_process(run_id, metadata, cmd, env) + + run_ids = list(project.get("run_ids", [])) + run_ids.append(run_id) + self.state.update_project( + project["id"], + { + "run_ids": run_ids, + "last_meta_output_folder": output_path, + }, + ) + + return managed.metadata + + def cancel_run(self, run_id: str) -> Dict[str, Any]: + with self._lock: + managed = self._managed.get(run_id) + + if managed is None: + metadata = self.state.load_run_metadata(run_id) + if not metadata: + raise KeyError(f"Run not found: {run_id}") + return self._reconcile_stale_metadata(metadata) + + if managed.process.poll() is not None: + return managed.metadata + + managed.cancel_requested = True + with managed.lock: + managed.metadata["status"] = "canceling" + managed.metadata["cancel_requested_at"] = utc_now_iso() + self.state.save_run_metadata(run_id, managed.metadata) + + managed.process.terminate() + + def _graceful_kill() -> None: + deadline = time.time() + 5.0 + while time.time() < deadline: + if managed.process.poll() is not None: + return + time.sleep(0.1) + if managed.process.poll() is None: + managed.process.kill() + + threading.Thread(target=_graceful_kill, daemon=True).start() + return managed.metadata + + def get_run(self, run_id: str) -> Dict[str, Any]: + with self._lock: + managed = self._managed.get(run_id) + + if managed: + with managed.lock: + metadata = dict(managed.metadata) + logs = list(managed.log_lines) + self.state.save_run_metadata(run_id, metadata) + else: + metadata = self.state.load_run_metadata(run_id) + if not metadata: + raise KeyError(f"Run not found: {run_id}") + metadata = self._reconcile_stale_metadata(metadata) + logs = [] + + progress = build_stage_status( + 
run_status=str(metadata.get("status", "")), + output_folder=metadata.get("output_folder"), + log_lines=logs, + ) + metadata["progress"] = progress + metadata["log_line_count"] = len(logs) + return metadata + + def get_logs(self, run_id: str, offset: int = 0) -> Dict[str, Any]: + with self._lock: + managed = self._managed.get(run_id) + + if managed: + with managed.lock: + lines = list(managed.log_lines) + status = managed.metadata.get("status") + else: + metadata = self.state.load_run_metadata(run_id) + if not metadata: + raise KeyError(f"Run not found: {run_id}") + metadata = self._reconcile_stale_metadata(metadata) + lines = [] + status = metadata.get("status") + + safe_offset = max(0, min(offset, len(lines))) + new_lines = lines[safe_offset:] + return { + "run_id": run_id, + "offset": safe_offset, + "next_offset": len(lines), + "lines": new_lines, + "completed": status in {"completed", "failed", "canceled"}, + } diff --git a/autonima/webui/secrets.py b/autonima/webui/secrets.py new file mode 100644 index 0000000..f6ae60f --- /dev/null +++ b/autonima/webui/secrets.py @@ -0,0 +1,81 @@ +"""Secrets management for the Autonima web UI.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Dict + +SECRETS_KEYS = [ + "OPENAI_API_KEY", + "OPENAI_API_GATEWAY", + "PUBGET_API_KEY", + "NCBI_EMAIL", +] + + +class SecretsManager: + """Manage keys stored in ~/.autonima.env.""" + + def __init__(self, env_path: Path | None = None): + self.env_path = env_path or (Path.home() / ".autonima.env") + + def _parse(self, text: str) -> Dict[str, str]: + values: Dict[str, str] = {} + for line in text.splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + if "=" not in stripped: + continue + key, value = stripped.split("=", 1) + values[key.strip()] = value.strip() + return values + + def load(self) -> Dict[str, str]: + if not self.env_path.exists(): + return {} + content = 
self.env_path.read_text(encoding="utf-8") + return self._parse(content) + + def load_masked(self) -> Dict[str, str]: + secrets = self.load() + masked: Dict[str, str] = {} + for key in SECRETS_KEYS: + value = secrets.get(key, "") + if not value: + masked[key] = "" + elif len(value) <= 6: + masked[key] = "*" * len(value) + else: + masked[key] = f"{value[:3]}{'*' * max(3, len(value) - 6)}{value[-3:]}" + return masked + + def save(self, updates: Dict[str, str]) -> Dict[str, str]: + current = self.load() + for key in SECRETS_KEYS: + if key not in updates: + continue + value = (updates.get(key) or "").strip() + if value: + current[key] = value + elif key in current: + del current[key] + + lines = [ + "# Autonima UI secrets", + "# Generated by `autonima ui`", + "", + ] + for key in SECRETS_KEYS: + value = current.get(key) + if value: + lines.append(f"{key}={value}") + + self.env_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + try: + self.env_path.chmod(0o600) + except OSError: + # Best-effort permissions for cross-platform compatibility. 
+ pass + + return self.load() diff --git a/autonima/webui/state.py b/autonima/webui/state.py new file mode 100644 index 0000000..aef7aa7 --- /dev/null +++ b/autonima/webui/state.py @@ -0,0 +1,460 @@ +"""Workspace/project persistence for the Autonima web UI.""" + +from __future__ import annotations + +import json +import shutil +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional +from uuid import uuid4 + + +def utc_now_iso() -> str: + """Return an ISO timestamp in UTC.""" + return datetime.now(timezone.utc).isoformat() + + +@dataclass +class WorkspacePaths: + """Resolved filesystem paths for web UI state.""" + + root: Path + state_dir: Path + runs_dir: Path + projects_dir: Path + workspace_file: Path + projects_file: Path + + +class WorkspaceState: + """Filesystem-backed state for workspace, projects, and runs.""" + + def __init__(self, workspace_root: Path): + self.paths = WorkspacePaths( + root=workspace_root.resolve(), + state_dir=(workspace_root / ".autonima-ui").resolve(), + runs_dir=(workspace_root / ".autonima-ui" / "runs").resolve(), + projects_dir=(workspace_root / ".autonima-ui" / "projects").resolve(), + workspace_file=(workspace_root / ".autonima-ui" / "workspace.json").resolve(), + projects_file=(workspace_root / ".autonima-ui" / "projects.json").resolve(), + ) + self._ensure_layout() + + def _ensure_layout(self) -> None: + self.paths.root.mkdir(parents=True, exist_ok=True) + self.paths.state_dir.mkdir(parents=True, exist_ok=True) + self.paths.runs_dir.mkdir(parents=True, exist_ok=True) + self.paths.projects_dir.mkdir(parents=True, exist_ok=True) + + if not self.paths.workspace_file.exists(): + self._save_json( + self.paths.workspace_file, + { + "workspace_root": str(self.paths.root), + "state_dir": str(self.paths.state_dir), + "created_at": utc_now_iso(), + "updated_at": utc_now_iso(), + }, + ) + + if not self.paths.projects_file.exists(): + self._save_json( 
+ self.paths.projects_file, + { + "projects": [], + "created_at": utc_now_iso(), + "updated_at": utc_now_iso(), + }, + ) + + def _load_json(self, path: Path, default: Any) -> Any: + if not path.exists(): + return default + with path.open("r", encoding="utf-8") as f: + return json.load(f) + + def _save_json(self, path: Path, data: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = path.with_suffix(path.suffix + ".tmp") + with tmp_path.open("w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + tmp_path.replace(path) + + def get_workspace(self) -> Dict[str, Any]: + info = self._load_json(self.paths.workspace_file, {}) + info["workspace_root"] = str(self.paths.root) + info["state_dir"] = str(self.paths.state_dir) + return info + + def set_workspace(self, workspace_root: Path) -> Dict[str, Any]: + self.paths = WorkspaceState(workspace_root).paths + return self.get_workspace() + + def list_projects(self) -> List[Dict[str, Any]]: + payload = self._load_json(self.paths.projects_file, {"projects": []}) + projects = payload.get("projects", []) + for project in projects: + if "description" not in project: + project["description"] = "" + return sorted(projects, key=lambda item: item.get("updated_at", ""), reverse=True) + + def _save_projects(self, projects: List[Dict[str, Any]]) -> None: + payload = self._load_json(self.paths.projects_file, {"projects": []}) + payload["projects"] = projects + payload["updated_at"] = utc_now_iso() + self._save_json(self.paths.projects_file, payload) + + def get_project(self, project_id: str) -> Optional[Dict[str, Any]]: + for project in self.list_projects(): + if project.get("id") == project_id: + return project + return None + + def create_project( + self, + name: str, + config_path: Optional[str] = None, + imported: bool = False, + description: Optional[str] = None, + ) -> Dict[str, Any]: + if not name.strip(): + raise ValueError("Project name cannot be empty") + + project_id = str(uuid4()) + project_folder = 
self.paths.projects_dir / project_id + project_folder.mkdir(parents=True, exist_ok=True) + + if config_path: + config_file = Path(config_path).expanduser().resolve() + else: + config_file = project_folder / "config.yaml" + if not config_file.exists(): + config_file.write_text("", encoding="utf-8") + + now = utc_now_iso() + description_text = (description or "").strip() + project = { + "id": project_id, + "name": name.strip(), + "description": description_text, + "config_path": str(config_file), + "source": "imported" if imported else "created", + "created_at": now, + "updated_at": now, + "last_output_folder": None, + "run_ids": [], + } + + projects = self.list_projects() + projects.append(project) + self._save_projects(projects) + return project + + def import_project( + self, + config_path: str, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> Dict[str, Any]: + config_file = Path(config_path).expanduser().resolve() + if not config_file.exists(): + raise FileNotFoundError(f"Config file not found: {config_file}") + + project_name = name.strip() if name and name.strip() else config_file.stem + for project in self.list_projects(): + if Path(project.get("config_path", "")) == config_file: + return project + + return self.create_project( + name=project_name, + config_path=str(config_file), + imported=True, + description=description, + ) + + def update_project(self, project_id: str, updates: Dict[str, Any]) -> Dict[str, Any]: + projects = self.list_projects() + updated_project: Optional[Dict[str, Any]] = None + + for project in projects: + if project.get("id") != project_id: + continue + project.update(updates) + project["updated_at"] = utc_now_iso() + updated_project = project + break + + if updated_project is None: + raise KeyError(f"Project not found: {project_id}") + + self._save_projects(projects) + return updated_project + + def clone_project( + self, + project_id: str, + mode: str = "schema_only", + name: Optional[str] = None, + 
description: Optional[str] = None, + ) -> Dict[str, Any]: + allowed_modes = {"schema_only", "schema_and_cached_results"} + if mode not in allowed_modes: + raise ValueError( + f"Invalid clone mode '{mode}'. Expected one of {sorted(allowed_modes)}" + ) + + source_project = self.get_project(project_id) + if not source_project: + raise KeyError(f"Project not found: {project_id}") + + source_spec = self.get_project_spec(project_id) + source_yaml_text = source_spec.get("yaml_text", "") + + clone_name = (name or "").strip() or f'{source_project.get("name", "Project")} copy' + clone_description = ( + (description or "").strip() + if description is not None + else str(source_project.get("description") or "") + ) + + cloned_project = self.create_project( + name=clone_name, + imported=False, + description=clone_description, + ) + cloned_config_path = Path(cloned_project["config_path"]).expanduser().resolve() + cloned_config_path.write_text(source_yaml_text, encoding="utf-8") + + cloned_run_ids: List[str] = [] + skipped_active_runs = 0 + if mode == "schema_and_cached_results": + active_statuses = {"queued", "running", "canceling"} + source_runs = self.list_runs(project_id=project_id) + for source_run in source_runs: + status = str(source_run.get("status") or "").strip().lower() + if status in active_statuses: + skipped_active_runs += 1 + continue + new_run_id = str(uuid4()) + cloned_run = dict(source_run) + cloned_run["id"] = new_run_id + cloned_run["project_id"] = cloned_project["id"] + cloned_run["created_at"] = utc_now_iso() + self.save_run_metadata(new_run_id, cloned_run) + cloned_run_ids.append(new_run_id) + + self.update_project( + cloned_project["id"], + { + "run_ids": cloned_run_ids, + "last_output_folder": source_project.get("last_output_folder"), + }, + ) + cloned_project = self.get_project(cloned_project["id"]) or cloned_project + + cloned_project = dict(cloned_project) + cloned_project["clone_report"] = { + "mode": mode, + "cloned_runs_count": len(cloned_run_ids), 
+ "skipped_active_runs_count": skipped_active_runs, + } + return cloned_project + + def get_project_spec(self, project_id: str) -> Dict[str, Any]: + project = self.get_project(project_id) + if not project: + raise KeyError(f"Project not found: {project_id}") + + config_path = Path(project["config_path"]) + if not config_path.exists(): + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text("", encoding="utf-8") + + return { + "project": project, + "config_path": str(config_path), + "yaml_text": config_path.read_text(encoding="utf-8"), + } + + def save_project_spec(self, project_id: str, yaml_text: str) -> Dict[str, Any]: + project = self.get_project(project_id) + if not project: + raise KeyError(f"Project not found: {project_id}") + + config_path = Path(project["config_path"]) + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(yaml_text, encoding="utf-8") + + return self.update_project(project_id, {}) + + def run_metadata_path(self, run_id: str) -> Path: + return self.paths.runs_dir / f"{run_id}.json" + + def save_run_metadata(self, run_id: str, metadata: Dict[str, Any]) -> None: + self._save_json(self.run_metadata_path(run_id), metadata) + + def load_run_metadata(self, run_id: str) -> Optional[Dict[str, Any]]: + path = self.run_metadata_path(run_id) + if not path.exists(): + return None + return self._load_json(path, None) + + def list_runs(self, project_id: Optional[str] = None) -> List[Dict[str, Any]]: + runs: List[Dict[str, Any]] = [] + for run_file in sorted(self.paths.runs_dir.glob("*.json")): + payload = self._load_json(run_file, None) + if not payload: + continue + if project_id and payload.get("project_id") != project_id: + continue + runs.append(payload) + return sorted(runs, key=lambda item: item.get("created_at", ""), reverse=True) + + def _is_within_workspace(self, candidate: Path) -> bool: + workspace_root = self.paths.root.resolve() + resolved = candidate.expanduser().resolve(strict=False) + 
return workspace_root == resolved or workspace_root in resolved.parents + + def _collect_project_output_folders(self, project: Dict[str, Any]) -> List[str]: + output_folders: List[str] = [] + seen = set() + + for run in self.list_runs(project_id=project["id"]): + output_folder = (run.get("output_folder") or "").strip() + if not output_folder or output_folder in seen: + continue + seen.add(output_folder) + output_folders.append(output_folder) + + last_output_folder = (project.get("last_output_folder") or "").strip() + if last_output_folder and last_output_folder not in seen: + output_folders.append(last_output_folder) + + return output_folders + + def get_project_delete_preview(self, project_id: str) -> Dict[str, Any]: + project = self.get_project(project_id) + if not project: + raise KeyError(f"Project not found: {project_id}") + + runs = self.list_runs(project_id=project_id) + active_statuses = {"queued", "running", "canceling"} + active_run_ids = [ + run.get("id") + for run in runs + if (run.get("status") or "").strip() in active_statuses + ] + + config_path = Path(project["config_path"]).expanduser().resolve(strict=False) + output_folders_detected = self._collect_project_output_folders(project) + output_folders_deletable = [ + folder + for folder in output_folders_detected + if self._is_within_workspace(Path(folder)) + ] + + return { + "project_id": project_id, + "project_name": project.get("name"), + "project_source": project.get("source"), + "config_path": str(config_path), + "config_deletable": self._is_within_workspace(config_path), + "run_metadata_count": len(runs), + "has_active_runs": bool(active_run_ids), + "active_run_ids": active_run_ids, + "has_outputs": bool(output_folders_detected), + "output_folders_detected": output_folders_detected, + "output_folders_deletable": output_folders_deletable, + } + + def delete_project(self, project_id: str, mode: str) -> Dict[str, Any]: + allowed_modes = { + "metadata_only", + "metadata_and_config", + 
"metadata_config_and_outputs", + } + if mode not in allowed_modes: + raise ValueError( + f"Invalid delete mode '{mode}'. Expected one of {sorted(allowed_modes)}" + ) + + preview = self.get_project_delete_preview(project_id) + if preview["has_active_runs"]: + raise RuntimeError( + "Cannot delete project while runs are active. Cancel or wait for runs to finish." + ) + + project = self.get_project(project_id) + if not project: + raise KeyError(f"Project not found: {project_id}") + + # Remove project metadata from projects.json + projects = self.list_projects() + remaining_projects = [ + item for item in projects if item.get("id") != project_id + ] + self._save_projects(remaining_projects) + + removed_run_metadata_files: List[str] = [] + for run in self.list_runs(project_id=project_id): + run_id = run.get("id") + if not run_id: + continue + run_path = self.run_metadata_path(str(run_id)) + if run_path.exists(): + run_path.unlink() + removed_run_metadata_files.append(str(run_path)) + + removed_config_path: Optional[str] = None + skipped_config_reason: Optional[str] = None + config_path = Path(preview["config_path"]) + include_config = mode in {"metadata_and_config", "metadata_config_and_outputs"} + if include_config: + if preview["config_deletable"]: + if config_path.exists(): + config_path.unlink() + removed_config_path = str(config_path) + else: + skipped_config_reason = "Config path is outside workspace boundary" + + removed_output_folders: List[str] = [] + skipped_output_folders: List[Dict[str, str]] = [] + include_outputs = mode == "metadata_config_and_outputs" + if include_outputs: + deletable = set(preview["output_folders_deletable"]) + for output_folder in preview["output_folders_detected"]: + if output_folder not in deletable: + skipped_output_folders.append( + { + "path": output_folder, + "reason": "Outside workspace boundary", + } + ) + continue + + output_path = Path(output_folder).expanduser().resolve(strict=False) + if not output_path.exists(): + continue + 
if output_path.is_dir(): + shutil.rmtree(output_path) + else: + output_path.unlink() + removed_output_folders.append(str(output_path)) + + return { + "project_id": project_id, + "project_name": preview.get("project_name"), + "mode": mode, + "removed": { + "project_metadata": True, + "run_metadata_files": removed_run_metadata_files, + "config_path": removed_config_path, + "output_folders": removed_output_folders, + }, + "skipped": { + "config": skipped_config_reason, + "output_folders": skipped_output_folders, + }, + } diff --git a/autonima/webui/static/app.jsx b/autonima/webui/static/app.jsx new file mode 100644 index 0000000..ac78af1 --- /dev/null +++ b/autonima/webui/static/app.jsx @@ -0,0 +1,4142 @@ +const { useEffect, useMemo, useRef, useState } = React; +const DEFAULT_ANNOTATION_METADATA_FIELDS = [ + "analysis_name", + "analysis_description", + "table_caption", + "study_title", + "study_fulltext", +]; +const BUILD_STEPS = [ + ["search", "Find studies"], + ["screening", "Screening"], + ["parsing_annotation", "Parsing + Annotation"], + ["review", "Review"], +]; + +function formatApiError(payload) { + if (payload == null) { + return "Request failed"; + } + if (typeof payload === "string") { + return payload; + } + if (Array.isArray(payload)) { + const parts = payload + .map((item) => formatApiError(item)) + .filter(Boolean); + return parts.join(" | ") || "Request failed"; + } + if (typeof payload === "object") { + if (typeof payload.detail === "string") { + return payload.detail; + } + if (payload.detail != null) { + return formatApiError(payload.detail); + } + if (typeof payload.message === "string") { + return payload.message; + } + if (typeof payload.msg === "string") { + const loc = Array.isArray(payload.loc) ? payload.loc.join(".") : null; + return loc ? 
`${loc}: ${payload.msg}` : payload.msg; + } + try { + return JSON.stringify(payload); + } catch (_) { + return String(payload); + } + } + return String(payload); +} + +async function api(path, options = {}) { + const response = await fetch(path, { + headers: { "Content-Type": "application/json", ...(options.headers || {}) }, + ...options, + }); + const contentType = response.headers.get("content-type") || ""; + const payload = contentType.includes("application/json") + ? await response.json() + : await response.text(); + + if (!response.ok) { + const detail = formatApiError(payload); + throw new Error(detail); + } + return payload; +} + +function setNested(obj, path, value) { + const copy = JSON.parse(JSON.stringify(obj || {})); + let cursor = copy; + for (let i = 0; i < path.length - 1; i += 1) { + const key = path[i]; + if (typeof cursor[key] !== "object" || cursor[key] === null) { + cursor[key] = {}; + } + cursor = cursor[key]; + } + cursor[path[path.length - 1]] = value; + return copy; +} + +function getNested(obj, path, defaultValue = "") { + let cursor = obj; + for (const key of path) { + if (!cursor || typeof cursor !== "object") { + return defaultValue; + } + cursor = cursor[key]; + } + return cursor ?? 
defaultValue; +} + +function parseLines(value) { + return String(value || "") + .split("\n") + .map((line) => line.trim()) + .filter(Boolean); +} + +function stringifyLines(value) { + if (Array.isArray(value)) { + return value.join("\n"); + } + return ""; +} + +function pickPreferredDefaultModel(models, preferred) { + if (!Array.isArray(models) || !models.length) { + return ""; + } + if (typeof preferred === "string" && models.includes(preferred)) { + return preferred; + } + return models[0] || ""; +} + +function formatBytes(value) { + const size = Number(value || 0); + if (!Number.isFinite(size) || size < 1024) { + return `${Math.max(0, Math.round(size))} B`; + } + const units = ["KB", "MB", "GB", "TB"]; + let current = size / 1024; + let unitIndex = 0; + while (current >= 1024 && unitIndex < units.length - 1) { + current /= 1024; + unitIndex += 1; + } + return `${current.toFixed(current >= 10 ? 0 : 1)} ${units[unitIndex]}`; +} + +function buildMetaArtifactUrl(runId, relativePath) { + const normalizedRunId = String(runId || "").trim(); + const normalizedPath = String(relativePath || "").trim(); + if (!normalizedRunId || !normalizedPath) return ""; + return `/api/runs/${encodeURIComponent(normalizedRunId)}/meta-artifact?path=${encodeURIComponent(normalizedPath)}`; +} + +function metaArtifactGroupId(file) { + const relativePath = String(file?.relative_path || "").trim(); + const parts = relativePath.split("/").filter(Boolean); + return parts.length > 1 ? 
parts[0] : "maps"; +} + +function metaArtifactGroupLabel(groupId) { + return formatCounterLabel(String(groupId || "maps").replaceAll("-", "_")); +} + +function metaArtifactSortRank(file) { + const name = String(file?.name || "").toLowerCase(); + if (name.startsWith("z_corr")) return 0; + if (name === "z.nii" || name === "z.nii.gz") return 1; + if (name.startsWith("z")) return 2; + return 10; +} + +function metaArtifactGroupSortRank(groupId) { + const normalized = String(groupId || "").toLowerCase(); + if (normalized.startsWith("all_") || normalized === "maps") return 1; + return 0; +} + +function sortMetaArtifacts(files) { + return [...(files || [])].sort((a, b) => { + const groupA = metaArtifactGroupId(a); + const groupB = metaArtifactGroupId(b); + const groupRankCompare = metaArtifactGroupSortRank(groupA) - metaArtifactGroupSortRank(groupB); + if (groupRankCompare) return groupRankCompare; + const groupCompare = groupA.localeCompare(groupB); + if (groupCompare) return groupCompare; + const rankCompare = metaArtifactSortRank(a) - metaArtifactSortRank(b); + if (rankCompare) return rankCompare; + return String(a?.name || "").localeCompare(String(b?.name || "")); + }); +} + +function preferredMetaArtifact(files) { + const sorted = sortMetaArtifacts(files); + return sorted[0] || null; +} + +function groupMetaArtifacts(files) { + const groups = new Map(); + for (const file of sortMetaArtifacts(files)) { + const groupId = metaArtifactGroupId(file); + if (!groups.has(groupId)) { + groups.set(groupId, { + id: groupId, + label: metaArtifactGroupLabel(groupId), + files: [], + }); + } + groups.get(groupId).files.push(file); + } + return Array.from(groups.values()); +} + +function buildMissingFulltextsUrl(runId, extension) { + const normalizedRunId = String(runId || "").trim(); + const normalizedExt = String(extension || "").trim().toLowerCase(); + if (!normalizedRunId || !["txt", "csv"].includes(normalizedExt)) return ""; + return 
`/api/runs/${encodeURIComponent(normalizedRunId)}/missing-fulltexts.${normalizedExt}`; +} + +function statusClass(status) { + return `badge ${status || "queued"}`; +} + +function isActiveRunStatus(status) { + return ["queued", "running", "canceling"].includes(String(status || "").toLowerCase()); +} + +function runTimestampValue(run) { + const timestamp = Date.parse(run?.created_at || run?.started_at || run?.updated_at || ""); + return Number.isFinite(timestamp) ? timestamp : 0; +} + +function newestRunsFirst(runsList) { + return [...(runsList || [])].sort((a, b) => runTimestampValue(b) - runTimestampValue(a)); +} + +function runSourceLabel(run) { + if (!run) return "No NIMADS source"; + const timestamp = run.created_at || run.started_at || ""; + const date = timestamp ? new Date(timestamp) : null; + const dateLabel = date && Number.isFinite(date.getTime()) + ? date.toLocaleString() + : "Screening run"; + const folder = String(run.output_folder || ""); + const folderLabel = folder + ? folder.split("/").filter(Boolean).slice(-2).join("/") + : run.id; + return `${dateLabel} · ${folderLabel}`; +} + +function normalizePathKey(value) { + return String(value || "").trim().replace(/\/+$/, ""); +} + +function nimadsSourceKey(run) { + if (!run) return ""; + const explicitPath = normalizePathKey(run.progress?.nimads_studyset_path); + if (explicitPath) return explicitPath; + const outputFolder = normalizePathKey(run.output_folder); + return outputFolder ? 
`${outputFolder}/outputs/nimads_studyset.json` : ""; +} + +function metaRunMatchesSource(metaRun, sourceRun) { + if (!metaRun || !sourceRun) return false; + if (metaRun.source_run_id && metaRun.source_run_id === sourceRun.id) { + return true; + } + const sourceOutputFolder = normalizePathKey(sourceRun.output_folder); + const metaSourceFolder = normalizePathKey(metaRun.source_output_folder || metaRun.output_folder); + return Boolean(sourceOutputFolder && metaSourceFolder && sourceOutputFolder === metaSourceFolder); +} + +function formatCounterLabel(key) { + return String(key || "") + .replaceAll("_", " ") + .replace(/\b\w/g, (char) => char.toUpperCase()); +} + +function counterTone(key) { + const normalized = String(key || "").toLowerCase(); + if (normalized.includes("included") || normalized === "decisions") return "include"; + if (normalized.includes("excluded")) return "exclude"; + if (normalized.includes("incomplete")) return "incomplete"; + return "total"; +} + +function toDateInputValue(value) { + const raw = String(value || "").trim(); + if (!raw) return ""; + if (/^\d{4}-\d{2}-\d{2}$/.test(raw)) return raw; + if (/^\d{4}\/\d{2}\/\d{2}$/.test(raw)) return raw.replaceAll("/", "-"); + return ""; +} + +function fromDateInputValue(value) { + const raw = String(value || "").trim(); + if (!raw) return ""; + return raw.replaceAll("-", "/"); +} + +function MetaNiftiViewer({ fileUrl, fileName }) { + const canvasRef = useRef(null); + const viewerRef = useRef(null); + const [viewerError, setViewerError] = useState(""); + const [viewerReady, setViewerReady] = useState(false); + + useEffect(() => { + if (!fileUrl) { + setViewerError(""); + setViewerReady(false); + return; + } + + let canceled = false; + + async function loadVolume() { + try { + setViewerError(""); + setViewerReady(false); + + const niivueGlobal = window.niivue; + const NiivueCtor = niivueGlobal?.Niivue; + if (!NiivueCtor) { + throw new Error("NiiVue library was not loaded."); + } + if (!canvasRef.current) 
{ + throw new Error("Viewer canvas is unavailable."); + } + + if (!viewerRef.current) { + const nextViewer = new NiivueCtor({ + show3Dcrosshair: true, + }); + await nextViewer.attachToCanvas(canvasRef.current); + if (niivueGlobal?.SHOW_RENDER?.ALWAYS != null) { + nextViewer.opts.multiplanarShowRender = niivueGlobal.SHOW_RENDER.ALWAYS; + } + nextViewer.opts.isColorbar = true; + nextViewer.setSliceMM(false); + viewerRef.current = nextViewer; + } + + const viewer = viewerRef.current; + while ((viewer.volumes || []).length) { + viewer.removeVolume(viewer.volumes[viewer.volumes.length - 1]); + } + + await viewer.addVolumeFromUrl({ + url: "https://neurovault.org/static/images/GenericMNI.nii.gz", + colormap: "gray", + opacity: 1, + colorbarVisible: false, + }); + + await viewer.addVolumeFromUrl({ + url: fileUrl, + colormap: "warm", + opacity: 1, + cal_min: 0, + cal_max: 6, + cal_minNeg: -6, + cal_maxNeg: 0, + }); + + viewer.setInterpolation(true); + viewer.updateGLVolume(); + + if (!canceled) { + setViewerReady(true); + } + } catch (err) { + if (!canceled) { + setViewerError(err?.message || String(err)); + setViewerReady(false); + } + } + } + + loadVolume(); + return () => { + canceled = true; + }; + }, [fileUrl, fileName]); + + return ( +
+ {viewerError ? ( +
{viewerError}
+ ) : null} + {!viewerReady && !viewerError ? ( +
Loading viewer…
+ ) : null} +
+ +
+
+ + Download NIfTI + +
+
+ ); +} + +function App() { + const [view, setView] = useState("projects"); + const [editorTab, setEditorTab] = useState("build"); + const [settingsOpen, setSettingsOpen] = useState(false); + const [workspace, setWorkspace] = useState(null); + const [projects, setProjects] = useState([]); + const [selectedProjectId, setSelectedProjectId] = useState(null); + + const [specForm, setSpecForm] = useState({}); + const [yamlText, setYamlText] = useState(""); + const [yamlMode, setYamlMode] = useState(false); + const [buildStep, setBuildStep] = useState("search"); + const [runsSubTab, setRunsSubTab] = useState("screening"); + const [specPath, setSpecPath] = useState(""); + + const [runs, setRuns] = useState([]); + const [selectedRunId, setSelectedRunId] = useState(null); + const [selectedRun, setSelectedRun] = useState(null); + const [metaArtifacts, setMetaArtifacts] = useState([]); + const [metaArtifactsLoading, setMetaArtifactsLoading] = useState(false); + const [metaArtifactsError, setMetaArtifactsError] = useState(""); + const [selectedMetaArtifactPath, setSelectedMetaArtifactPath] = useState(""); + const [selectedMetaArtifactGroup, setSelectedMetaArtifactGroup] = useState(""); + const [selectedMetaSourceRunId, setSelectedMetaSourceRunId] = useState(""); + const [metaSourceManuallySelected, setMetaSourceManuallySelected] = useState(false); + const [logs, setLogs] = useState([]); + const [logOffset, setLogOffset] = useState(0); + const logOffsetRef = useRef(0); + const yamlEditorRef = useRef(null); + + const [secrets, setSecrets] = useState({}); + const [maskedSecrets, setMaskedSecrets] = useState({}); + const [preferredModels, setPreferredModels] = useState([]); + const [preferredModelsText, setPreferredModelsText] = useState(""); + const [globalPreferredModel, setGlobalPreferredModel] = useState(""); + const [yamlModelSelection, setYamlModelSelection] = useState(""); + const [searchAdvancedTouched, setSearchAdvancedTouched] = useState({ + maxResults: false, + email: 
false, + }); + + const [statusMsg, setStatusMsg] = useState(null); + const [deletePreview, setDeletePreview] = useState(null); + const [deleteMode, setDeleteMode] = useState("metadata_only"); + const [deleteBusy, setDeleteBusy] = useState(false); + const [createModalOpen, setCreateModalOpen] = useState(false); + const [createBusy, setCreateBusy] = useState(false); + const [createName, setCreateName] = useState(""); + const [createDescription, setCreateDescription] = useState(""); + const [importModalOpen, setImportModalOpen] = useState(false); + const [importBusy, setImportBusy] = useState(false); + const [importConfigPath, setImportConfigPath] = useState(""); + const [importName, setImportName] = useState(""); + const [importDescription, setImportDescription] = useState(""); + const [editModalOpen, setEditModalOpen] = useState(false); + const [editBusy, setEditBusy] = useState(false); + const [editProjectId, setEditProjectId] = useState(""); + const [editName, setEditName] = useState(""); + const [editDescription, setEditDescription] = useState(""); + const [cloneModalOpen, setCloneModalOpen] = useState(false); + const [cloneBusy, setCloneBusy] = useState(false); + const [cloneProjectId, setCloneProjectId] = useState(""); + const [cloneName, setCloneName] = useState(""); + const [cloneDescription, setCloneDescription] = useState(""); + const [cloneMode, setCloneMode] = useState("schema_only"); + const [projectModelSettingsOpen, setProjectModelSettingsOpen] = useState(false); + const [studyListModalOpen, setStudyListModalOpen] = useState(false); + const [studyListBusy, setStudyListBusy] = useState(false); + const [studyListFileName, setStudyListFileName] = useState(""); + const [pubmedCountBusy, setPubmedCountBusy] = useState(false); + const [pubmedCount, setPubmedCount] = useState(null); + const [sourceModalOpen, setSourceModalOpen] = useState(false); + const [sourceModalIndex, setSourceModalIndex] = useState(null); + const [sourceType, setSourceType] = 
useState("custom"); + const [sourceForm, setSourceForm] = useState({ + root_path: "", + pmid_source: "folder_name", + text_path_templates: "", + coordinates_path_templates: "", + allowed_extensions: "", + processed_data_path: "", + json_filename: "", + json_pmid_key: "", + }); + + const [runForm, setRunForm] = useState({ + mode: "run", + output_folder: "", + verbose: false, + dry_run: false, + debug: false, + num_workers: 1, + force_reextract_incomplete_fulltext: false, + apply_default_email: true, + cache_policy: "auto", + clear_cache: [], + copy_valid_cache_from: "", + execution_mode: "auto_new_on_change", + }); + + const [metaForm, setMetaForm] = useState({ + output_folder: "", + estimator: "mkdadensity", + estimator_args: "{}", + corrector: "fdr", + corrector_args: "{}", + include_ids: "", + run_reports: false, + fail_fast: false, + debug: false, + }); + + const selectedProject = useMemo( + () => projects.find((project) => project.id === selectedProjectId) || null, + [projects, selectedProjectId] + ); + const buildStepIndex = useMemo( + () => Math.max(0, BUILD_STEPS.findIndex(([id]) => id === buildStep)), + [buildStep] + ); + const isFirstBuildStep = buildStepIndex <= 0; + const isLastBuildStep = buildStepIndex >= BUILD_STEPS.length - 1; + const eligibleMetaSourceRuns = useMemo( + () => { + const candidates = []; + if (selectedRun) candidates.push(selectedRun); + for (const run of runs) { + if (!candidates.some((item) => item?.id && item.id === run?.id)) { + candidates.push(run); + } + } + + const completedRuns = candidates.filter( + (run) => run?.status === "completed" && Boolean(run.output_folder) + ); + const eligibleRuns = newestRunsFirst(completedRuns.filter((run) => { + const outputStage = (run.progress?.timeline || []).find( + (stage) => stage.stage === "output" + ); + const isMetaRun = run.kind === "meta" || run.mode === "meta"; + const isScreeningRun = !isMetaRun; + const hasNimads = Boolean( + run.progress?.nimads_available || 
run.progress?.nimads_export_logged + ); + return isScreeningRun && outputStage?.status === "completed" && hasNimads; + })); + const latestBySource = new Map(); + for (const run of eligibleRuns) { + const key = nimadsSourceKey(run) || run.id; + if (!latestBySource.has(key)) { + latestBySource.set(key, run); + } + } + return Array.from(latestBySource.values()); + }, + [runs, selectedRun] + ); + const selectedMetaSourceRun = useMemo( + () => eligibleMetaSourceRuns.find((run) => run.id === selectedMetaSourceRunId) + || eligibleMetaSourceRuns[0] + || null, + [eligibleMetaSourceRuns, selectedMetaSourceRunId] + ); + const selectedMetaSourceOutputFolder = selectedMetaSourceRun?.output_folder || ""; + const metaAnalysisEnabled = Boolean(selectedMetaSourceRun); + const selectedOutputFolder = selectedRun?.output_folder || selectedMetaSourceOutputFolder || runForm.output_folder || ""; + const activeScreeningRun = useMemo( + () => { + const candidates = []; + if (selectedRun) candidates.push(selectedRun); + for (const run of runs) { + if (!candidates.some((item) => item?.id && item.id === run?.id)) { + candidates.push(run); + } + } + return candidates.find((run) => { + const isMetaRun = run?.kind === "meta" || run?.mode === "meta"; + return !isMetaRun && isActiveRunStatus(run?.status); + }) || null; + }, + [runs, selectedRun] + ); + const screeningRunInProgress = Boolean(activeScreeningRun); + const activeMetaRunForSelectedSource = useMemo( + () => { + const candidates = []; + if (selectedRun) candidates.push(selectedRun); + for (const run of runs) { + if (!candidates.some((item) => item?.id && item.id === run?.id)) { + candidates.push(run); + } + } + return newestRunsFirst(candidates).find((run) => { + const isMetaRun = run?.kind === "meta" || run?.mode === "meta"; + if (!isMetaRun || !isActiveRunStatus(run?.status)) return false; + if (!selectedMetaSourceRun) return true; + return metaRunMatchesSource(run, selectedMetaSourceRun); + }) || null; + }, + [runs, selectedRun, 
selectedMetaSourceRun?.id, selectedMetaSourceOutputFolder] + ); + const metaRunInProgress = Boolean(activeMetaRunForSelectedSource); + const runsForActiveTab = useMemo( + () => newestRunsFirst(runs.filter((run) => { + const isMetaRun = run?.kind === "meta" || run?.mode === "meta"; + if (runsSubTab !== "meta") { + return !isMetaRun; + } + if (!isMetaRun) { + return false; + } + if (!selectedMetaSourceRun) { + return true; + } + return metaRunMatchesSource(run, selectedMetaSourceRun); + })), + [runs, runsSubTab, selectedMetaSourceRun?.id, selectedMetaSourceOutputFolder] + ); + const selectedRunForActiveTab = useMemo( + () => runsForActiveTab.find((run) => run.id === selectedRunId) || null, + [runsForActiveTab, selectedRunId] + ); + const activeRunForActiveTab = runsSubTab === "meta" + ? activeMetaRunForSelectedSource + : activeScreeningRun; + const currentExecutionRun = useMemo( + () => activeRunForActiveTab || selectedRunForActiveTab || runsForActiveTab[0] || null, + [activeRunForActiveTab, selectedRunForActiveTab, runsForActiveTab] + ); + const previousRunsForActiveTab = useMemo( + () => runsForActiveTab.filter((run) => run.id !== currentExecutionRun?.id), + [runsForActiveTab, currentExecutionRun?.id] + ); + const viewingPreviousRun = Boolean( + currentExecutionRun + && selectedRunForActiveTab + && currentExecutionRun.id === selectedRunForActiveTab.id + && !activeRunForActiveTab + && runsForActiveTab[0]?.id !== currentExecutionRun.id + ); + const selectedMetaRun = useMemo( + () => (runsSubTab === "meta" ? 
currentExecutionRun : null), + [runsSubTab, currentExecutionRun] + ); + const resolvedMetaRun = useMemo( + () => { + if (runsSubTab !== "meta") return null; + return selectedMetaRun; + }, + [runsSubTab, selectedMetaRun] + ); + const metaArtifactGroups = useMemo( + () => groupMetaArtifacts(metaArtifacts), + [metaArtifacts] + ); + const activeMetaArtifactGroup = useMemo( + () => { + if (metaArtifactGroups.some((group) => group.id === selectedMetaArtifactGroup)) { + return selectedMetaArtifactGroup; + } + const selectedFile = metaArtifacts.find((item) => item.relative_path === selectedMetaArtifactPath); + if (selectedFile) { + return metaArtifactGroupId(selectedFile); + } + return metaArtifactGroups[0]?.id || ""; + }, + [metaArtifactGroups, metaArtifacts, selectedMetaArtifactGroup, selectedMetaArtifactPath] + ); + const activeMetaArtifacts = useMemo( + () => metaArtifactGroups.find((group) => group.id === activeMetaArtifactGroup)?.files || [], + [metaArtifactGroups, activeMetaArtifactGroup] + ); + const selectedMetaArtifact = useMemo( + () => activeMetaArtifacts.find((item) => item.relative_path === selectedMetaArtifactPath) + || preferredMetaArtifact(activeMetaArtifacts) + || null, + [activeMetaArtifacts, selectedMetaArtifactPath] + ); + const selectedMetaArtifactUrl = useMemo( + () => buildMetaArtifactUrl(resolvedMetaRun?.id, selectedMetaArtifact?.relative_path), + [resolvedMetaRun?.id, selectedMetaArtifact?.relative_path] + ); + + async function refreshWorkspace() { + const data = await api("/api/workspace"); + setWorkspace(data); + } + + async function refreshProjects() { + const data = await api("/api/projects"); + const nextProjects = data.projects || []; + setProjects(nextProjects); + setSelectedProjectId((current) => { + if (!current) return null; + const stillExists = nextProjects.some((item) => item.id === current); + return stillExists ? current : null; + }); + } + + async function refreshRuns() { + const projectQuery = selectedProjectId ? 
`?project_id=${encodeURIComponent(selectedProjectId)}` : ""; + const data = await api(`/api/runs${projectQuery}`); + setRuns(data.runs || []); + } + + async function loadSpec(projectId) { + if (!projectId) return; + setPubmedCount(null); + setPubmedCountBusy(false); + const data = await api(`/api/projects/${projectId}/spec`); + setSpecForm(data.form || {}); + setYamlText(data.yaml_text || ""); + setSpecPath(data.config_path || ""); + setSearchAdvancedTouched({ maxResults: false, email: false }); + setMetaForm((prev) => ({ + ...prev, + output_folder: data.form?.output?.directory || prev.output_folder, + })); + } + + async function refreshSecrets() { + const data = await api("/api/settings/secrets"); + setSecrets(data.values || {}); + setMaskedSecrets(data.masked || {}); + } + + async function refreshPreferences() { + const data = await api("/api/settings/preferences"); + const models = Array.isArray(data?.preferred_models) ? data.preferred_models : []; + const preferredDefault = typeof data?.default_model === "string" ? data.default_model : ""; + const resolvedDefault = pickPreferredDefaultModel(models, preferredDefault); + setPreferredModels(models); + setPreferredModelsText(stringifyLines(models)); + setGlobalPreferredModel(resolvedDefault); + setYamlModelSelection((prev) => { + if (models.includes(prev)) return prev; + return resolvedDefault || ""; + }); + } + + async function saveGlobalModelPreference(nextModel) { + const normalized = pickPreferredDefaultModel(preferredModels, nextModel); + setGlobalPreferredModel(normalized); + try { + const data = await api("/api/settings/preferences", { + method: "PUT", + body: JSON.stringify({ default_model: normalized }), + }); + const models = Array.isArray(data?.preferred_models) ? data.preferred_models : []; + const preferredDefault = typeof data?.default_model === "string" ? 
data.default_model : ""; + const resolvedDefault = pickPreferredDefaultModel(models, preferredDefault); + setPreferredModels(models); + setPreferredModelsText(stringifyLines(models)); + setGlobalPreferredModel(resolvedDefault); + setYamlModelSelection((prev) => (models.includes(prev) ? prev : (resolvedDefault || ""))); + } catch (err) { + setStatusMsg({ type: "error", text: err.message }); + } + } + + function insertIntoYaml(textToInsert) { + const editor = yamlEditorRef.current; + if (!editor) { + setYamlText((prev) => `${prev}${textToInsert}`); + return; + } + + const start = editor.selectionStart || 0; + const end = editor.selectionEnd || 0; + const current = yamlText || ""; + const nextText = `${current.slice(0, start)}${textToInsert}${current.slice(end)}`; + setYamlText(nextText); + + window.requestAnimationFrame(() => { + const caret = start + textToInsert.length; + editor.focus(); + editor.setSelectionRange(caret, caret); + }); + } + + function insertSelectedModelValue() { + if (!yamlModelSelection) return; + insertIntoYaml(`"${yamlModelSelection}"`); + } + + function insertSelectedModelLine() { + if (!yamlModelSelection) return; + const editor = yamlEditorRef.current; + const cursor = editor?.selectionStart || 0; + const current = yamlText || ""; + const lineStart = current.lastIndexOf("\n", Math.max(0, cursor - 1)) + 1; + const linePrefix = current.slice(lineStart, cursor); + const indentMatch = linePrefix.match(/^\s*/); + const indent = indentMatch ? 
indentMatch[0] : ""; + insertIntoYaml(`${indent}model: "${yamlModelSelection}"\n`); + } + + useEffect(() => { + (async () => { + try { + await refreshWorkspace(); + await refreshProjects(); + await refreshSecrets(); + await refreshPreferences(); + } catch (err) { + setStatusMsg({ type: "error", text: err.message }); + } + })(); + }, []); + + useEffect(() => { + if (selectedProjectId) { + setSelectedMetaSourceRunId(""); + setMetaSourceManuallySelected(false); + setBuildStep("search"); + loadSpec(selectedProjectId).catch((err) => { + setStatusMsg({ type: "error", text: err.message }); + }); + refreshRuns().catch((err) => setStatusMsg({ type: "error", text: err.message })); + } + }, [selectedProjectId]); + + useEffect(() => { + if (editorTab !== "runs" || !currentExecutionRun?.id) return; + if (selectedRunId === currentExecutionRun.id) return; + setSelectedRunId(currentExecutionRun.id); + setSelectedRun(currentExecutionRun); + setLogs([]); + logOffsetRef.current = 0; + setLogOffset(0); + }, [editorTab, currentExecutionRun?.id, selectedRunId]); + + useEffect(() => { + if (!eligibleMetaSourceRuns.length) { + if (selectedMetaSourceRunId) { + setSelectedMetaSourceRunId(""); + } + return; + } + const selectedStillExists = eligibleMetaSourceRuns.some((run) => run.id === selectedMetaSourceRunId); + if (!selectedStillExists || !metaSourceManuallySelected) { + const newestSourceId = eligibleMetaSourceRuns[0].id; + if (!selectedStillExists && metaSourceManuallySelected) { + setMetaSourceManuallySelected(false); + } + if (selectedMetaSourceRunId !== newestSourceId) { + setSelectedMetaSourceRunId(newestSourceId); + } + } + }, [eligibleMetaSourceRuns, metaSourceManuallySelected, selectedMetaSourceRunId]); + + useEffect(() => { + if (!selectedMetaSourceOutputFolder) return; + setMetaForm((prev) => { + if (prev.output_folder === selectedMetaSourceOutputFolder) return prev; + return { ...prev, output_folder: selectedMetaSourceOutputFolder }; + }); + }, 
[selectedMetaSourceOutputFolder]); + + useEffect(() => { + const timer = setInterval(() => { + if (!selectedProjectId) return; + refreshRuns().catch(() => {}); + }, 2500); + return () => clearInterval(timer); + }, [selectedProjectId]); + + useEffect(() => { + if (!selectedRunId) return; + let mounted = true; + + async function tick() { + try { + const runData = await api(`/api/runs/${selectedRunId}`); + if (!mounted) return; + setSelectedRun(runData); + + const logData = await api(`/api/runs/${selectedRunId}/logs?offset=${logOffsetRef.current}`); + if (!mounted) return; + if (Array.isArray(logData.lines) && logData.lines.length) { + setLogs((prev) => [...prev, ...logData.lines]); + } + const nextOffset = logData.next_offset || 0; + logOffsetRef.current = nextOffset; + setLogOffset(nextOffset); + } catch (err) { + if (mounted) { + setStatusMsg({ type: "error", text: err.message }); + } + } + } + + tick(); + const timer = setInterval(tick, 2000); + return () => { + mounted = false; + clearInterval(timer); + }; + }, [selectedRunId]); + + useEffect(() => { + if (!resolvedMetaRun?.id) { + setMetaArtifacts([]); + setMetaArtifactsError(""); + setMetaArtifactsLoading(false); + setSelectedMetaArtifactPath(""); + setSelectedMetaArtifactGroup(""); + return; + } + + let mounted = true; + + async function refreshMetaArtifacts() { + try { + if (mounted) { + setMetaArtifactsLoading(true); + setMetaArtifactsError(""); + } + const data = await api(`/api/runs/${resolvedMetaRun.id}/meta-artifacts`); + if (!mounted) return; + const files = sortMetaArtifacts(Array.isArray(data?.files) ? 
data.files : []); + setMetaArtifacts(files); + setSelectedMetaArtifactPath((current) => { + if (files.some((item) => item.relative_path === current)) { + return current; + } + return preferredMetaArtifact(files)?.relative_path || ""; + }); + setSelectedMetaArtifactGroup((current) => { + if (files.some((item) => metaArtifactGroupId(item) === current)) { + return current; + } + const preferred = preferredMetaArtifact(files); + return preferred ? metaArtifactGroupId(preferred) : ""; + }); + } catch (err) { + if (!mounted) return; + setMetaArtifacts([]); + setSelectedMetaArtifactPath(""); + setSelectedMetaArtifactGroup(""); + setMetaArtifactsError(err.message || String(err)); + } finally { + if (mounted) { + setMetaArtifactsLoading(false); + } + } + } + + refreshMetaArtifacts(); + const shouldPoll = !["completed", "failed", "canceled"].includes( + String(resolvedMetaRun.status || "").toLowerCase() + ); + if (!shouldPoll) { + return () => { + mounted = false; + }; + } + + const timer = setInterval(refreshMetaArtifacts, 5000); + return () => { + mounted = false; + clearInterval(timer); + }; + }, [resolvedMetaRun?.id, resolvedMetaRun?.status]); + + function updateField(path, value) { + setSpecForm((prev) => setNested(prev, path, value)); + } + + function getModelFieldValue(path) { + const explicit = String(getNested(specForm, path, "") || "").trim(); + if (explicit) return explicit; + const defaultsModel = String(getNested(specForm, ["defaults", "model"], "") || "").trim(); + if (defaultsModel) return defaultsModel; + return globalPreferredModel || ""; + } + + function renderModelSelect(label, path, optional = true) { + const value = getModelFieldValue(path); + const options = Array.from(new Set([ + ...preferredModels, + ...(value && !preferredModels.includes(value) ? [value] : []), + ])); + return ( + <> + + + + ); + } + + function renderToggleControl(label, checked, onChange, disabled = false) { + const enabled = Boolean(checked); + return ( +
+ {label} + +
+ ); + } + + function buildFunnelStages(counters, liveProgress = null, timeline = [], progressMeta = {}) { + const data = counters && typeof counters === "object" ? counters : {}; + const stages = []; + const stageOrder = ["search", "abstract", "retrieval", "fulltext", "parsing", "annotation", "output"]; + const changedStages = Array.isArray(progressMeta?.cachePreview?.changed_stages) + ? progressMeta.cachePreview.changed_stages + : []; + const invalidatedStages = Array.isArray(progressMeta?.cachePreview?.invalidates) + ? progressMeta.cachePreview.invalidates + : []; + const resetResults = changedStages.length > 0 || invalidatedStages.length > 0; + const resetBoundaryStages = invalidatedStages.length ? invalidatedStages : changedStages; + const firstResetIndex = Math.min( + ...resetBoundaryStages + .map((stage) => stageOrder.indexOf(stage)) + .filter((index) => index >= 0) + ); + const hasResetBoundary = Number.isFinite(firstResetIndex); + const reachedStages = new Set(progressMeta?.reachedStages || []); + const completedStages = new Set(progressMeta?.completedStages || []); + const maxReachedIndex = Math.max( + -1, + ...(Array.from(reachedStages).map((stage) => stageOrder.indexOf(stage)).filter((index) => index >= 0)) + ); + const canDisplayStage = (stageId, completedAliases = []) => { + if (!resetResults) return true; + const index = stageOrder.indexOf(stageId); + if (hasResetBoundary && index >= 0 && index < firstResetIndex) { + return true; + } + if (liveProgress?.stage === stageId) return true; + if (completedStages.has(stageId)) return true; + if (completedAliases.some((stage) => completedStages.has(stage))) return true; + return index >= 0 && maxReachedIndex > index; + }; + const timelineStatus = (stageId) => { + const item = (timeline || []).find((stage) => stage.stage === stageId); + return item?.status || ""; + }; + const addStage = (id, title, items, sideItems = []) => { + const filteredItems = items.filter((item) => item.value !== undefined && 
item.value !== null); + const filteredSideItems = sideItems.filter((item) => item.value !== undefined && item.value !== null && Number(item.value) !== 0); + if (filteredItems.length || filteredSideItems.length) { + stages.push({ id, title, items: filteredItems, sideItems: filteredSideItems }); + } + }; + const hasStage = (stageId) => stages.some((stage) => stage.id === stageId); + const stageStatusValue = (stageId) => { + const status = String(timelineStatus(stageId) || "").toLowerCase(); + if (status === "completed" || completedStages.has(stageId)) return "Done"; + if (status === "running" || liveProgress?.stage === stageId || reachedStages.has(stageId)) return "Running"; + if (status === "failed") return "Failed"; + if (status === "canceled") return "Canceled"; + return "Pending"; + }; + const addStatusStage = (id, title) => { + if (!resetResults || hasStage(id) || !canDisplayStage(id)) return; + const value = stageStatusValue(id); + addStage(id, title, [{ + key: "status", + label: "Status", + value, + tone: value === "Done" ? "include" : value === "Pending" ? 
"incomplete" : "total", + }]); + }; + + if (data.search && canDisplayStage("search")) { + addStage("search", "Search", [ + { key: "studies_found", label: "Found", value: data.search.studies_found, tone: "total" }, + ]); + } + addStatusStage("search", "Search"); + + if (data.abstract && canDisplayStage("abstract")) { + addStage("abstract", "Abstract Screening", [ + { key: "screened", label: "Screened", value: data.abstract.screened, tone: "total" }, + { key: "included", label: "Included", value: data.abstract.included, tone: "include" }, + ], [ + { key: "excluded", label: "Excluded", value: data.abstract.excluded, tone: "exclude" }, + ]); + } + addStatusStage("abstract", "Abstract Screening"); + + if (data.fulltext && canDisplayStage("fulltext")) { + addStage("fulltext", "Full-Text Screening", [ + { key: "screened", label: "Screened", value: data.fulltext.screened, tone: "total" }, + { key: "included", label: "Included", value: data.fulltext.included, tone: "include" }, + ], [ + { key: "excluded", label: "Excluded", value: data.fulltext.excluded, tone: "exclude" }, + { key: "incomplete", label: "Incomplete", value: data.fulltext.incomplete, tone: "incomplete" }, + ]); + } + addStatusStage("fulltext", "Full-Text Screening"); + + let finalIncluded = null; + if (data.output && typeof data.output === "object") { + const outputEntries = Object.entries(data.output); + const finalIncludedEntry = + outputEntries.find(([key]) => key === "final_included") || + outputEntries.find(([key]) => { + const normalizedKey = String(key || "").toLowerCase(); + return normalizedKey.includes("final") && normalizedKey.includes("included"); + }); + if (finalIncludedEntry) { + finalIncluded = finalIncludedEntry[1]; + } + } + if (finalIncluded === null && data.fulltext?.included !== undefined) { + finalIncluded = data.fulltext.included; + } + if (finalIncluded !== null && canDisplayStage("output", ["fulltext"])) { + addStage("output", "Final Studies", [ + { key: "final_included", label: 
"Included", value: finalIncluded, tone: "include" }, + ]); + } + addStatusStage("output", "Final Studies"); + + const parsingStatus = timelineStatus("parsing"); + const shouldShowParsing = (Boolean(data.parsing) && canDisplayStage("parsing")) + || (Boolean(data.annotation) && canDisplayStage("parsing")) + || liveProgress?.stage === "parsing" + || (resetResults && canDisplayStage("parsing")) + || (!resetResults && ["completed", "running", "failed", "canceled"].includes(String(parsingStatus || "").toLowerCase())); + if (shouldShowParsing) { + const parsingItems = data.parsing + ? Object.entries(data.parsing).map(([key, value]) => ({ + key, + label: formatCounterLabel(key), + value, + tone: key === "status" && String(value).toLowerCase() === "off" ? "incomplete" : "total", + })) + : [{ + key: "status", + label: "Status", + value: String(parsingStatus || "Done").toLowerCase() === "completed" + ? "Done" + : formatCounterLabel(parsingStatus || "Done"), + tone: String(parsingStatus || "").toLowerCase() === "completed" ? 
"include" : "total", + }]; + addStage("parsing", "Coordinate Parsing", parsingItems); + } + + if (data.annotation && canDisplayStage("annotation")) { + addStage("annotation", "Analysis annotations", [ + { key: "decisions", label: "Decisions", value: data.annotation.decisions, tone: "include" }, + { key: "annotations", label: "Annotations", value: data.annotation.annotations, tone: "total" }, + ]); + } + addStatusStage("annotation", "Analysis annotations"); + + if (liveProgress?.stage && liveProgress.total && !stages.some((stage) => stage.id === liveProgress.stage)) { + const fallbackStages = { + search: "Search", + abstract: "Abstract Screening", + retrieval: "Full-Text Screening", + fulltext: "Full-Text Screening", + parsing: "Coordinate parsing", + annotation: "Analysis annotations", + }; + const title = fallbackStages[liveProgress.stage] || formatCounterLabel(liveProgress.stage); + stages.push({ + id: liveProgress.stage, + title, + items: [], + sideItems: [], + }); + } + + if (liveProgress?.stage) { + for (const stage of stages) { + if (stage.id === liveProgress.stage) { + stage.liveProgress = liveProgress; + } + } + } + + return stages; + } + + function renderCounterFunnel(counters, liveProgress = null, timeline = [], progressMeta = {}) { + const stages = buildFunnelStages(counters, liveProgress, timeline, progressMeta); + const hasReset = Boolean( + progressMeta?.cachePreview?.changed_stages?.length + || progressMeta?.cachePreview?.invalidates?.length + ); + if (!stages.length) { + return ( +
+ {hasReset + ? "Results were reset for this rerun and will appear as each stage is evaluated." + : "Results will appear here as Autonima writes stage outputs."} +
+ ); + } + + return ( +
+ {stages.map((stage, index) => ( +
+
+
+
{stage.title}
+ {stage.liveProgress ? ( +
+
+ {stage.liveProgress.label || "Running"} + {stage.liveProgress.current} / {stage.liveProgress.total} +
+
+
+
+
+ ) : null} +
+ {stage.items.map((item) => ( +
+ {item.value} + {item.label} +
+ ))} +
+
+ {stage.sideItems?.length ? ( + <> +
+
+ {stage.sideItems.map((item) => ( +
+ {item.value} + {item.label} +
+ ))} +
+ + ) : ( + <> +
+
+ + )} +
+ {index < stages.length - 1 ? ( +
+
+
+
+
+ ) : null} +
+ ))} +
+ ); + } + + function reviewValue(value) { + const text = String(value || "").trim(); + return text || "Not set"; + } + + function reviewCount(path) { + return getCriteriaList(path).length; + } + + function renderReviewChip(label, tone = "info") { + return {label}; + } + + function renderReviewRow(label, value) { + return ( +
+ {label} + {value} +
+ ); + } + + function renderReviewCard(title, children, chips = []) { + return ( +
+
+

{title}

+ {chips.length ?
{chips}
: null} +
+
{children}
+
+ ); + } + + function renderSpecificationReview() { + const searchQuery = String(getNested(specForm, ["search", "query"], "") || ""); + const dateFrom = reviewValue(getNested(specForm, ["search", "date_from"], "")); + const dateTo = reviewValue(getNested(specForm, ["search", "date_to"], "")); + const retrievalSources = getRetrievalSources(); + const fullTextSources = getFullTextSources(); + const annotationEntries = getNested(specForm, ["annotation", "annotations"], []); + const annotations = Array.isArray(annotationEntries) ? annotationEntries : []; + const metadataFields = getNested(specForm, ["annotation", "metadata_fields"], DEFAULT_ANNOTATION_METADATA_FIELDS); + const metadataFieldCount = Array.isArray(metadataFields) ? metadataFields.length : 0; + const hasPmidTerms = /\[PMID\]|\bPMID\b/i.test(searchQuery); + + return ( +
+ {renderReviewCard( + "Find Studies", + <> + {renderReviewRow("Query", reviewValue(searchQuery))} + {renderReviewRow("Date range", `${dateFrom} → ${dateTo}`)} + {renderReviewRow("Database", reviewValue(getNested(specForm, ["search", "database"], "pubmed")))} + , + [ + renderReviewChip(searchQuery.trim() ? "Query set" : "Query missing", searchQuery.trim() ? "ok" : "muted"), + hasPmidTerms ? renderReviewChip("PMID list", "info") : null, + ].filter(Boolean) + )} + + {renderReviewCard( + "Retrieval", + <> + {renderReviewRow("Built-in sources", retrievalSources.length ? retrievalSources.join(", ") : "None")} + {renderReviewRow("Local sources", fullTextSources.length)} + {renderReviewRow("Load excluded", getNested(specForm, ["retrieval", "load_excluded"], false) ? "On" : "Off")} + , + [ + renderReviewChip(`${retrievalSources.length + fullTextSources.length} source${retrievalSources.length + fullTextSources.length === 1 ? "" : "s"}`, "info"), + ] + )} + + {renderReviewCard( + "Screening", + <> + {renderReviewRow("Abstract objective", reviewValue(getNested(specForm, ["screening", "abstract", "objective"], "")))} + {renderReviewRow("Abstract criteria", `${reviewCount(["screening", "abstract", "inclusion_criteria"])} inclusion · ${reviewCount(["screening", "abstract", "exclusion_criteria"])} exclusion`)} + {renderReviewRow("Full-text objective", reviewValue(getNested(specForm, ["screening", "fulltext", "objective"], "")))} + {renderReviewRow("Full-text criteria", `${reviewCount(["screening", "fulltext", "inclusion_criteria"])} inclusion · ${reviewCount(["screening", "fulltext", "exclusion_criteria"])} exclusion`)} + , + [ + renderReviewChip(`${reviewCount(["screening", "abstract", "inclusion_criteria"]) + reviewCount(["screening", "fulltext", "inclusion_criteria"])} inclusion`, "ok"), + renderReviewChip(`${reviewCount(["screening", "abstract", "exclusion_criteria"]) + reviewCount(["screening", "fulltext", "exclusion_criteria"])} exclusion`, "muted"), + ] + )} + + 
{renderReviewCard( + "Parsing", + <> + {renderReviewRow("Parse coordinates", isParsingEnabled() ? "On" : "Off")} + {renderReviewRow("Coordinate model", reviewValue(getNested(specForm, ["parsing", "coordinate_model"], "")))} + , + [renderReviewChip(isParsingEnabled() ? "Enabled" : "Disabled", isParsingEnabled() ? "ok" : "muted")] + )} + + {renderReviewCard( + "Annotation", + <> + {renderReviewRow("Annotation", isAnnotationEnabled() ? "On" : "Off")} + {renderReviewRow("Common criteria", `${reviewCount(["annotation", "inclusion_criteria"])} inclusion · ${reviewCount(["annotation", "exclusion_criteria"])} exclusion`)} + {renderReviewRow("Named annotation rules", annotations.length)} + {renderReviewRow("Metadata fields", metadataFieldCount)} + , + [ + renderReviewChip(isAnnotationEnabled() ? "Enabled" : "Disabled", isAnnotationEnabled() ? "ok" : "muted"), + renderReviewChip(`${annotations.length} rule${annotations.length === 1 ? "" : "s"}`, "info"), + ] + )} +
+    );
+  }
+  /**
+   * Store `value` at parsing.parse_coordinates in the spec form. When the
+   * value is falsy, annotation.enabled is set to false in the same update.
+   */
+  function updateParseCoordinates(value) {
+    setSpecForm((prev) => {
+      let next = setNested(prev, ["parsing", "parse_coordinates"], value);
+      if (!value) {
+        next = setNested(next, ["annotation", "enabled"], false);
+      }
+      return next;
+    });
+  }
+  /** Boolean view of parsing.parse_coordinates; `true` is passed to getNested as the fallback. */
+  function isParsingEnabled() {
+    return Boolean(getNested(specForm, ["parsing", "parse_coordinates"], true));
+  }
+  /** True only when parsing is enabled and annotation.enabled (fallback `true`) is truthy. */
+  function isAnnotationEnabled() {
+    return isParsingEnabled() && Boolean(getNested(specForm, ["annotation", "enabled"], true));
+  }
+  /**
+   * Switch to the "editor" view for `projectId` on tab `tabName`.
+   * Does nothing when `projectId` is falsy. Clears the PubMed count state,
+   * selects the project, resets the build step to "search" (build tab) or
+   * the runs sub-tab to "screening" (runs tab), and closes settings.
+   */
+  function enterEditor(projectId, tabName = "build") {
+    if (!projectId) return;
+    setPubmedCount(null);
+    setPubmedCountBusy(false);
+    setSelectedProjectId(projectId);
+    setEditorTab(tabName);
+    if (tabName === "build") {
+      setBuildStep("search");
+    } else if (tabName === "runs") {
+      setRunsSubTab("screening");
+    }
+    setView("editor");
+    setSettingsOpen(false);
+  }
+  /** Return to the "projects" view and reset tab, build step, runs sub-tab and settings state. */
+  function backToProjects() {
+    setView("projects");
+    setEditorTab("build");
+    setBuildStep("search");
+    setRunsSubTab("screening");
+    setSettingsOpen(false);
+  }
+  /**
+   * Set `key` to `value` on the annotation entry at `index` under
+   * annotation.annotations. The list is copied, padded with blank entries
+   * until `index` exists, and the target entry is shallow-copied before the
+   * write, so the previous form object is not mutated by this function.
+   */
+  function updateAnnotationEntry(index, key, value) {
+    setSpecForm((prev) => {
+      const existing = getNested(prev, ["annotation", "annotations"], []);
+      const entries = Array.isArray(existing) ? [...existing] : [];
+      while (entries.length <= index) { // pad so entries[index] is defined
+        entries.push({
+          name: "",
+          description: "",
+          inclusion_criteria: [],
+          exclusion_criteria: [],
+        });
+      }
+      const nextEntry = { ...(entries[index] || {}) };
+      nextEntry[key] = value;
+      entries[index] = nextEntry;
+      return setNested(prev, ["annotation", "annotations"], entries);
+    });
+  }
+  /** Append one blank annotation entry to annotation.annotations. */
+  function addAnnotationEntry() {
+    setSpecForm((prev) => {
+      const existing = getNested(prev, ["annotation", "annotations"], []);
+      const entries = Array.isArray(existing) ?
[...existing] : [];
+      entries.push({
+        name: "",
+        description: "",
+        inclusion_criteria: [],
+        exclusion_criteria: [],
+      });
+      return setNested(prev, ["annotation", "annotations"], entries);
+    });
+  }
+  /** Drop the annotation entry at `index` from annotation.annotations. */
+  function removeAnnotationEntry(index) {
+    setSpecForm((prev) => {
+      const existing = getNested(prev, ["annotation", "annotations"], []);
+      const entries = Array.isArray(existing) ? [...existing] : [];
+      const nextEntries = entries.filter((_, itemIndex) => itemIndex !== index);
+      return setNested(prev, ["annotation", "annotations"], nextEntries);
+    });
+  }
+  /** Read the value at `path` in the current spec form; anything that is not an array yields []. */
+  function getCriteriaList(path) {
+    const criteria = getNested(specForm, path, []);
+    return Array.isArray(criteria) ? criteria : [];
+  }
+  /** Replace the criteria item at `index` of the list stored at `path` with `value`. */
+  function updateCriteriaItem(path, index, value) {
+    setSpecForm((prev) => {
+      const existing = getNested(prev, path, []);
+      const entries = Array.isArray(existing) ? [...existing] : [];
+      entries[index] = value;
+      return setNested(prev, path, entries);
+    });
+  }
+  /** Append an empty-string criteria item to the list stored at `path`. */
+  function addCriteriaItem(path) {
+    setSpecForm((prev) => {
+      const existing = getNested(prev, path, []);
+      const entries = Array.isArray(existing) ? [...existing] : [];
+      entries.push("");
+      return setNested(prev, path, entries);
+    });
+  }
+  /** Remove the criteria item at `index` from the list stored at `path`. */
+  function removeCriteriaItem(path, index) {
+    setSpecForm((prev) => {
+      const existing = getNested(prev, path, []);
+      const entries = Array.isArray(existing) ? [...existing] : [];
+      return setNested(prev, path, entries.filter((_, itemIndex) => itemIndex !== index));
+    });
+  }
+  /**
+   * Render the editable criteria list stored at `path`.
+   * `tone` === "include" selects the "I" item prefix; any other value
+   * selects "E". `keyPrefix` is the path segments joined with "-".
+   */
+  function renderCriteriaPanel(path, title, tone) {
+    const criteria = getCriteriaList(path);
+    const prefix = tone === "include" ? "I" : "E";
+    const keyPrefix = path.join("-");
+    return (
+
+
+

{title}

+ {criteria.length} +
+ {criteria.length ? ( +
+ {criteria.map((item, index) => ( +
+ {prefix}{index + 1} + updateCriteriaItem(path, index, e.target.value)} + /> + +
+ ))} +
+ ) : ( +
No criteria yet.
+ )} + +
+    );
+  }
+  /**
+   * List the built-in retrieval sources configured in the spec form.
+   * If the retrieval object has its own `sources` key, return its entries as
+   * trimmed, non-empty strings (a non-array value yields []). If the key is
+   * absent, fall back to ["pubget"].
+   */
+  function getRetrievalSources() {
+    const retrieval = getNested(specForm, ["retrieval"], {});
+    if (
+      retrieval
+      && typeof retrieval === "object"
+      && Object.prototype.hasOwnProperty.call(retrieval, "sources")
+    ) {
+      return Array.isArray(retrieval.sources)
+        ? retrieval.sources.map((source) => String(source || "").trim()).filter(Boolean)
+        : [];
+    }
+    return ["pubget"];
+  }
+  /** Read retrieval.full_text_sources; anything that is not an array yields []. */
+  function getFullTextSources() {
+    const sources = getNested(specForm, ["retrieval", "full_text_sources"], []);
+    return Array.isArray(sources) ? sources : [];
+  }
+  /**
+   * Write retrieval.sources from `nextSources` after trimming each entry,
+   * dropping empty ones and removing duplicates (first occurrence wins).
+   */
+  function updateRetrievalSources(nextSources) {
+    const uniqueSources = [];
+    for (const source of nextSources || []) {
+      const normalized = String(source || "").trim();
+      if (normalized && !uniqueSources.includes(normalized)) {
+        uniqueSources.push(normalized);
+      }
+    }
+    updateField(["retrieval", "sources"], uniqueSources);
+  }
+  /**
+   * Open the source modal in "add" mode (modal index null) with a blank form.
+   * The preselected type is "custom" when "pubget" is already configured,
+   * otherwise "pubget".
+   */
+  function openAddSourceModal() {
+    const currentSources = getRetrievalSources();
+    setSourceModalIndex(null);
+    setSourceType(currentSources.includes("pubget") ? "custom" : "pubget");
+    setSourceForm({
+      root_path: "",
+      pmid_source: "folder_name",
+      text_path_templates: "",
+      coordinates_path_templates: "",
+      allowed_extensions: "",
+      processed_data_path: "",
+      json_filename: "",
+      json_pmid_key: "",
+    });
+    setSourceModalOpen(true);
+  }
+  /**
+   * Open the source modal to edit the full-text source at `index`, copying
+   * its fields into the form. List-valued fields go through stringifyLines;
+   * missing fields fall back to "" (or "folder_name" for pmid_source).
+   */
+  function openEditSourceModal(index) {
+    const source = getFullTextSources()[index] || {};
+    setSourceModalIndex(index);
+    setSourceType("custom");
+    setSourceForm({
+      root_path: source.root_path || "",
+      pmid_source: source.pmid_source || "folder_name",
+      text_path_templates: stringifyLines(source.text_path_templates || []),
+      coordinates_path_templates: stringifyLines(source.coordinates_path_templates || []),
+      allowed_extensions: stringifyLines(source.allowed_extensions || []),
+      processed_data_path: source.processed_data_path || "",
+      json_filename: source.json_filename || "",
+      json_pmid_key: source.json_pmid_key || "",
+    });
+    setSourceModalOpen(true);
+  }
+  /** Set a single `key` of the source form to `value`, keeping the other fields. */
+  function updateSourceForm(key, value) {
+    setSourceForm((prev) => ({ ...prev, [key]: value }));
+  }
+  /**
+   * Build the custom-source object from the source form. root_path and
+   * pmid_source are always present (trimmed; pmid_source falls back to
+   * "folder_name"). The three list fields are passed through parseLines and
+   * the three string fields are trimmed; each is included only when non-empty.
+   */
+  function buildCustomSourcePayload() {
+    const payload = {
+      root_path: String(sourceForm.root_path || "").trim(),
+      pmid_source: String(sourceForm.pmid_source || "folder_name").trim() || "folder_name",
+    };
+    for (const key of ["text_path_templates", "coordinates_path_templates", "allowed_extensions"]) {
+      const values = parseLines(sourceForm[key]);
+      if (values.length) {
+        payload[key] = values;
+      }
+    }
+    for (const key of ["processed_data_path", "json_filename", "json_pmid_key"]) {
+      const value = String(sourceForm[key] || "").trim();
+      if (value) {
+        payload[key] = value;
+      }
+    }
+    return payload;
+  }
+  /**
+   * Apply the source modal. For type "pubget", add "pubget" to
+   * retrieval.sources unless it is already listed. Otherwise validate the
+   * custom payload and append it to, or replace it in,
+   * retrieval.full_text_sources.
+   */
+  function saveSourceModal() {
+    if (sourceType === "pubget") {
+      const currentSources = getRetrievalSources();
+      if (currentSources.includes("pubget")) {
+        setStatusMsg({ type: "error", text: "PubGet is already configured."
}); + return; + } + updateRetrievalSources([...currentSources, "pubget"]); + setSourceModalOpen(false); + return; + } + + const payload = buildCustomSourcePayload(); + if (!payload.root_path) { + setStatusMsg({ type: "error", text: "Root path is required for a custom source." }); + return; + } + + const currentSources = getFullTextSources(); + const nextSources = [...currentSources]; + if (sourceModalIndex == null) { + nextSources.push(payload); + } else { + nextSources[sourceModalIndex] = payload; + } + updateField(["retrieval", "full_text_sources"], nextSources); + setSourceModalOpen(false); + } + + function removeFullTextSource(index) { + const currentSources = getFullTextSources(); + updateField( + ["retrieval", "full_text_sources"], + currentSources.filter((_, itemIndex) => itemIndex !== index) + ); + } + + function extractPmids(text) { + const rawLines = String(text || "") + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean); + const ids = []; + const seen = new Set(); + for (const raw of rawLines) { + const cleaned = raw.replace(/[^0-9]/g, ""); + if (!cleaned) continue; + if (!seen.has(cleaned)) { + seen.add(cleaned); + ids.push(cleaned); + } + } + return ids; + } + + function handleStudyListFileSelected(file) { + if (!file) return; + setStudyListBusy(true); + setStudyListFileName(file.name || ""); + const reader = new FileReader(); + reader.onload = () => { + try { + const content = String(reader.result || ""); + const pmids = extractPmids(content); + if (!pmids.length) { + setStatusMsg({ type: "error", text: "No PMIDs found in the selected file." }); + return; + } + const pmidQuery = pmids.map((pmid) => `${pmid}[PMID]`).join(" OR "); + const existing = String(getNested(specForm, ["search", "query"], "") || "").trim(); + const nextQuery = existing + ? 
`${existing} OR (${pmidQuery})`
+          : pmidQuery;
+        updateField(["search", "query"], nextQuery);
+        setStatusMsg({ type: "ok", text: `Imported ${pmids.length} PMIDs from ${file.name}.` });
+        setStudyListModalOpen(false);
+        setStudyListFileName("");
+      } catch (err) {
+        setStatusMsg({ type: "error", text: `Could not parse file: ${err.message || err}` });
+      } finally {
+        setStudyListBusy(false);
+      }
+    };
+    reader.onerror = () => {
+      setStudyListBusy(false);
+      setStatusMsg({ type: "error", text: "Failed to read file." });
+    };
+    reader.readAsText(file);
+  }
+  /**
+   * Build the PubMed search term from search.query plus the optional
+   * search.date_from / search.date_to bounds. Returns "" when the query is
+   * empty, and the bare query when neither date is set. A missing bound is
+   * replaced by "1800/01/01" (start) or "3000/12/31" (end).
+   */
+  function buildPubMedSearchTerm() {
+    const query = String(getNested(specForm, ["search", "query"], "") || "").trim();
+    if (!query) return "";
+    const dateFrom = String(getNested(specForm, ["search", "date_from"], "") || "").trim();
+    const dateTo = String(getNested(specForm, ["search", "date_to"], "") || "").trim();
+    if (!dateFrom && !dateTo) return query;
+
+    const startDate = dateFrom || "1800/01/01";
+    const endDate = dateTo || "3000/12/31";
+    return `(${query}) AND ("${startDate}"[Date - Publication] : "${endDate}"[Date - Publication])`;
+  }
+  /** Open pubmed.ncbi.nlm.nih.gov in a new window for the current search term; reports an error when there is no query. */
+  function openSearchInPubMed() {
+    const term = buildPubMedSearchTerm();
+    if (!term) {
+      setStatusMsg({ type: "error", text: "Enter a search query before opening PubMed." });
+      return;
+    }
+    const url = `https://pubmed.ncbi.nlm.nih.gov/?term=${encodeURIComponent(term)}`;
+    window.open(url, "_blank", "noopener,noreferrer");
+  }
+  /**
+   * Request the result count for the current search term from the NCBI
+   * E-utilities esearch endpoint and store it with setPubmedCount as a
+   * string. search.email is sent along when set. Failures are reported via
+   * setStatusMsg; the busy flag is always cleared.
+   */
+  async function fetchPubMedCount() {
+    const term = buildPubMedSearchTerm();
+    if (!term) {
+      setStatusMsg({ type: "error", text: "Enter a search query before counting PubMed results."
});
+      return;
+    }
+
+    try {
+      setPubmedCountBusy(true);
+      setPubmedCount(null);
+      const params = new URLSearchParams({
+        db: "pubmed",
+        term,
+        retmode: "json",
+        rettype: "count",
+      });
+      const email = String(getNested(specForm, ["search", "email"], "") || "").trim();
+      if (email) {
+        params.set("email", email);
+      }
+      const response = await fetch(`https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?${params.toString()}`);
+      if (!response.ok) {
+        throw new Error(`PubMed count request failed (${response.status})`);
+      }
+      const payload = await response.json();
+      const count = payload?.esearchresult?.count;
+      if (count == null) {
+        throw new Error("PubMed response did not include a result count.");
+      }
+      setPubmedCount(String(count));
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message || String(err) });
+    } finally {
+      setPubmedCountBusy(false);
+    }
+  }
+  /**
+   * Return a normalized deep copy of the wizard form for saving.
+   *
+   * The copy is made with a JSON round trip, so `form` itself is not
+   * modified. Each section (search, retrieval, screening, parsing,
+   * annotation, output) is replaced by {} when it is not an object, and then
+   * has its defaults filled in and its blank values removed as noted inline.
+   *
+   * @param {object} form Wizard form state; a falsy value is treated as {}.
+   * @returns {object} The sanitized copy.
+   */
+  function sanitizeWizardForm(form) {
+    const cleaned = JSON.parse(JSON.stringify(form || {}));
+    // Search: database falls back to "pubmed".
+    const search = (cleaned.search && typeof cleaned.search === "object") ? cleaned.search : {};
+    const db = String(search.database || "").trim();
+    search.database = db || "pubmed";
+    // max_results: when the field was touched, keep only a finite positive number; otherwise only drop blanks.
+    if (searchAdvancedTouched.maxResults) {
+      if (search.max_results === "" || search.max_results == null) {
+        delete search.max_results;
+      } else {
+        const parsed = Number(search.max_results);
+        if (Number.isFinite(parsed) && parsed > 0) {
+          search.max_results = parsed;
+        } else {
+          delete search.max_results;
+        }
+      }
+    } else if (search.max_results === "" || search.max_results == null) {
+      delete search.max_results;
+    }
+    // email: a touched field is stored trimmed; a blank value is removed either way.
+    if (searchAdvancedTouched.email) {
+      const email = String(search.email || "").trim();
+      if (email) {
+        search.email = email;
+      } else {
+        delete search.email;
+      }
+    } else if (!String(search.email || "").trim()) {
+      delete search.email;
+    }
+    cleaned.search = search;
+    // Retrieval: sources default to ["pubget"], full_text_sources to [], load_excluded to false.
+    const retrieval = (cleaned.retrieval && typeof cleaned.retrieval === "object")
+      ? cleaned.retrieval
+      : {};
+    if (!Array.isArray(retrieval.sources)) {
+      retrieval.sources = ["pubget"];
+    } else {
+      retrieval.sources = retrieval.sources.map((source) => String(source || "").trim()).filter(Boolean);
+    }
+    if (!Array.isArray(retrieval.full_text_sources)) {
+      retrieval.full_text_sources = [];
+    }
+    if (typeof retrieval.load_excluded !== "boolean") {
+      retrieval.load_excluded = false;
+    }
+    cleaned.retrieval = retrieval;
+    // Screening: both stages get confidence_reporting = true unless it is already a boolean.
+    const screening = (cleaned.screening && typeof cleaned.screening === "object")
+      ? cleaned.screening
+      : {};
+    for (const stage of ["abstract", "fulltext"]) {
+      const stageData = (screening[stage] && typeof screening[stage] === "object")
+        ? screening[stage]
+        : {};
+      if (typeof stageData.confidence_reporting !== "boolean") {
+        stageData.confidence_reporting = true;
+      }
+      screening[stage] = stageData;
+    }
+    cleaned.screening = screening;
+    // Parsing: a blank coordinate_model is removed.
+    const parsing = (cleaned.parsing && typeof cleaned.parsing === "object")
+      ? cleaned.parsing
+      : {};
+    if (!String(parsing.coordinate_model || "").trim()) {
+      delete parsing.coordinate_model;
+    }
+    cleaned.parsing = parsing;
+    // Annotation: forced off when parse_coordinates is exactly false; metadata_fields falls back to the defaults.
+    const annotation = (cleaned.annotation && typeof cleaned.annotation === "object")
+      ? cleaned.annotation
+      : {};
+    if (parsing.parse_coordinates === false) {
+      annotation.enabled = false;
+    }
+    const metadataFields = Array.isArray(annotation.metadata_fields)
+      ? annotation.metadata_fields.map((item) => String(item || "").trim()).filter(Boolean)
+      : [];
+    annotation.metadata_fields = metadataFields.length
+      ? metadataFields
+      : [...DEFAULT_ANNOTATION_METADATA_FIELDS];
+    cleaned.annotation = annotation;
+    // Output: directory is always removed; formats default to csv + json; nimads and export_excluded_studies default to true.
+    const output = (cleaned.output && typeof cleaned.output === "object")
+      ? cleaned.output
+      : {};
+    delete output.directory;
+    const formats = Array.isArray(output.formats)
+      ? output.formats.map((item) => String(item || "").trim()).filter(Boolean)
+      : [];
+    output.formats = formats.length ? formats : ["csv", "json"];
+    if (typeof output.nimads !== "boolean") {
+      output.nimads = true;
+    }
+    if (typeof output.export_excluded_studies !== "boolean") {
+      output.export_excluded_studies = true;
+    }
+    cleaned.output = output;
+
+    return cleaned;
+  }
+  /**
+   * Create a project from the create-modal fields via POST /api/projects.
+   * When a global preferred model is set, it is written to the new project's
+   * spec as defaults.model. On success the project list is refreshed, the
+   * editor opens on the build tab and the modal fields are reset.
+   */
+  async function createProject() {
+    const name = createName.trim();
+    if (!name) {
+      setStatusMsg({ type: "error", text: "Project name is required." });
+      return;
+    }
+    try {
+      setCreateBusy(true);
+      const project = await api("/api/projects", {
+        method: "POST",
+        body: JSON.stringify({
+          name,
+          description: createDescription.trim() || null,
+        }),
+      });
+      if (globalPreferredModel) {
+        await api(`/api/projects/${project.id}/spec`, {
+          method: "PUT",
+          body: JSON.stringify({
+            form: {
+              defaults: {
+                model: globalPreferredModel,
+              },
+            },
+          }),
+        });
+      }
+      await refreshProjects();
+      enterEditor(project.id, "build");
+      setCreateModalOpen(false);
+      setCreateName("");
+      setCreateDescription("");
+      setStatusMsg({ type: "ok", text: "Project created." });
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    } finally {
+      setCreateBusy(false);
+    }
+  }
+  /**
+   * Import a project from an existing config path via POST /api/projects.
+   * Returns silently when the path is blank. Name and description are sent
+   * as null when left empty.
+   */
+  async function importProject() {
+    const configPath = importConfigPath.trim();
+    if (!configPath) return;
+    try {
+      setImportBusy(true);
+      const project = await api("/api/projects", {
+        method: "POST",
+        body: JSON.stringify({
+          config_path: configPath,
+          name: importName.trim() || null,
+          description: importDescription.trim() || null,
+        }),
+      });
+      await refreshProjects();
+      enterEditor(project.id, "build");
+      setImportModalOpen(false);
+      setImportConfigPath("");
+      setImportName("");
+      setImportDescription("");
+      setStatusMsg({ type: "ok", text: "Project imported."
});
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    } finally {
+      setImportBusy(false);
+    }
+  }
+  /** Open the edit-details modal prefilled from `project`; ignored when the project has no id. */
+  function openEditProjectModal(project) {
+    if (!project?.id) return;
+    setEditProjectId(project.id);
+    setEditName(project.name || "");
+    setEditDescription(project.description || "");
+    setEditModalOpen(true);
+  }
+  /**
+   * Open the clone modal for `project`, suggesting "<name> copy" as the new
+   * name and resetting the clone mode to "schema_only". Ignored when the
+   * project has no id.
+   */
+  function openCloneProjectModal(project) {
+    if (!project?.id) return;
+    setCloneProjectId(project.id);
+    setCloneName(`${project.name || "Project"} copy`);
+    setCloneDescription(project.description || "");
+    setCloneMode("schema_only");
+    setCloneModalOpen(true);
+  }
+  /**
+   * Save the edited name and description with PUT /api/projects/<id>.
+   * A blank name is rejected with an error message before any request is made.
+   */
+  async function saveProjectDetails() {
+    const name = editName.trim();
+    if (!name) {
+      setStatusMsg({ type: "error", text: "Project name is required." });
+      return;
+    }
+    if (!editProjectId) return;
+    try {
+      setEditBusy(true);
+      await api(`/api/projects/${editProjectId}`, {
+        method: "PUT",
+        body: JSON.stringify({
+          name,
+          description: editDescription.trim(),
+        }),
+      });
+      await refreshProjects();
+      setEditModalOpen(false);
+      setStatusMsg({ type: "ok", text: "Project details updated." });
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    } finally {
+      setEditBusy(false);
+    }
+  }
+  /**
+   * Clone the project selected in the clone modal and open the clone in the
+   * editor. A blank clone name is rejected with an error message.
+   */
+  async function cloneProject() {
+    const name = cloneName.trim();
+    if (!name) {
+      setStatusMsg({ type: "error", text: "Clone name is required."
});
+      return;
+    }
+    if (!cloneProjectId) return;
+    try {
+      setCloneBusy(true);
+      const clonePayload = {
+        mode: cloneMode,
+        name,
+        description: cloneDescription.trim(),
+      };
+      let cloned;
+      try {
+        cloned = await api(`/api/projects/${cloneProjectId}/clone`, {
+          method: "POST",
+          body: JSON.stringify(clonePayload),
+        });
+      } catch (err) {
+        // Retry with PUT only when the error message contains "method not allowed"; anything else propagates.
+        if (!String(err?.message || "").toLowerCase().includes("method not allowed")) {
+          throw err;
+        }
+        cloned = await api(`/api/projects/${cloneProjectId}/clone`, {
+          method: "PUT",
+          body: JSON.stringify(clonePayload),
+        });
+      }
+      await refreshProjects();
+      setCloneModalOpen(false);
+      setCloneProjectId("");
+      setCloneName("");
+      setCloneDescription("");
+      setCloneMode("schema_only");
+      enterEditor(cloned.id, "build");
+      const report = cloned?.clone_report || {};
+      if (cloneMode === "schema_and_cached_results") {
+        setStatusMsg({
+          type: "ok",
+          text: `Project cloned with schema + cached results (${report.cloned_runs_count || 0} runs copied).`,
+        });
+      } else {
+        setStatusMsg({ type: "ok", text: "Project cloned (schema only)." });
+      }
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    } finally {
+      setCloneBusy(false);
+    }
+  }
+  /** Dismiss the delete dialog by clearing the preview and resetting the mode to "metadata_only". */
+  function closeDeleteDialog() {
+    setDeletePreview(null);
+    setDeleteMode("metadata_only");
+  }
+  /**
+   * Fetch the delete preview for `projectId` and store it with
+   * setDeletePreview. The delete mode is preselected from
+   * preview.has_outputs: "metadata_config_and_outputs" when truthy,
+   * otherwise "metadata_only".
+   */
+  async function openDeleteDialog(projectId) {
+    if (!projectId) return;
+    try {
+      const preview = await api(`/api/projects/${projectId}/delete-preview`);
+      setDeletePreview(preview);
+      setDeleteMode(preview.has_outputs ? "metadata_config_and_outputs" : "metadata_only");
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+  /**
+   * Turn a delete report into one comma-separated sentence ending in ".".
+   * Missing report fields count as zero or absent. The skipped-config and
+   * skipped-output-folder parts are appended only when present.
+   *
+   * @param {object} report Object with optional `removed` and `skipped` sections.
+   * @returns {string} Human-readable summary.
+   */
+  function buildDeleteResultMessage(report) {
+    const runCount = report?.removed?.run_metadata_files?.length || 0;
+    const outputsCount = report?.removed?.output_folders?.length || 0;
+    const configDeleted = Boolean(report?.removed?.config_path);
+    const skippedConfig = report?.skipped?.config;
+    const skippedOutputsCount = report?.skipped?.output_folders?.length || 0;
+
+    const parts = [
+      "Removed project metadata",
+      `removed ${runCount} run metadata file${runCount === 1 ? "" : "s"}`,
+      configDeleted ? "deleted config" : "kept config",
+      `deleted ${outputsCount} output folder${outputsCount === 1 ? "" : "s"}`,
+    ];
+    if (skippedConfig) {
+      parts.push(`config skipped (${skippedConfig})`);
+    }
+    if (skippedOutputsCount) {
+      parts.push(`skipped ${skippedOutputsCount} output folder${skippedOutputsCount === 1 ? "" : "s"}`);
+    }
+    return parts.join(", ") + ".";
+  }
+  /**
+   * Delete the previewed project with the selected mode, then reset the
+   * state that referred to it. The editor state is cleared only when the
+   * deleted project was the selected one; the run selection and logs are
+   * always cleared. Finally the project and run lists are refreshed.
+   */
+  async function executeDeleteProject() {
+    if (!deletePreview?.project_id) return;
+    try {
+      setDeleteBusy(true);
+      const report = await api(`/api/projects/${deletePreview.project_id}/delete`, {
+        method: "POST",
+        body: JSON.stringify({ mode: deleteMode }),
+      });
+      closeDeleteDialog();
+      if (selectedProjectId === deletePreview.project_id) {
+        setSelectedProjectId(null);
+        setSpecForm({});
+        setYamlText("");
+        setSpecPath("");
+        setView("projects");
+        setEditorTab("build");
+        setSettingsOpen(false);
+      }
+      setSelectedRunId(null);
+      setSelectedRun(null);
+      setLogs([]);
+      logOffsetRef.current = 0;
+      setLogOffset(0);
+      await refreshProjects();
+      await refreshRuns();
+      setStatusMsg({ type: "ok", text: buildDeleteResultMessage(report) });
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    } finally {
+      setDeleteBusy(false);
+    }
+  }
+  /**
+   * Save the current spec (YAML text in YAML mode, otherwise the sanitized
+   * form) with PUT /api/projects/<id>/spec. Errors are reported through
+   * setStatusMsg and are not rethrown.
+   */
+  async function saveSpec() {
+    if (!selectedProjectId) return;
+    try {
+      const payload = yamlMode
+        ?
{ yaml_text: yamlText } + : { form: sanitizeWizardForm(specForm) }; + const result = await api(`/api/projects/${selectedProjectId}/spec`, { + method: "PUT", + body: JSON.stringify(payload), + }); + setYamlText(result.yaml_text || ""); + setSpecForm(result.form || {}); + setStatusMsg({ type: "ok", text: "Specification saved." }); + } catch (err) { + setStatusMsg({ type: "error", text: err.message }); + } + } + + async function autosaveSpecForNavigation() { + if (!selectedProjectId) return false; + try { + const payload = yamlMode + ? { yaml_text: yamlText } + : { form: sanitizeWizardForm(specForm) }; + const result = await api(`/api/projects/${selectedProjectId}/spec`, { + method: "PUT", + body: JSON.stringify(payload), + }); + setYamlText(result.yaml_text || ""); + setSpecForm(result.form || {}); + return true; + } catch (err) { + setStatusMsg({ type: "error", text: `Autosave failed: ${err.message}` }); + return false; + } + } + + async function navigateBuildStep(stepId) { + if (!stepId || stepId === buildStep) return; + const saved = await autosaveSpecForNavigation(); + if (!saved) return; + setEditorTab("build"); + setBuildStep(stepId); + } + + async function navigateEditorTab(tabName) { + if (tabName === editorTab) return; + const saved = await autosaveSpecForNavigation(); + if (!saved) return; + setEditorTab(tabName); + if (tabName === "runs") { + setRunsSubTab("screening"); + } + } + + async function goBuildStepWithAutosave(direction) { + const nextIndex = Math.min( + Math.max(buildStepIndex + direction, 0), + BUILD_STEPS.length - 1 + ); + await navigateBuildStep(BUILD_STEPS[nextIndex][0]); + } + + async function validateSpec() { + if (!selectedProjectId) return; + try { + if (yamlMode) { + await saveSpec(); + } + const result = await api(`/api/projects/${selectedProjectId}/validate`, { + method: "POST", + }); + if (result.ok) { + setStatusMsg({ type: "ok", text: result.message || "Config valid." 
});
+      } else {
+        setStatusMsg({ type: "error", text: result.message || "Config invalid." });
+      }
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+  /**
+   * Start a run for the selected project with POST /api/projects/<id>/runs.
+   * Empty output_folder and copy_valid_cache_from are sent as null, and a
+   * missing clear_cache as []. The new run becomes the selected run and the
+   * log view is reset.
+   */
+  async function startRun() {
+    if (!selectedProjectId) return;
+    try {
+      const payload = {
+        ...runForm,
+        output_folder: runForm.output_folder || null,
+        copy_valid_cache_from: runForm.copy_valid_cache_from || null,
+        clear_cache: runForm.clear_cache || [],
+      };
+      const run = await api(`/api/projects/${selectedProjectId}/runs`, {
+        method: "POST",
+        body: JSON.stringify(payload),
+      });
+      setSelectedRunId(run.id);
+      setSelectedRun(run);
+      setLogs([]);
+      logOffsetRef.current = 0;
+      setLogOffset(0);
+      await refreshRuns();
+      setStatusMsg({
+        type: "ok",
+        text: run.branched_from_output_folder
+          ? "Run started in a new execution folder with valid cache copied forward."
+          : "Run started.",
+      });
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+  /**
+   * Start a meta-analysis run with POST /api/projects/<id>/meta-runs.
+   * Requires a selected source run. The selected source's output folder,
+   * when set, takes precedence over metaForm.output_folder.
+   */
+  async function startMetaRun() {
+    if (!selectedProjectId) return;
+    if (!selectedMetaSourceRun) {
+      setStatusMsg({ type: "error", text: "Select a NIMADS source before starting meta-analysis." });
+      return;
+    }
+    try {
+      const payload = {
+        ...metaForm,
+        output_folder: selectedMetaSourceOutputFolder || metaForm.output_folder,
+        source_run_id: selectedMetaSourceRun.id,
+        include_ids: metaForm.include_ids || null,
+      };
+      const run = await api(`/api/projects/${selectedProjectId}/meta-runs`, {
+        method: "POST",
+        body: JSON.stringify(payload),
+      });
+      setSelectedRunId(run.id);
+      setSelectedRun(run);
+      setLogs([]);
+      logOffsetRef.current = 0;
+      setLogOffset(0);
+      await refreshRuns();
+      setStatusMsg({ type: "ok", text: "Meta run started."
});
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+  /**
+   * Request cancellation of a run with POST /api/runs/<id>/cancel. The run
+   * id is the first one available from activeRunForActiveTab,
+   * currentExecutionRun and selectedRunId, in that order.
+   */
+  async function cancelRun() {
+    const runIdToCancel = activeRunForActiveTab?.id || currentExecutionRun?.id || selectedRunId;
+    if (!runIdToCancel) return;
+    try {
+      await api(`/api/runs/${runIdToCancel}/cancel`, { method: "POST" });
+      setStatusMsg({ type: "ok", text: "Cancel requested." });
+      await refreshRuns();
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+  /** Send the `secrets` state to PUT /api/settings/secrets, then reload it with refreshSecrets. */
+  async function saveSecrets() {
+    try {
+      await api("/api/settings/secrets", {
+        method: "PUT",
+        body: JSON.stringify(secrets),
+      });
+      await refreshSecrets();
+      setStatusMsg({ type: "ok", text: "Secrets saved to ~/.autonima.env." });
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+  /**
+   * Save the preferred-model list and default model with
+   * PUT /api/settings/preferences, then sync local state from the response.
+   * A non-array preferred_models in the response is treated as [] and a
+   * non-string default_model as "". The YAML model selection is kept only
+   * when it is still in the returned list; otherwise it becomes the
+   * resolved default (or "").
+   */
+  async function savePreferences() {
+    try {
+      const nextModels = parseLines(preferredModelsText);
+      const nextDefaultModel = pickPreferredDefaultModel(nextModels, globalPreferredModel);
+      const payload = {
+        preferred_models: nextModels,
+        default_model: nextDefaultModel,
+      };
+      const data = await api("/api/settings/preferences", {
+        method: "PUT",
+        body: JSON.stringify(payload),
+      });
+      const models = Array.isArray(data?.preferred_models) ? data.preferred_models : [];
+      const preferredDefault = typeof data?.default_model === "string" ? data.default_model : "";
+      const resolvedDefault = pickPreferredDefaultModel(models, preferredDefault);
+      setPreferredModels(models);
+      setPreferredModelsText(stringifyLines(models));
+      setGlobalPreferredModel(resolvedDefault);
+      setYamlModelSelection((prev) => (models.includes(prev) ? prev : (resolvedDefault || "")));
+      setStatusMsg({ type: "ok", text: "Preferred models saved." });
+    } catch (err) {
+      setStatusMsg({ type: "error", text: err.message });
+    }
+  }
+
+  return (
+
+ + {preferredModels.map((modelName) => ( + +
+
+ Autonima logo +
+
autonima
+
LLM-assisted neuroimaging review workflows
+
+
+
+ + +
+
+ + {statusMsg ? ( +
{statusMsg.text}
+ ) : null} + + {deletePreview ? ( +
+
e.stopPropagation()}> +

Delete Project

+

+ {deletePreview.project_name} +

+
+
Config Path
+
{deletePreview.config_path}
+
Run Metadata
+
{deletePreview.run_metadata_count}
+
Detected Outputs
+
{deletePreview.output_folders_detected?.length || 0}
+
+ + {deletePreview.has_active_runs ? ( +
+ Project has active runs ({(deletePreview.active_run_ids || []).join(", ")}). Stop runs before deleting. +
+ ) : null} + + {!deletePreview.config_deletable ? ( +
+ Config file is outside workspace boundary and will not be deleted. +
+ ) : null} + + {(deletePreview.output_folders_detected || []).length > 0 ? ( +
+ +
+ {(deletePreview.output_folders_detected || []).map((path) => { + const inWorkspace = (deletePreview.output_folders_deletable || []).includes(path); + return ( +
+ {path} + {!inWorkspace ? " (outside workspace, cannot delete)" : ""} +
+ ); + })} +
+
+ ) : null} + +
+ + +
+ +
+ + +
+
+
+ ) : null} + + {createModalOpen ? ( +
setCreateModalOpen(false)}> +
e.stopPropagation()}> +

Create New Project

+
+ + setCreateName(e.target.value)} + /> +
+
+ +