
Commit 53c14c7

feat: submit method upgrade snakemake (#1604)
#### Added

- sbatch script for the snakemake head job
- code to run the snakemake submitter interactively, that is, without submitting it to the cluster
- the slurm-executor-plugin to the install instructions
- more logging and a logfile for the balsamic wrapper
- analysis_status.txt for failed or cancelled jobs, including failed QC metrics

#### Changed

- moved cluster resources from a json file to a cluster_workflow profile
- optimised rule resources based on benchmark metrics (with memory added to each rule)
- renamed some rules to allow for workflow-specific resource allocation
- removed threads from all rules
- updated balsamic to v9.9.0

#### Removed

- mailing functionality
- unused code for benchmark plotting
- code to disable variant callers, as we never use this functionality and BALSAMIC is not targeted towards researchers
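As background for the first two "Added" items: running the submitter interactively means the Snakemake head job executes in the caller's shell instead of being wrapped in sbatch, while rule jobs are still dispatched to SLURM by the executor plugin named in the profile. A minimal sketch of that dispatch, assuming a Snakemake >= 8 style `--workflow-profile` flag and hypothetical wrapper paths (the real wiring lives in the balsamic wrapper and the sbatch script added by this commit):

```python
# Sketch only: illustrates the interactive vs. cluster submission split
# described in the changelog. "headjob.sh" and the profile path are
# hypothetical placeholders, not names from this commit.
import subprocess
from pathlib import Path


def submit_head_job(snakefile: Path, profile_dir: Path, interactive: bool) -> None:
    snakemake_cmd = [
        "snakemake",
        "--snakefile", str(snakefile),
        "--workflow-profile", str(profile_dir),  # the cluster_workflow profile
    ]
    if interactive:
        # Run the Snakemake head job in the current session; rule jobs are
        # still submitted to SLURM by the executor plugin.
        subprocess.run(snakemake_cmd, check=True)
    else:
        # Submit the head job itself to the cluster via an sbatch wrapper.
        subprocess.run(["sbatch", "headjob.sh"], check=True)
```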
Parent commit: 828a627

134 files changed (+2581, −2921 lines)


BALSAMIC/assets/scripts/immediate_submit.py

Lines changed: 0 additions & 78 deletions
This file was deleted.
Lines changed: 171 additions & 0 deletions (new file)
```python
#!/usr/bin/env python3
from __future__ import annotations

import logging
import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import click

LOG = logging.getLogger(__name__)


def find_job_logs(log_root: Path) -> Dict[str, Path]:
    """
    Recursively find *.log files whose basename is a numeric jobid.
    Returns {jobid -> log_path}.
    """
    job_logs: Dict[str, Path] = {}
    for p in log_root.rglob("*.log"):
        if p.stem.isdigit():  # e.g. "9727982.log" -> "9727982"
            job_logs[p.stem] = p
        else:
            LOG.debug(f"Skipping non-job log file: {p}")
    LOG.info(f"Discovered {len(job_logs)} job logs under {log_root}")
    return job_logs


def get_job_state(jobid: str) -> Optional[str]:
    """
    Return raw output of `scontrol show job JOBID`, or None if the query fails.
    """
    try:
        LOG.debug(f"Running scontrol show job {jobid}")
        result = subprocess.run(
            ["scontrol", "show", "job", jobid],
            capture_output=True,
            text=True,
            check=True,
        )
        return result.stdout
    except FileNotFoundError:
        LOG.error("scontrol executable not found on PATH")
        return None
    except subprocess.CalledProcessError as e:
        LOG.warning(f"Could not check job {jobid} (may not exist). rc={e.returncode}")
        LOG.debug(f"scontrol stderr for {jobid}: {e.stderr}")
        return None


def parse_state(scontrol_output: str) -> Optional[str]:
    """
    Extract JobState from scontrol text, e.g. 'JobState=FAILED'.
    Returns the state string (e.g. 'FAILED') or None if not found.
    """
    m = re.search(r"JobState=(\S+)", scontrol_output)
    state = m.group(1) if m else None
    if state is None:
        LOG.debug("JobState not found in scontrol output")
    return state


def write_results(
    output_file: Path,
    failed: List[Tuple[str, Path]],
    cancelled: List[Tuple[str, Path]],
    unknown: List[str],
) -> None:
    """
    Append job results to output_file.
    Each run is prefixed with a timestamp header.
    """
    output_file.parent.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    with output_file.open("a") as out_f:
        out_f.write(f"=== Job status check at {timestamp} ===\n")

        if failed:
            out_f.write("Failed jobs:\n")
            for jobid, log_path in failed:
                out_f.write(f"{jobid}\t{log_path}\n")
            out_f.write("\n")

        if cancelled:
            out_f.write("Cancelled jobs:\n")
            for jobid, log_path in cancelled:
                out_f.write(f"{jobid}\t{log_path}\n")
            out_f.write("\n")

        if unknown:
            out_f.write("Unknown status jobs:\n")
            for jobid in unknown:
                out_f.write(f"{jobid}\tNA\n")
            out_f.write("\n")

        if not failed and not cancelled:
            out_f.write("SUCCESSFUL\n\n")

    LOG.info(
        f"Appended results to {output_file} (failed={len(failed)}, cancelled={len(cancelled)}, unknown={len(unknown)})"
    )


@click.command()
@click.argument(
    "log_dir", type=click.Path(exists=True, file_okay=False, path_type=Path)
)
@click.option(
    "--output",
    "-o",
    required=True,
    type=click.Path(writable=True, path_type=Path),
    help="Path to output file for results (FAILED/CANCELLED or SUCCESSFUL).",
)
@click.option(
    "--log-level",
    default="INFO",
    show_default=True,
    type=click.Choice(
        ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"], case_sensitive=False
    ),
    help="Logging verbosity.",
)
def check_failed_jobs(log_dir: Path, output: Path, log_level: str) -> None:
    """
    Recursively scan LOG_DIR for SLURM *.log files (stdout+stderr combined),
    extract job IDs from filenames, and check their states via `scontrol show job JOBID`.
    """
    logging.basicConfig(
        level=getattr(logging, log_level.upper(), logging.INFO),
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    LOG.info("Scanning logs under: %s", log_dir)
    job_logs = find_job_logs(log_dir)

    failed: List[Tuple[str, Path]] = []
    cancelled: List[Tuple[str, Path]] = []
    unknown: List[str] = []

    if not job_logs:
        LOG.warning("No job logs found (no files matching '*.log')")
        return

    for jobid in sorted(job_logs.keys(), key=int):
        out_text = get_job_state(jobid)
        if not out_text:
            # Can't classify without job info; skip but note it.
            LOG.warning(
                f"Missing scontrol output for job {jobid} -- setting status UNKNOWN"
            )
            unknown.append(jobid)
            continue

        state = parse_state(out_text)
        if state == "FAILED":
            failed.append((jobid, job_logs[jobid]))
        elif state == "CANCELLED":
            cancelled.append((jobid, job_logs[jobid]))
        else:
            LOG.debug(f"Job {jobid} state is {state}")

    write_results(output, failed, cancelled, unknown)


if __name__ == "__main__":
    check_failed_jobs()
```
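A usage sketch for the command above, exercising it with click's test runner; the module name `check_failed_jobs_script` is a hypothetical import path, not one from the commit. On a machine without `scontrol` on PATH, every discovered job lands in the UNKNOWN bucket:

```python
# Sketch only: drives the CLI above via click.testing, assuming the script
# is importable as a module (the module name here is hypothetical).
from pathlib import Path

from click.testing import CliRunner

from check_failed_jobs_script import check_failed_jobs

runner = CliRunner()
with runner.isolated_filesystem():
    Path("logs").mkdir()
    Path("logs/9727982.log").touch()   # numeric stem -> treated as a SLURM jobid
    Path("logs/rule_foo.log").touch()  # non-numeric stem -> skipped
    result = runner.invoke(
        check_failed_jobs, ["logs", "--output", "analysis_status.txt"]
    )
    # Without scontrol available, job 9727982 is reported under
    # "Unknown status jobs" in the appended analysis_status.txt.
    print(result.output)
    print(Path("analysis_status.txt").read_text())
```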

BALSAMIC/commands/config/case.py

Lines changed: 31 additions & 3 deletions
```diff
@@ -40,7 +40,12 @@
     OPTION_TUMOR_SAMPLE_NAME,
     OPTION_UMI_MIN_READS,
 )
-from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, AnalysisWorkflow, Gender
+from BALSAMIC.constants.analysis import (
+    BIOINFO_TOOL_ENV,
+    AnalysisWorkflow,
+    Gender,
+    LogFile,
+)
 from BALSAMIC.constants.cache import GenomeVersion
 from BALSAMIC.constants.constants import FileType
 from BALSAMIC.constants.paths import (
@@ -57,9 +62,11 @@
     get_panel_chrom,
     get_sample_list,
     get_gens_references,
+    get_snakefile,
 )
 from BALSAMIC.utils.io import read_json, write_json
 from BALSAMIC.utils.utils import get_absolute_paths_dict
+from BALSAMIC.utils.logging import add_file_logging

 LOG = logging.getLogger(__name__)

@@ -129,6 +136,19 @@ def case_config(
     tumor_sample_name: str,
     umi_min_reads: str | None,
 ):
+    """Configure BALSAMIC workflow based on input arguments."""
+
+    LOG.info(f"Starting configuring analysis case: {case_id}.")
+
+    LOG.info(f"Creating case analysis directory: {analysis_dir}/{case_id}.")
+    Path(analysis_dir, case_id).mkdir(exist_ok=True)
+
+    log_file = Path(analysis_dir, case_id, LogFile.LOGNAME).as_posix()
+    LOG.info(f"Setting BALSAMIC logfile path to: {log_file}.")
+    add_file_logging(log_file, logger_name=__name__)
+
+    LOG.info(f"Running BALSAMIC version {balsamic_version} -- CONFIG CASE")
+
     references_path: Path = Path(balsamic_cache, cache_version, genome_version)
     references: Dict[str, Path] = get_absolute_paths_dict(
         base_path=references_path,
@@ -154,7 +174,6 @@ def case_config(
             if path is not None
         }
     )
-
     variants_observations = {
         "artefact_snv_observations": artefact_snv_observations,
         "clinical_snv_observations": clinical_snv_observations,
@@ -176,6 +195,8 @@ def case_config(
     analysis_fastq_dir: str = get_analysis_fastq_files_directory(
         case_dir=Path(analysis_dir, case_id).as_posix(), fastq_path=fastq_path
     )
+    LOG.info(f"Prepared analysis fastq-dir: {analysis_fastq_dir}")
+
     result_dir: Path = Path(analysis_dir, case_id, "analysis")
     log_dir: Path = Path(analysis_dir, case_id, "logs")
     script_dir: Path = Path(analysis_dir, case_id, "scripts")
@@ -186,6 +207,8 @@ def case_config(
     for directory in [result_dir, log_dir, script_dir, benchmark_dir]:
         directory.mkdir(exist_ok=True)

+    LOG.info("Created analysis and log directories.")
+    LOG.info("Validating configuration data in pydantic model.")
     config_collection_dict = ConfigModel(
         sentieon={
             "sentieon_install_dir": sentieon_install_dir,
@@ -244,5 +267,10 @@ def case_config(
     write_json(json_obj=config_collection_dict, path=config_path)
     LOG.info(f"Config file saved successfully - {config_path}")

-    generate_graph(config_collection_dict, config_path)
+    snakefile = get_snakefile(
+        analysis_type=config_collection_dict["analysis"]["analysis_type"],
+        analysis_workflow=config_collection_dict["analysis"]["analysis_workflow"],
+    )
+
+    generate_graph(config_collection_dict, config_path, snakefile)
     LOG.info(f"BALSAMIC Workflow has been configured successfully!")
```
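One new piece this diff relies on is `add_file_logging` from `BALSAMIC.utils.logging`, whose body is not part of this commit view. A plausible sketch matching the call site above (signature taken from the import, body assumed):

```python
# Sketch only: a plausible shape for BALSAMIC.utils.logging.add_file_logging.
# The signature matches the call site in case.py; the body is an assumption.
import logging


def add_file_logging(log_file: str, logger_name: str) -> None:
    """Attach a FileHandler so the named logger also writes to log_file."""
    handler = logging.FileHandler(log_file)
    handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    )
    logging.getLogger(logger_name).addHandler(handler)
```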

BALSAMIC/commands/config/pon.py

Lines changed: 6 additions & 1 deletion
```diff
@@ -35,6 +35,7 @@
     get_analysis_fastq_files_directory,
     get_bioinfo_tools_version,
     get_pon_sample_list,
+    get_snakefile,
 )
 from BALSAMIC.utils.io import read_json, write_json
 from BALSAMIC.utils.utils import get_absolute_paths_dict
@@ -144,5 +145,9 @@ def pon_config(
     write_json(json_obj=config_collection_dict, path=config_path)
     LOG.info(f"PON config file saved successfully - {config_path}")

-    generate_graph(config_collection_dict, config_path)
+    snakefile = get_snakefile(
+        analysis_type=config_collection_dict["analysis"]["analysis_type"],
+        analysis_workflow=config_collection_dict["analysis"]["analysis_workflow"],
+    )
+    generate_graph(config_collection_dict, config_path, snakefile)
     LOG.info(f"BALSAMIC PON workflow has been configured successfully!")
```
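Both config commands now resolve the Snakefile explicitly and pass it to `generate_graph` instead of letting the graph step infer it. `get_snakefile` itself is not shown in this commit view; a hypothetical sketch of the kind of lookup it performs (the committed implementation and paths may differ):

```python
# Sketch only: the shape of an analysis-type/workflow -> Snakefile lookup.
# The real get_snakefile lives in BALSAMIC's utilities; these paths are
# illustrative assumptions, not the committed implementation.
from pathlib import Path


def get_snakefile(analysis_type: str, analysis_workflow: str) -> str:
    """Return the Snakefile path for an analysis type and workflow."""
    workflows_dir = Path("BALSAMIC") / "workflows"
    if analysis_type == "pon":
        return (workflows_dir / f"PON.{analysis_workflow}.smk").as_posix()
    return (workflows_dir / "balsamic.smk").as_posix()
```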
