1+ import time
2+
13import anndata as ad
24import mudata as mu
35import pandas as pd
2022from gretapy .tl ._predictive import _gset , _omics
2123from gretapy .tl ._prior import _grn , _tfm , _tfp
2224
# Horizontal rule logged on its own line immediately before and after the
# "Starting ..." / "... complete" banner messages of a benchmark or evaluation.
_SEP = "\u2550 " * 50
26+
2327
2428def _format_log_prefix (grn_name : str | None = None , dataset_name : str | None = None ) -> str :
2529 """Build the optional bracket prefix for log messages."""
@@ -33,6 +37,16 @@ def _format_log_prefix(grn_name: str | None = None, dataset_name: str | None = N
3337 return ""
3438
3539
40+ def _format_label (grn_name : str | None = None , dataset_name : str | None = None ) -> str :
41+ """Build a label string from available names."""
42+ parts = []
43+ if grn_name is not None :
44+ parts .append (grn_name )
45+ if dataset_name is not None :
46+ parts .append (dataset_name )
47+ return " | " .join (parts ) if parts else ""
48+
49+
3650def benchmark (
3751 organism : str ,
3852 grns : dict | pd .DataFrame ,
@@ -112,6 +126,12 @@ def benchmark(
112126 # Validate metrics
113127 _check_metrics (organism = organism , metrics = metrics )
114128 # Run benchmark
129+ n_grns = len (grns_dict )
130+ n_datasets = len (datasets_list )
131+ _log (_SEP , level = "info" , verbose = verbose )
132+ _log (f"Starting benchmark: { n_grns } GRN(s) x { n_datasets } dataset(s)" , level = "info" , verbose = verbose )
133+ _log (_SEP , level = "info" , verbose = verbose )
134+ t_start_bench = time .time ()
115135 all_results = []
116136 for grn_name , grn_df in grns_dict .items ():
117137 for dataset_name in datasets_list :
@@ -138,6 +158,10 @@ def benchmark(
138158 result .insert (0 , "grn" , grn_name if grn_name is not None else "grn" )
139159 result .insert (1 , "dataset" , dataset_name )
140160 all_results .append (result )
161+ elapsed = time .time () - t_start_bench
162+ _log (_SEP , level = "info" , verbose = verbose )
163+ _log (f"Benchmark complete ({ len (all_results )} result(s), { elapsed :.1f} s)" , level = "info" , verbose = verbose )
164+ _log (_SEP , level = "info" , verbose = verbose )
141165 if not all_results :
142166 return pd .DataFrame (columns = ["grn" , "dataset" , "category" , "metric" , "db" , "precision" , "recall" , "f01" ])
143167 return pd .concat (all_results , ignore_index = True )
@@ -241,6 +265,12 @@ def eval_grn_dataset(
241265 genes , peaks , adata = dataset .var_names .tolist (), [], dataset
242266 # Build log prefix
243267 prefix = _format_log_prefix (grn_name = grn_name , dataset_name = dataset_name )
268+ label = _format_label (grn_name = grn_name , dataset_name = dataset_name )
269+ label_suffix = f": { label } " if label else ""
270+ _log (_SEP , level = "info" , verbose = verbose )
271+ _log (f"Starting evaluation{ label_suffix } " , level = "info" , verbose = verbose )
272+ _log (_SEP , level = "info" , verbose = verbose )
273+ t_start_eval = time .time ()
244274 # Evaluate metrics
245275 results = []
246276 n_metrics = len (metrics_list )
@@ -267,6 +297,10 @@ def eval_grn_dataset(
267297 result = _run_metric (metric_type , db_name , grn , db , genes , peaks , cats , adata , verbose = verbose )
268298 if result is not None :
269299 results .append ([category , metric_type , db_name , * result ])
300+ elapsed = time .time () - t_start_eval
301+ _log (_SEP , level = "info" , verbose = verbose )
302+ _log (f"Evaluation complete{ label_suffix } ({ len (results )} metrics, { elapsed :.1f} s)" , level = "info" , verbose = verbose )
303+ _log (_SEP , level = "info" , verbose = verbose )
270304 return pd .DataFrame (results , columns = result_cols )
271305
272306
0 commit comments