Commit dd5a6ab

better logging on resume

1 parent 829c6e5

File tree: 1 file changed (+6, -0 lines changed)


gluefactory/trainer.py

Lines changed: 6 additions & 0 deletions
@@ -318,6 +318,9 @@ def load_checkpoint(
         for metric in ["tot_it", "tot_n_samples"]:
             if metric in checkpoint:
                 setattr(self, metric, checkpoint[metric])
+                self.info(
+                    f"Loaded {metric}={getattr(self, metric)} ({checkpoint[metric]})"
+                )
         self.info(f"Training state loaded. Resuming at epoch {self.epoch}.")

     def maybe_load_checkpoint(self):
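
For illustration, a minimal runnable sketch (not from the repository) of what the new log lines report when a run is resumed; the DummyTrainer class, its print-based logger, and the counter values are invented placeholders:

    # Placeholder checkpoint dict; the counter values are invented.
    checkpoint = {"tot_it": 50_000, "tot_n_samples": 3_200_000, "epoch": 10}

    class DummyTrainer:
        tot_it = 0
        tot_n_samples = 0

        def info(self, msg):
            # Stand-in for the trainer's logger.
            print(msg)

    trainer = DummyTrainer()
    for metric in ["tot_it", "tot_n_samples"]:
        if metric in checkpoint:
            setattr(trainer, metric, checkpoint[metric])
            trainer.info(f"Loaded {metric}={getattr(trainer, metric)} ({checkpoint[metric]})")
    # Prints:
    #   Loaded tot_it=50000 (50000)
    #   Loaded tot_n_samples=3200000 (3200000)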
@@ -982,6 +985,9 @@ def launch_training(output_dir: Path, conf: DictConfig, device: torch.device):
             if conf.get("benchmarks") is None
             else conf.benchmarks.get(bench_name, {})
         )
+        bench_conf = OmegaConf.merge(
+            {"eval": conf.get("eval", {})}, OmegaConf.create(bench_conf)
+        )
         trainer.register_benchmark(bench_name, bench_conf, every_epoch=every_epoch)
     # Maybe load experiment
     trainer.maybe_load_checkpoint()
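
In OmegaConf, merge gives later arguments precedence, so the added call seeds each benchmark with the experiment-level eval settings and lets benchmark-specific keys override them. A minimal sketch of that merge order; the keys top_k and ransac_th are invented placeholders, not actual Glue Factory options:

    from omegaconf import OmegaConf

    # Invented global eval defaults and a per-benchmark override.
    conf = OmegaConf.create({"eval": {"top_k": 1024, "ransac_th": 1.0}})
    bench_conf = {"eval": {"ransac_th": 0.5}}

    merged = OmegaConf.merge({"eval": conf.get("eval", {})}, OmegaConf.create(bench_conf))
    print(OmegaConf.to_yaml(merged))
    # eval:
    #   top_k: 1024      <- inherited from the global config
    #   ransac_th: 0.5   <- benchmark-specific value wins (merged last)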
