Skip to content

Commit ea0f2b2

Browse files
Merge pull request #314 from jonbinney/multi-gpu
Support for running in Multi-GPU setups
2 parents a6e7335 + aa41f5e commit ea0f2b2

File tree

4 files changed

+20
-9
lines changed

4 files changed

+20
-9
lines changed

deep_quoridor/src/agents/alphazero/self_play_manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import torch
1111
import wandb
1212
from plugins.wandb_train import WandbParams
13-
from utils import set_deterministic
13+
from utils import my_device, set_deterministic
1414
from utils.timer import Timer
1515

1616
from agents.alphazero.alphazero import AlphaZeroAgent, AlphaZeroParams
@@ -165,11 +165,11 @@ def run_self_play_games(
165165
set_deterministic(random_seed)
166166

167167
print(
168-
f"Worker {worker_id} starting, running {num_games} games ({num_parallel_games} in parallel) with random seed {random_seed}"
168+
f"Worker {worker_id} starting ({my_device()}), running {num_games} games ({num_parallel_games} in parallel) with random seed {random_seed}"
169169
)
170170

171171
wandb_run = None
172-
if wandb_params is not None:
172+
if wandb_params is not None and wandb_params.log_from_workers:
173173
run_id = f"{wandb_params.run_id()}-worker-{worker_id}"
174174
print(f"Wandb group: {wandb_params.run_id()} run: {run_id}")
175175
wandb_run = wandb.init(

deep_quoridor/src/plugins/wandb_train.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ class WandbParams(SubargsBase):
3838
# How often to log training metrics
3939
log_every: int = 10
4040

41+
# Whether workers will also log to wandb (in separate runs)
42+
log_from_workers: bool = True
43+
4144
def run_id(self):
4245
return f"{self.prefix}-{self.suffix}"
4346

@@ -124,7 +127,7 @@ def upload_model(self, model_file: str, extra_files: list[str] = []) -> str:
124127

125128
artifact.save()
126129
logged_artifact = wandb.log_artifact(artifact)
127-
logged_artifact.wait(60)
130+
logged_artifact.wait(300)
128131
logged_artifact.aliases.extend([f"ep_{self.episode_count}-{self.run.id}"])
129132
logged_artifact.save()
130133

deep_quoridor/src/train_alphazero.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ def train_alphazero(
7777
# because it calls the plugin's internal _intialize method which sets up metrics.
7878
wandb_train_plugin.start_game(game=args, agent1=training_agent, agent2=training_agent)
7979
training_agent.set_wandb_run(wandb_train_plugin.run)
80-
8180
# Compute the tournament metrics with the initial model, possibly random initialized, to
8281
# be able to see how it evolves from there
82+
wandb_train_plugin.episode_count = initial_epoch * args.games_per_epoch
8383
wandb_train_plugin.compute_tournament_metrics(str(current_filename))
8484

8585
last_epoch = initial_epoch + args.epochs
@@ -122,16 +122,16 @@ def train_alphazero(
122122
current_filename = training_agent.save_model_with_suffix(f"_epoch_{epoch}")
123123
if wandb_train_plugin is not None:
124124
wandb_train_plugin.episode_count = game_num
125-
# Compute the metrics periodically and in the last epoch
126-
if (epoch + 1) % args.benchmarks_every == 0 or epoch == last_epoch - 1:
127-
wandb_train_plugin.compute_tournament_metrics(str(current_filename))
128-
129125
# Upload the model and training state
130126
with tempfile.TemporaryDirectory() as tmpdir:
131127
training_state_filename = os.path.join(tmpdir, "training_state.gz")
132128
save_training_state(training_state_filename, training_agent, wandb_train_plugin, epoch + 1, game_num)
133129
wandb_train_plugin.upload_model(str(current_filename), [training_state_filename])
134130

131+
# Compute the metrics periodically and in the last epoch
132+
if (epoch + 1) % args.benchmarks_every == 0 or epoch == last_epoch - 1:
133+
wandb_train_plugin.compute_tournament_metrics(str(current_filename))
134+
135135
Timer.log_totals()
136136

137137
# Close the arena to finish wandb run

deep_quoridor/src/utils/misc.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import argparse
2+
import os
23
import random
4+
from functools import cache
35
from glob import glob
46
from pathlib import Path
57
from typing import Optional
@@ -40,8 +42,14 @@ def get_initial_random_seed():
4042
return initial_random_seed
4143

4244

45+
@cache
4346
def my_device():
4447
if torch.cuda.is_available():
48+
dc = torch.cuda.device_count()
49+
if dc > 1:
50+
gpu_n = os.getpid() % dc
51+
return torch.device(f"cuda:{gpu_n}")
52+
4553
return torch.device("cuda")
4654

4755
if torch.backends.mps.is_available():

0 commit comments

Comments
 (0)