179 changes: 179 additions & 0 deletions exp_runner.py
@@ -0,0 +1,179 @@
import os
import sys
import subprocess
import argparse
import json
from pathlib import Path
from typing import Dict, List, Optional


def load_experiment_config(config_file_path: str) -> List[Dict[str, str]]:
    """Load one experiment spec per line from a JSONL file."""
    experiments = []

    with open(config_file_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue  # skip blank lines instead of failing on json.loads("")
            try:
                experiments.append(json.loads(line))
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON on line {line_num}: {e}")
    return experiments
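# A minimal sketch of one config.jsonl line this loader accepts; the field
# names mirror what mrunner_exps/cool_mujoco.py (below) writes, and the
# concrete values here are hypothetical:
#   {"project_name": "sf2_mujoco", "unique_name": "cool_mujoco_01",
#    "name": "cool_mujoco", "command": "python -m sf_examples.mujoco.train_mujoco --seed=0"}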


def create_sbatch_script(
    command: str,
    script_path: str,
    log_path: str,
    job_name: str,
    venv_path: str,
    partition: str,
    account: str,
    time: str = "24:00:00",
    nodes: int = 1,
    ntasks: int = 1,
    cpus: int = 8,
    mem: str = "32G",
    gpu: int = 1,
) -> None:
    """Write an executable sbatch script that runs `command` with the given resources."""
    # Resolve the experiment directory at generation time; under sbatch, "$0"
    # points at SLURM's spooled copy of the script, so cd "$(dirname "$0")"
    # would not land in the experiment directory.
    exp_dir = os.path.dirname(os.path.abspath(script_path))

    sbatch_content = f"""#!/bin/bash -l
#SBATCH --job-name={job_name}
#SBATCH --output={log_path}
#SBATCH --error={log_path}
#SBATCH --time={time}
#SBATCH --account={account}
#SBATCH --partition={partition}
#SBATCH --ntasks={ntasks}
#SBATCH --nodes={nodes}
#SBATCH --cpus-per-task={cpus}
#SBATCH --mem={mem}
#SBATCH --gres=gpu:{gpu}

ml ML-bundle/24.06a

export WANDB_API_KEY=...
cd "{exp_dir}"

"""

    # Add virtual environment activation if specified
    if venv_path:
        sbatch_content += f"source {venv_path}/bin/activate\n"

    # Add the actual command
    sbatch_content += f"""
{command}

echo ""
echo "Job finished at: $(date)"
"""

    with open(script_path, 'w') as f:
        f.write(sbatch_content)

    # Make the script executable
    os.chmod(script_path, 0o755)
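# For orientation, with the defaults above and a hypothetical job_name="exp_0",
# the generated script header starts roughly like:
#   #!/bin/bash -l
#   #SBATCH --job-name=exp_0
#   #SBATCH --time=24:00:00
#   #SBATCH --cpus-per-task=8
#   #SBATCH --mem=32G
#   #SBATCH --gres=gpu:1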


def submit_job(sbatch_script_path: str, dry_run: bool = False) -> Optional[str]:
    """Submit the script via sbatch; return the job id, or None on dry run/failure."""
    if dry_run:
        print(f"[dry run] would submit {sbatch_script_path}")
        return None

    try:
        result = subprocess.run(
            ["sbatch", sbatch_script_path],
            capture_output=True,
            text=True,
            check=True
        )
        # sbatch prints "Submitted batch job <id>"; the id is the last token
        return result.stdout.strip().split()[-1]
    except subprocess.CalledProcessError as e:
        print(f"Error submitting job {sbatch_script_path}: {e}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return None

def main():
    parser = argparse.ArgumentParser(description="Run experiments with SLURM")
    parser.add_argument("config_file", help="Path to the experiment configs (JSONL)")
    parser.add_argument("base_dir", default="...", help="Base directory for experiment outputs")
    parser.add_argument("--venv", default=".atari_venv", help="Path to virtual environment to activate")
    parser.add_argument("--account", default="...", help="Account")
    parser.add_argument("--partition", default="...", help="Partition")
    parser.add_argument("--time", default="2880", help="Job time limit (SLURM format; a bare number is minutes)")
    parser.add_argument("--nodes", type=int, default=1, help="Number of nodes (default: 1)")
    parser.add_argument("--ntasks", type=int, default=1, help="Tasks per node (default: 1)")
    parser.add_argument("--cpus", type=int, default=8, help="CPUs per task (default: 8)")
    parser.add_argument("--mem", default="32G", help="Memory per node (default: 32G)")
    parser.add_argument("--gpu", type=int, default=1, help="Number of GPUs (default: 1)")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually submit jobs, just show what would be done")

    args = parser.parse_args()

    # Validate inputs
    if not os.path.exists(args.config_file):
        print(f"Error: Config file {args.config_file} does not exist")
        sys.exit(1)

    # Load experiment configuration
    print(f"Loading experiment configuration from {args.config_file}...")
    try:
        experiments = load_experiment_config(args.config_file)
    except Exception as e:
        print(f"Error loading config: {e}")
        sys.exit(1)

    project_name = experiments[0].get('project_name')
    unique_name = experiments[0].get('unique_name')

    print(f"Running {len(experiments)} experiments")

    # Create directory structure: base_dir/project_name/unique_name/<exp>
    base_path = Path(args.base_dir)
    unique_path = base_path / project_name / unique_name
    unique_path.mkdir(parents=True, exist_ok=True)

    # Generate and submit jobs
    job_ids = []

    for i, exp in enumerate(experiments):
        exp_name = f"{exp['name']}_{i}"
        exp_dir = unique_path / exp_name
        exp_dir.mkdir(exist_ok=True)

        # Paths for script and log files
        script_path = exp_dir / "launch.sbatch"
        log_path = exp_dir / "log.out"

        command = exp.get('command')

        # Create SLURM batch script
        create_sbatch_script(
            command=command,
            script_path=str(script_path),
            log_path=str(log_path),
            job_name=exp_name,
            venv_path=args.venv,
            partition=args.partition,
            time=args.time,
            nodes=args.nodes,
            ntasks=args.ntasks,
            cpus=args.cpus,
            mem=args.mem,
            gpu=args.gpu,
            account=args.account,
        )

        # Submit job and keep the id so we can report what was launched
        job_id = submit_job(str(script_path), dry_run=args.dry_run)
        if job_id:
            job_ids.append(job_id)

    if job_ids:
        print(f"Submitted jobs: {', '.join(job_ids)}")
    print(f"Experiment files created in: {unique_path}")


if __name__ == "__main__":
    main()
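A hypothetical invocation of the runner above (the storage path, account, and partition are placeholders, not values from this PR):

python exp_runner.py config.jsonl /scratch/my_user/experiments --account my_account --partition gpu --dry-run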
8 changes: 4 additions & 4 deletions mrunner_exps/atari_reproduce_paper.py
@@ -5,7 +5,7 @@
 # params for all exps
 config = {
     "exp_tags": [name],
-    "train_for_env_steps": 200_000_000,
+    "train_for_env_steps": 1_000_000,
     "num_workers": 4,
     "num_envs_per_worker": 8,
     "num_batches_per_epoch": 16,
@@ -17,11 +17,11 @@
     "wandb_project": "atari",
     "wandb_group": "plasticity, reproduce paper -- 3",
     "wandb_tags": [name],
-    "with_wandb": True,
+    "with_wandb": False,
 }

 # params different between exps
-atari_games = ["breakout", "montezuma", "phoenix", "namethisgame"]
+atari_games = ["breakout"]

 params_grid = []

@@ -56,7 +56,7 @@
         "optimizer": ["adam"],
         "num_epochs": [8],
         "normalize_returns": [True],
-        "repeat_action_probability": [0.0, 0.25],
+        "repeat_action_probability": [0.0],

         # paper's params: plasticity
         "delta": [0.99],
62 changes: 62 additions & 0 deletions mrunner_exps/cool_mujoco.py
@@ -0,0 +1,62 @@
from mrunner.helpers.specification_helper import create_experiments_helper
import json

# mrunner convention: the experiment name is the script filename minus ".py"
name = globals()["script"][:-3]

# params for all exps
config = {
    "train_for_env_steps": 1_000_000,
    "num_workers": 16,
    "num_envs_per_worker": 16,
    "worker_num_splits": 2,
    "rollout": 32,
    "batch_size": 1024,  # 1024 timesteps = 32 trajectories * rollout of 32
    "async_rl": True,
    "serial_mode": False,
    "restart_behavior": "overwrite",
    "device": "cpu",
    # "with_wandb": True,
    "wandb_user": "ideas-ncbr",
    "wandb_project": "mujoco plasticity_ed",
    "wandb_group": "cool simba",
}

# params different between exps
params_grid = [
    {
        "seed": list(range(3)),
        "env": ["mujoco_hopper"],
        "actor_critic_share_weights": [True, False],
        "model": ["bro"],
    },
]

experiments_list = create_experiments_helper(
    experiment_name=name,
    project_name="sf2_mujoco",
    with_neptune=False,
    script="python3 mrunner_run.py",
    python_path=".",
    tags=[name],
    base_config=config,
    params_grid=params_grid,
    mrunner_ignore=".mrunnerignore",
)

# Turn each experiment into a shell command and collect one record per job
exps = []

for i, exp in enumerate(experiments_list):
    curr_config = {"project_name": exp.project, "unique_name": exp.unique_name, "name": exp.name}
    params = exp.parameters
    run_script = params.pop("run_script", "sf_examples.mujoco.train_mujoco")
    key_pairs = [f"--{key}={value}" for key, value in params.items()]
    cmd = ["python", "-m", run_script] + key_pairs
    curr_config["command"] = " ".join(cmd)
    exps.append(curr_config)


with open("config.jsonl", "w") as f:
    for item in exps:
        f.write(json.dumps(item) + "\n")
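For the grid above, each record in config.jsonl carries a ready-to-run command; the first one would look roughly like this (the unique_name suffix is generated by mrunner and the flag list is abbreviated, so this is illustrative):

{"project_name": "sf2_mujoco", "unique_name": "cool_mujoco_...", "name": "cool_mujoco", "command": "python -m sf_examples.mujoco.train_mujoco --train_for_env_steps=1000000 --seed=0 --env=mujoco_hopper --actor_critic_share_weights=True --model=bro ..."}

These records are the input format that exp_runner.py's load_experiment_config expects.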
8 changes: 5 additions & 3 deletions mrunner_run.py
@@ -4,10 +4,12 @@

 if __name__ == "__main__":
     cfg = get_configuration(print_diagnostics=True, with_neptune=False)
-
+    # print("Configuration:", cfg)
     del cfg["experiment_id"]
-    run_script = cfg.pop("run_script", "sf_examples.atari.train_atari")
+    # run_script = cfg.pop("run_script", "sf_examples.atari.train_atari")
+    run_script = cfg.pop("run_script", "sf_examples.mujoco.train_mujoco")

     key_pairs = [f"--{key}={value}" for key, value in cfg.items()]
     cmd = ["python", "-m", run_script] + key_pairs
-    subprocess.run(cmd)
+    # subprocess.run(cmd)
+    # print("Running command:", " ".join(cmd))
21 changes: 10 additions & 11 deletions sample_factory/algo/learning/learner.py
@@ -887,18 +887,18 @@ def _record_summaries(self, train_loop_vars) -> AttrDict:
         stats.value_loss = var.value_loss
         stats.exploration_loss = var.exploration_loss

-        stats.dead_neurons = var.dead_neurons
-        stats.effective_rank = var.effective_rank
-        stats.l2_init_loss = var.l2_init_loss
+        # stats.dead_neurons = var.dead_neurons
+        # stats.effective_rank = var.effective_rank
+        # stats.l2_init_loss = var.l2_init_loss
         if self.cfg.with_rnd:
             stats.int_rewards = var.int_rewards.mean()
             stats.curiosity_rewards = var.curiosity_rewards.mean()
             stats.predictor_loss = var.predictor_loss
             stats.int_value_loss = var.int_value_loss

-        if self.train_step % 200 == 0:
-            stats.per_layer_grad_norms = var.per_layer_grad_norms
-            stats.per_layer_param_norms = var.per_layer_param_norms
+        # if self.train_step % 200 == 0:
+        #     stats.per_layer_grad_norms = var.per_layer_grad_norms
+        #     stats.per_layer_param_norms = var.per_layer_param_norms

         # # Log dead neurons
         # for layer in var['dead_neurons_dict'].keys():
Expand Down Expand Up @@ -995,7 +995,6 @@ def _prepare_batch(self, batch: TensorDict) -> Tuple[TensorDict, int, int]:
with torch.no_grad():
# create a shallow copy so we can modify the dictionary
# we still reference the same buffers though
print(f"Actions: {batch['actions'].shape}")
buff = shallow_recursive_copy(batch)

# ignore experience from other agents (i.e. on episode boundary) and from inactive agents
Expand Down Expand Up @@ -1165,10 +1164,10 @@ def _prepare_batch(self, batch: TensorDict) -> Tuple[TensorDict, int, int]:
# likewise, some invalid values of log_prob_actions can cause NaNs or infs
buff["log_prob_actions"][invalid_indices] = -1 # -1 seems like a safe value

if self.cfg.with_rnd:
log.debug(f"[RND] rewards={buff['rewards'].mean()}, curiosity_rewards={buff['curiosity_rewards'].mean()}, int_rewards={buff['int_rewards'].mean()}")
else:
log.debug(f"[OLD] rewards={buff['rewards'].mean()}")
# if self.cfg.with_rnd:
# log.debug(f"[RND] rewards={buff['rewards'].mean()}, curiosity_rewards={buff['curiosity_rewards'].mean()}, int_rewards={buff['int_rewards'].mean()}")
# else:
# log.debug(f"[OLD] rewards={buff['rewards'].mean()}")
return buff, dataset_size, num_invalids

def train(self, batch: TensorDict) -> Optional[Dict]: