179 changes: 179 additions & 0 deletions exp_runner.py
@@ -0,0 +1,179 @@
import os
import sys
import subprocess
import argparse
import json
from pathlib import Path
from typing import Dict, List, Optional


def load_experiment_config(config_file_path: str) -> List[Dict[str, str]]:
    """Load one experiment spec per line from a JSONL file."""
    experiments = []

    with open(config_file_path, 'r') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue  # skip blank lines instead of failing on json.loads("")
            try:
                experiments.append(json.loads(line))
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON on line {line_num}: {e}")
    return experiments
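# A minimal sketch of one config.jsonl line this loader accepts; the field
# names mirror what mrunner_exps/cool_mujoco.py (below) writes, and the
# concrete values here are hypothetical:
#   {"project_name": "sf2_mujoco", "unique_name": "cool_mujoco_01",
#    "name": "cool_mujoco", "command": "python -m sf_examples.mujoco.train_mujoco --seed=0"}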


def create_sbatch_script(
    command: str,
    script_path: str,
    log_path: str,
    job_name: str,
    venv_path: str,
    partition: str,
    account: str,
    time: str = "24:00:00",
    nodes: int = 1,
    ntasks: int = 1,
    cpus: int = 8,
    mem: str = "32G",
    gpu: int = 1,
) -> None:
    """Write an executable sbatch script that runs `command` with the given resources."""
    # Resolve the experiment directory at generation time; under sbatch, "$0"
    # points at SLURM's spooled copy of the script, so cd "$(dirname "$0")"
    # would not land in the experiment directory.
    exp_dir = os.path.dirname(os.path.abspath(script_path))

    sbatch_content = f"""#!/bin/bash -l
#SBATCH --job-name={job_name}
#SBATCH --output={log_path}
#SBATCH --error={log_path}
#SBATCH --time={time}
#SBATCH --account={account}
#SBATCH --partition={partition}
#SBATCH --ntasks={ntasks}
#SBATCH --nodes={nodes}
#SBATCH --cpus-per-task={cpus}
#SBATCH --mem={mem}
#SBATCH --gres=gpu:{gpu}

ml ML-bundle/24.06a

export WANDB_API_KEY=...
cd "{exp_dir}"

"""

    # Add virtual environment activation if specified
    if venv_path:
        sbatch_content += f"source {venv_path}/bin/activate\n"

    # Add the actual command
    sbatch_content += f"""
{command}

echo ""
echo "Job finished at: $(date)"
"""

    with open(script_path, 'w') as f:
        f.write(sbatch_content)

    # Make the script executable
    os.chmod(script_path, 0o755)
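# For orientation, with the defaults above and a hypothetical job_name="exp_0",
# the generated script header starts roughly like:
#   #!/bin/bash -l
#   #SBATCH --job-name=exp_0
#   #SBATCH --time=24:00:00
#   #SBATCH --cpus-per-task=8
#   #SBATCH --mem=32G
#   #SBATCH --gres=gpu:1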


def submit_job(sbatch_script_path: str, dry_run: bool = False) -> Optional[str]:
    """Submit the script via sbatch; return the job id, or None on dry run/failure."""
    if dry_run:
        print(f"[dry run] would submit {sbatch_script_path}")
        return None

    try:
        result = subprocess.run(
            ["sbatch", sbatch_script_path],
            capture_output=True,
            text=True,
            check=True
        )
        # sbatch prints "Submitted batch job <id>"; the id is the last token
        return result.stdout.strip().split()[-1]
    except subprocess.CalledProcessError as e:
        print(f"Error submitting job {sbatch_script_path}: {e}")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return None

def main():
    parser = argparse.ArgumentParser(description="Run experiments with SLURM")
    parser.add_argument("config_file", help="Path to the experiment configs (JSONL)")
    parser.add_argument("base_dir", default="...", help="Base directory for experiment outputs")
    parser.add_argument("--venv", default=".atari_venv", help="Path to virtual environment to activate")
    parser.add_argument("--account", default="...", help="Account")
    parser.add_argument("--partition", default="...", help="Partition")
    parser.add_argument("--time", default="2880", help="Job time limit (SLURM format; a bare number is minutes)")
    parser.add_argument("--nodes", type=int, default=1, help="Number of nodes (default: 1)")
    parser.add_argument("--ntasks", type=int, default=1, help="Tasks per node (default: 1)")
    parser.add_argument("--cpus", type=int, default=8, help="CPUs per task (default: 8)")
    parser.add_argument("--mem", default="32G", help="Memory per node (default: 32G)")
    parser.add_argument("--gpu", type=int, default=1, help="Number of GPUs (default: 1)")
    parser.add_argument("--dry-run", action="store_true", help="Don't actually submit jobs, just show what would be done")

    args = parser.parse_args()

    # Validate inputs
    if not os.path.exists(args.config_file):
        print(f"Error: Config file {args.config_file} does not exist")
        sys.exit(1)

    # Load experiment configuration
    print(f"Loading experiment configuration from {args.config_file}...")
    try:
        experiments = load_experiment_config(args.config_file)
    except Exception as e:
        print(f"Error loading config: {e}")
        sys.exit(1)

    project_name = experiments[0].get('project_name')
    unique_name = experiments[0].get('unique_name')

    print(f"Running {len(experiments)} experiments")

    # Create directory structure: base_dir/project_name/unique_name/<exp>
    base_path = Path(args.base_dir)
    unique_path = base_path / project_name / unique_name
    unique_path.mkdir(parents=True, exist_ok=True)

    # Generate and submit jobs
    job_ids = []

    for i, exp in enumerate(experiments):
        exp_name = f"{exp['name']}_{i}"
        exp_dir = unique_path / exp_name
        exp_dir.mkdir(exist_ok=True)

        # Paths for script and log files
        script_path = exp_dir / "launch.sbatch"
        log_path = exp_dir / "log.out"

        command = exp.get('command')

        # Create SLURM batch script
        create_sbatch_script(
            command=command,
            script_path=str(script_path),
            log_path=str(log_path),
            job_name=exp_name,
            venv_path=args.venv,
            partition=args.partition,
            time=args.time,
            nodes=args.nodes,
            ntasks=args.ntasks,
            cpus=args.cpus,
            mem=args.mem,
            gpu=args.gpu,
            account=args.account,
        )

        # Submit job and keep the id so we can report what was launched
        job_id = submit_job(str(script_path), dry_run=args.dry_run)
        if job_id:
            job_ids.append(job_id)

    if job_ids:
        print(f"Submitted jobs: {', '.join(job_ids)}")
    print(f"Experiment files created in: {unique_path}")


if __name__ == "__main__":
    main()
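A hypothetical invocation of the runner above (the storage path, account, and partition are placeholders, not values from this PR):

python exp_runner.py config.jsonl /scratch/my_user/experiments --account my_account --partition gpu --dry-run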
8 changes: 4 additions & 4 deletions mrunner_exps/atari_reproduce_paper.py
@@ -5,7 +5,7 @@
 # params for all exps
 config = {
     "exp_tags": [name],
-    "train_for_env_steps": 200_000_000,
+    "train_for_env_steps": 1_000_000,
     "num_workers": 4,
     "num_envs_per_worker": 8,
     "num_batches_per_epoch": 16,
@@ -17,11 +17,11 @@
     "wandb_project": "atari",
     "wandb_group": "plasticity, reproduce paper -- 3",
     "wandb_tags": [name],
-    "with_wandb": True,
+    "with_wandb": False,
 }

 # params different between exps
-atari_games = ["breakout", "montezuma", "phoenix", "namethisgame"]
+atari_games = ["breakout"]

 params_grid = []

@@ -56,7 +56,7 @@
         "optimizer": ["adam"],
         "num_epochs": [8],
         "normalize_returns": [True],
-        "repeat_action_probability": [0.0, 0.25],
+        "repeat_action_probability": [0.0],

         # paper's params: plasticity
         "delta": [0.99],
62 changes: 62 additions & 0 deletions mrunner_exps/cool_mujoco.py
@@ -0,0 +1,62 @@
from mrunner.helpers.specification_helper import create_experiments_helper
import json

# mrunner convention: the experiment name is the script filename minus ".py"
name = globals()["script"][:-3]

# params for all exps
config = {
    "train_for_env_steps": 1_000_000,
    "num_workers": 16,
    "num_envs_per_worker": 16,
    "worker_num_splits": 2,
    "rollout": 32,
    "batch_size": 1024,  # 1024 timesteps = 32 trajectories * rollout of 32
    "async_rl": True,
    "serial_mode": False,
    "restart_behavior": "overwrite",
    "device": "cpu",
    # "with_wandb": True,
    "wandb_user": "ideas-ncbr",
    "wandb_project": "mujoco plasticity_ed",
    "wandb_group": "cool simba",
}

# params different between exps
params_grid = [
    {
        "seed": list(range(3)),
        "env": ["mujoco_hopper"],
        "actor_critic_share_weights": [True, False],
        "model": ["bro"],
    },
]

experiments_list = create_experiments_helper(
    experiment_name=name,
    project_name="sf2_mujoco",
    with_neptune=False,
    script="python3 mrunner_run.py",
    python_path=".",
    tags=[name],
    base_config=config,
    params_grid=params_grid,
    mrunner_ignore=".mrunnerignore",
)

# Turn each experiment into a shell command and collect one record per job
exps = []

for i, exp in enumerate(experiments_list):
    curr_config = {"project_name": exp.project, "unique_name": exp.unique_name, "name": exp.name}
    params = exp.parameters
    run_script = params.pop("run_script", "sf_examples.mujoco.train_mujoco")
    key_pairs = [f"--{key}={value}" for key, value in params.items()]
    cmd = ["python", "-m", run_script] + key_pairs
    curr_config["command"] = " ".join(cmd)
    exps.append(curr_config)


with open("config.jsonl", "w") as f:
    for item in exps:
        f.write(json.dumps(item) + "\n")
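For the grid above, each record in config.jsonl carries a ready-to-run command; the first one would look roughly like this (the unique_name suffix is generated by mrunner and the flag list is abbreviated, so this is illustrative):

{"project_name": "sf2_mujoco", "unique_name": "cool_mujoco_...", "name": "cool_mujoco", "command": "python -m sf_examples.mujoco.train_mujoco --train_for_env_steps=1000000 --seed=0 --env=mujoco_hopper --actor_critic_share_weights=True --model=bro ..."}

These records are the input format that exp_runner.py's load_experiment_config expects.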
8 changes: 5 additions & 3 deletions mrunner_run.py
@@ -4,10 +4,12 @@

 if __name__ == "__main__":
     cfg = get_configuration(print_diagnostics=True, with_neptune=False)
-
+    # print("Configuration:", cfg)
     del cfg["experiment_id"]
-    run_script = cfg.pop("run_script", "sf_examples.atari.train_atari")
+    # run_script = cfg.pop("run_script", "sf_examples.atari.train_atari")
+    run_script = cfg.pop("run_script", "sf_examples.mujoco.train_mujoco")

     key_pairs = [f"--{key}={value}" for key, value in cfg.items()]
     cmd = ["python", "-m", run_script] + key_pairs
-    subprocess.run(cmd)
+    # subprocess.run(cmd)
+    # print("Running command:", " ".join(cmd))
21 changes: 10 additions & 11 deletions sample_factory/algo/learning/learner.py
@@ -887,18 +887,18 @@ def _record_summaries(self, train_loop_vars) -> AttrDict:
         stats.value_loss = var.value_loss
         stats.exploration_loss = var.exploration_loss

-        stats.dead_neurons = var.dead_neurons
-        stats.effective_rank = var.effective_rank
-        stats.l2_init_loss = var.l2_init_loss
+        # stats.dead_neurons = var.dead_neurons
+        # stats.effective_rank = var.effective_rank
+        # stats.l2_init_loss = var.l2_init_loss
         if self.cfg.with_rnd:
             stats.int_rewards = var.int_rewards.mean()
             stats.curiosity_rewards = var.curiosity_rewards.mean()
             stats.predictor_loss = var.predictor_loss
             stats.int_value_loss = var.int_value_loss

-        if self.train_step % 200 == 0:
-            stats.per_layer_grad_norms = var.per_layer_grad_norms
-            stats.per_layer_param_norms = var.per_layer_param_norms
+        # if self.train_step % 200 == 0:
+        #     stats.per_layer_grad_norms = var.per_layer_grad_norms
+        #     stats.per_layer_param_norms = var.per_layer_param_norms

         # # Log dead neurons
         # for layer in var['dead_neurons_dict'].keys():
Expand Down Expand Up @@ -995,7 +995,6 @@ def _prepare_batch(self, batch: TensorDict) -> Tuple[TensorDict, int, int]:
with torch.no_grad():
# create a shallow copy so we can modify the dictionary
# we still reference the same buffers though
print(f"Actions: {batch['actions'].shape}")
buff = shallow_recursive_copy(batch)

# ignore experience from other agents (i.e. on episode boundary) and from inactive agents
Expand Down Expand Up @@ -1165,10 +1164,10 @@ def _prepare_batch(self, batch: TensorDict) -> Tuple[TensorDict, int, int]:
# likewise, some invalid values of log_prob_actions can cause NaNs or infs
buff["log_prob_actions"][invalid_indices] = -1 # -1 seems like a safe value

if self.cfg.with_rnd:
log.debug(f"[RND] rewards={buff['rewards'].mean()}, curiosity_rewards={buff['curiosity_rewards'].mean()}, int_rewards={buff['int_rewards'].mean()}")
else:
log.debug(f"[OLD] rewards={buff['rewards'].mean()}")
# if self.cfg.with_rnd:
# log.debug(f"[RND] rewards={buff['rewards'].mean()}, curiosity_rewards={buff['curiosity_rewards'].mean()}, int_rewards={buff['int_rewards'].mean()}")
# else:
# log.debug(f"[OLD] rewards={buff['rewards'].mean()}")
return buff, dataset_size, num_invalids

def train(self, batch: TensorDict) -> Optional[Dict]: