-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_sweep.py
More file actions
107 lines (84 loc) · 3.14 KB
/
run_sweep.py
File metadata and controls
107 lines (84 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""
Script to initialize and run wandb sweep for DDCL hyperparameter tuning.
This script supports both single-agent and multi-agent parallel sweep execution.
Usage Examples:
1. Create sweep and run single agent:
python run_sweep.py
2. Create sweep only (for multi-agent setup):
python run_sweep.py --create-only
# Then in separate terminals/tmux/screen:
wandb agent <sweep_id> # Agent 1
wandb agent <sweep_id> # Agent 2
wandb agent <sweep_id> # Agent 3
3. Run a specific number of jobs with this agent:
python run_sweep.py --count 3
4. Use existing sweep:
python run_sweep.py --sweep-id <sweep_id>
Multi-Agent Parallel Execution:
For fastest results, run multiple agents in parallel. Each agent will
pull different hyperparameter combinations from the sweep queue.
With tmux:
tmux new-session -d -s sweep1 'wandb agent <sweep_id>'
tmux new-session -d -s sweep2 'wandb agent <sweep_id>'
tmux new-session -d -s sweep3 'wandb agent <sweep_id>'
With GNU parallel:
parallel -j 3 wandb agent ::: <sweep_id> <sweep_id> <sweep_id>
"""
import wandb
import yaml
def main() -> None:
    """Create (or reuse) a wandb sweep and optionally run one agent for it.

    Command-line flags:
        --create-only  Create the sweep, print how to launch agents, and exit.
        --count N      Limit this agent to N runs (default: no limit).
        --sweep-id ID  Reuse an existing sweep instead of creating a new one.

    When no ``--sweep-id`` is given, the sweep definition is read from
    ``sweep_config.yaml`` in the current working directory and registered
    under the ``ddcl-vae`` project.

    Raises:
        SystemExit: with status 0 after creating the sweep when
            ``--create-only`` is set (and from argparse on bad arguments).
    """
    # Kept function-local, as in the original, so the block is self-contained.
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Initialize and run wandb sweep for DDCL hyperparameter tuning"
    )
    parser.add_argument(
        "--create-only",
        action="store_true",
        help="Only create the sweep and print the ID, don't run agents",
    )
    parser.add_argument(
        "--count",
        type=int,
        default=None,
        help="Number of runs for this agent (default: run all sweep jobs)",
    )
    parser.add_argument(
        "--sweep-id",
        type=str,
        help="Use existing sweep ID instead of creating a new one",
    )
    args = parser.parse_args()

    # Create or use existing sweep
    if args.sweep_id:
        sweep_id = args.sweep_id
        # The user-supplied value is handed to the agent unchanged.
        sweep_path = args.sweep_id
        print(f"Using existing sweep: {sweep_id}")
    else:
        # Load sweep configuration
        with open("sweep_config.yaml", "r", encoding="utf-8") as f:
            sweep_config = yaml.safe_load(f)

        # Initialize sweep
        sweep_id = wandb.sweep(sweep_config, project="ddcl-vae")

        # Get entity (username) for full sweep path
        # NOTE(review): presumably default_entity is always set once logged
        # in; if it can be None the path below would be malformed - confirm.
        entity = wandb.Api().default_entity
        sweep_path = f"{entity}/ddcl-vae/{sweep_id}"

        print(f"Created new sweep with ID: {sweep_id}")
        print(f"Full sweep path: {sweep_path}")
        print("To run agents manually (supports multiple parallel agents):")
        print(f" wandb agent {sweep_path}")
        print("\nFor parallel execution, run the above command in multiple terminals/tmux/screen sessions")

    if args.create_only:
        print("\n--create-only flag set. Exiting without running agent.")
        sys.exit(0)

    # Run the sweep agent
    print("Starting sweep agent...")
    if args.count:
        print(f" Running {args.count} sweep jobs")
        run_limit = args.count
    else:
        print(" Running all sweep jobs (30 total for current grid)")
        # A missing or zero count means "no limit", as before.
        run_limit = None

    # Forward the limit to the agent: previously --count was announced above
    # but never passed on, so the agent always ran the entire sweep.
    wandb.agent(sweep_path, count=run_limit)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()