ddcl-applications/run_sweep.py at main · yashrb24/ddcl-applications · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""
Script to initialize and run wandb sweep for DDCL hyperparameter tuning.

This script supports both single-agent and multi-agent parallel sweep execution.

Usage Examples:

1. Create sweep and run single agent:
    python run_sweep.py

2. Create sweep only (for multi-agent setup):
    python run_sweep.py --create-only
    # Then in separate terminals/tmux/screen:
    wandb agent <sweep_id>  # Agent 1
    wandb agent <sweep_id>  # Agent 2
    wandb agent <sweep_id>  # Agent 3

3. Run specific number of jobs per agent:
    python run_sweep.py --count 3

4. Use existing sweep:
    python run_sweep.py --sweep-id <sweep_id>

Multi-Agent Parallel Execution:
    For fastest results, run multiple agents in parallel. Each agent will
    pull different hyperparameter combinations from the sweep queue.

    With tmux:
        tmux new-session -d -s sweep1 'wandb agent <sweep_id>'
        tmux new-session -d -s sweep2 'wandb agent <sweep_id>'
        tmux new-session -d -s sweep3 'wandb agent <sweep_id>'

    With GNU parallel:
        parallel -j 3 wandb agent ::: <sweep_id> <sweep_id> <sweep_id>
"""

import wandb
import yaml


def main():
    """Create (or reuse) a wandb sweep and optionally run an agent on it.

    Command-line flags (parsed from ``sys.argv``):
        --create-only   Print the sweep path and exit without running an agent.
        --count N       Cap this agent at N runs. When omitted, no cap is
                        passed to the agent.
        --sweep-id ID   Reuse an existing sweep instead of creating a new one
                        from ``sweep_config.yaml`` in the current directory.

    Exits with status 0 after printing the sweep path when ``--create-only``
    is given, and with an argparse usage error (status 2) when ``--count`` is
    not a positive integer.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Initialize and run wandb sweep for DDCL hyperparameter tuning"
    )
    parser.add_argument(
        "--create-only",
        action="store_true",
        help="Only create the sweep and print the ID, don't run agents"
    )
    parser.add_argument(
        "--count",
        type=int,
        default=None,
        help="Number of runs for this agent (default: run all sweep jobs)"
    )
    parser.add_argument(
        "--sweep-id",
        type=str,
        help="Use existing sweep ID instead of creating a new one"
    )

    args = parser.parse_args()

    # Validate before touching wandb so a bad flag never leaves an orphaned
    # sweep behind. A truthiness check would silently treat 0 as "no limit".
    if args.count is not None and args.count < 1:
        parser.error("--count must be a positive integer")

    # Create or use existing sweep
    if args.sweep_id:
        sweep_id = args.sweep_id
        sweep_path = args.sweep_id
        print(f"Using existing sweep: {sweep_id}")
    else:
        # Load sweep configuration
        with open("sweep_config.yaml", "r", encoding="utf-8") as f:
            sweep_config = yaml.safe_load(f)

        # Initialize sweep
        sweep_id = wandb.sweep(sweep_config, project="ddcl-vae")

        # Get entity (username) for full sweep path
        entity = wandb.Api().default_entity
        sweep_path = f"{entity}/ddcl-vae/{sweep_id}"

        print(f"Created new sweep with ID: {sweep_id}")
        print(f"Full sweep path: {sweep_path}")

    print("To run agents manually (supports multiple parallel agents):")
    print(f"  wandb agent {sweep_path}")
    print("\nFor parallel execution, run the above command in multiple terminals/tmux/screen sessions")

    if args.create_only:
        print("\n--create-only flag set. Exiting without running agent.")
        sys.exit(0)

    # Run the sweep agent
    print("Starting sweep agent...")
    if args.count is not None:
        print(f"  Running {args.count} sweep jobs")
    else:
        # NOTE(review): the "30 total" figure is hard-coded and is not derived
        # from the sweep configuration -- confirm it still matches the grid.
        print("  Running all sweep jobs (30 total for current grid)")

    # Forward the cap to the agent; count=None leaves the agent uncapped.
    wandb.agent(sweep_path, count=args.count)

# Run the command-line entry point only when this file is executed as a
# script, so importing the module does not trigger main().
if __name__ == "__main__":
    main()