Merge pull request #340 from jonbinney/overrides

alejandromarcu · web-flow · commit c828c56c78a6 · 2026-02-13T17:00:30.000-08:00
Override config from the command line
diff --git a/deep_quoridor/src/train_v2.py b/deep_quoridor/src/train_v2.py
@@ -12,15 +12,15 @@
     parser.add_argument("-r", "--runs-dir", type=str, default=None, help="Directory for runs")
     # TODO: implement this
     # parser.add_argument("-c", "--continue", dest="continue_run", action="store_true", help="Continue an existing run")
-    # parser.add_argument(
-    #     "-o", "--overrides", nargs="*", help="Configuration overrides (e.g., run_id=my_run alphazero.mcts_n=250)"
-    # )
+    parser.add_argument(
+        "-o", "--overrides", nargs="*", help="Configuration overrides (e.g., run_id=my_run alphazero.mcts_n=250)"
+    )
 
     args = parser.parse_args()
 
     runs_dir = args.runs_dir if args.runs_dir is not None else str(Path(__file__).parent.parent)
 
-    config = load_config_and_setup_run(args.config_file, runs_dir)
+    config = load_config_and_setup_run(args.config_file, runs_dir, overrides=args.overrides)
     mp.set_start_method("spawn", force=True)
 
     # Make sure we don't have the shutdown signal from a previous run
diff --git a/deep_quoridor/src/v2/TODO.md b/deep_quoridor/src/v2/TODO.md
@@ -1,7 +1,6 @@
 # V1 Parity
 
 - Replay buffer length: right now we're not rolling out old games to respect the length
-- Overrides from the command line
 - Continuation
 
 # Other improvements and new features
diff --git a/deep_quoridor/src/v2/config.py b/deep_quoridor/src/v2/config.py
@@ -213,13 +213,85 @@ def _load_config_data(file: str) -> dict:
     return data
 
 
-def load_user_config(file: str) -> UserConfig:
+def _parse_override_value(value: str):
+    """Parse a string value into an appropriate Python type."""
+    if value.lower() == "none":
+        return None
+    if value.lower() == "true":
+        return True
+    if value.lower() == "false":
+        return False
+    if value.startswith("[") and value.endswith("]"):
+        inner = value[1:-1].strip()
+        if not inner:
+            return []
+        return [_parse_override_value(item.strip()) for item in inner.split(",")]
+    try:
+        return int(value)
+    except ValueError:
+        pass
+    try:
+        return float(value)
+    except ValueError:
+        pass
+    return value
+
+
+def _as_index(part: str) -> int:
+    try:
+        return int(part)
+    except ValueError:
+        raise ValueError(f"Expected a numeric index for list, got '{part}'")
+
+
+def _ensure_and_navigate(target, part: str):
+    """Navigate into an intermediate path part, creating a dict if the key is missing."""
+    if isinstance(target, list):
+        return target[_as_index(part)]
+    if part not in target:
+        target[part] = {}
+    return target[part]
+
+
+def _set_value(target, part: str, value):
+    """Set a value on a dict key or list index."""
+    if isinstance(target, list):
+        target[_as_index(part)] = value
+    else:
+        target[part] = value
+
+
+def _apply_overrides(data: dict, overrides: list[str]) -> dict:
+    """Apply dotted-key overrides (e.g. 'alphazero.mcts_n=250', 'wandb=None') to a config dict.
+
+    Supports numeric indices for lists: 'benchmarks.0.every=5m'
+    """
+    for override in overrides:
+        if "=" not in override:
+            raise ValueError(f"Invalid override format '{override}', expected 'key=value'")
+        key, value = override.split("=", 1)
+        parts = key.split(".")
+        parsed_value = _parse_override_value(value)
+
+        target = data
+        for part in parts[:-1]:
+            target = _ensure_and_navigate(target, part)
+        _set_value(target, parts[-1], parsed_value)
+
+    return data
+
+
+def load_user_config(file: str, overrides: list[str] | None = None) -> UserConfig:
     data = _load_config_data(file)
+    if overrides:
+        _apply_overrides(data, overrides)
     return UserConfig.model_validate(data)
 
 
-def load_config_and_setup_run(file: str, base_dir: str, create_dirs: bool = True) -> Config:
-    user_config = load_user_config(file)
+def load_config_and_setup_run(
+    file: str, base_dir: str, overrides: list[str] | None = None, create_dirs: bool = True
+) -> Config:
+    user_config = load_user_config(file, overrides=overrides)
     config = Config.from_user(user_config, base_dir, create_dirs=create_dirs)
 
     config_filename = config.paths.config_file
diff --git a/deep_quoridor/test/config_test.py b/deep_quoridor/test/config_test.py
@@ -0,0 +1,110 @@
+import pytest
+import yaml
+from v2.config import load_user_config
+
+EXAMPLE_CONFIG = {
+    "run_id": "test-run",
+    "quoridor": {"board_size": 5, "max_walls": 3, "max_steps": 50},
+    "alphazero": {"network": {"type": "mlp"}, "mcts_n": 300, "mcts_c_puct": 1.2},
+    "wandb": {"project": "example", "upload_model": {"every": "20 models", "when_max": ["raw_win_perc", "elo_score"]}},
+    "self_play": {"num_workers": 2, "parallel_games": 8, "alphazero": {"mcts_noise_epsilon": 0.25}},
+    "training": {
+        "games_per_training_step": 25.0,
+        "learning_rate": 0.001,
+        "batch_size": 256,
+        "weight_decay": 0.0001,
+        "replay_buffer_size": 1000000,
+    },
+    "benchmarks": [
+        {
+            "every": "10 models",
+            "jobs": [
+                {"type": "tournament", "prefix": "raw", "times": 10, "opponents": ["random", "greedy"]},
+                {"type": "dumb_score", "prefix": "raw"},
+            ],
+        },
+    ],
+}
+
+
+@pytest.fixture
+def config_file(tmp_path):
+    path = tmp_path / "config.yaml"
+    path.write_text(yaml.safe_dump(EXAMPLE_CONFIG, sort_keys=False))
+    return str(path)
+
+
+def test_no_overrides(config_file):
+    config = load_user_config(config_file)
+    assert config.wandb is not None
+    assert config.wandb.project == "example"
+    assert config.training.learning_rate == 0.001
+
+
+def test_override_none(config_file):
+    config = load_user_config(config_file, overrides=["wandb=None"])
+    assert config.wandb is None
+
+
+def test_override_boolean_true(config_file):
+    config = load_user_config(config_file, overrides=["training.model_save_timing=True"])
+    assert config.training.model_save_timing is True
+
+
+def test_override_boolean_false(config_file):
+    config = load_user_config(config_file, overrides=["training.save_pytorch=false"])
+    assert config.training.save_pytorch is False
+
+
+def test_override_int(config_file):
+    config = load_user_config(config_file, overrides=["alphazero.mcts_n=500"])
+    assert config.alphazero.mcts_n == 500
+
+
+def test_override_float(config_file):
+    config = load_user_config(config_file, overrides=["training.learning_rate=0.01"])
+    assert config.training.learning_rate == 0.01
+
+
+def test_override_string(config_file):
+    config = load_user_config(config_file, overrides=["run_id=my-custom-run"])
+    assert config.run_id == "my-custom-run"
+
+
+def test_override_list(config_file):
+    config = load_user_config(config_file, overrides=["wandb.upload_model.when_max=[dumb_score,tournament]"])
+    assert config.wandb.upload_model.when_max == ["dumb_score", "tournament"]
+
+
+def test_override_empty_list(config_file):
+    config = load_user_config(config_file, overrides=["wandb.upload_model.when_max=[]"])
+    assert config.wandb.upload_model.when_max == []
+
+
+def test_override_list_index(config_file):
+    config = load_user_config(config_file, overrides=["benchmarks.0.every=5 models"])
+    assert config.benchmarks[0].every == "5 models"
+
+
+def test_override_nested_list_index(config_file):
+    config = load_user_config(config_file, overrides=["benchmarks.0.jobs.0.times=20"])
+    assert config.benchmarks[0].jobs[0].times == 20
+
+
+def test_multiple_overrides(config_file):
+    config = load_user_config(
+        config_file, overrides=["alphazero.mcts_n=100", "training.learning_rate=0.05", "wandb=None"]
+    )
+    assert config.alphazero.mcts_n == 100
+    assert config.training.learning_rate == 0.05
+    assert config.wandb is None
+
+
+def test_invalid_override_format(config_file):
+    with pytest.raises(ValueError, match="Invalid override format"):
+        load_user_config(config_file, overrides=["no_equals_sign"])
+
+
+def test_invalid_key_rejected_by_pydantic(config_file):
+    with pytest.raises(Exception):
+        load_user_config(config_file, overrides=["nonexistent_key=value"])