alan-turing-institute
diff --git a/‎README.md‎
Lines changed: 24 additions & 1 deletion b/‎README.md‎
Lines changed: 24 additions & 1 deletion
diff --git a/‎configs/config.yaml‎
Lines changed: 1 addition & 0 deletions b/‎configs/config.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎configs/logging/wandb.yaml‎
Lines changed: 19 additions & 0 deletions b/‎configs/logging/wandb.yaml‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎configs/processor.yaml‎
Lines changed: 1 addition & 0 deletions b/‎configs/processor.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎notebooks/00_01_exploration_diffusion_reaction.ipynb‎
Lines changed: 14 additions & 6 deletions b/‎notebooks/00_01_exploration_diffusion_reaction.ipynb‎
Lines changed: 14 additions & 6 deletions
diff --git a/‎notebooks/00_exploration.ipynb‎
Lines changed: 48 additions & 23 deletions b/‎notebooks/00_exploration.ipynb‎
Lines changed: 48 additions & 23 deletions
@@ -33,4 +33,27 @@ uv run evaluate_processor \
 
 Evaluation writes a CSV of aggregate metrics to `--csv-path` (defaults to
 `<work-dir>/evaluation_metrics.csv`) and, when `--batch-index` is provided,
-stores rollout animations for the specified test batches.
+stores rollout animations for the specified test batches.
+
+## Experiment Tracking with Weights & Biases
+
+AutoCast ships with an optional [Weights & Biases](https://wandb.ai/) integration that is
+configured entirely through the Hydra config file `configs/logging/wandb.yaml`.
+
+- Enable logging for CLI workflows by overriding `logging.wandb.enabled=true` and
+	optionally providing `project`, `name`, or `tags` overrides:
+
+	```bash
+	uv run train_processor \
+		--config-path=configs \
+		--override logging.wandb.enabled=true \
+		--override logging.wandb.project=autocast-experiments \
+		--override logging.wandb.name=processor-baseline
+	```
+
+- The autoencoder/processor training CLIs pass the configured `WandbLogger` directly into Lightning so that metrics, checkpoints, and artifacts are synchronized automatically.
+- The evaluation CLI reports aggregate test metrics to the same run when logging is enabled, making it easy to compare training and evaluation outputs in one dashboard.
+- The exploration notebooks contain a dedicated cell that creates a W&B logger, either via `autocast.logging.create_wandb_logger` or via the `create_notebook_logger` helper in `autocast.logging.wandb`. Where that cell exposes an `enabled` flag, toggle it to control tracking when experimenting interactively.
+
+When `logging.wandb.enabled` remains `false` (the default set in `configs/logging/wandb.yaml`), the
+logger is skipped entirely, so the stack can be used without a W&B account.
@@ -2,6 +2,7 @@ defaults:
   - data: reaction_diffusion
   - model: ae
   - trainer: default
+  - logging: wandb
   - _self_
 
 seed: 42
 
@@ -0,0 +1,19 @@
+wandb:
+  enabled: false
+  project: autocast
+  entity: null
+  name: null
+  group: null
+  job_type: ${experiment_name}
+  tags: []
+  notes: null
+  mode: online
+  resume: null
+  id: null
+  log_model: false
+  save_dir: null
+  settings: {}
+  config: {}
+  watch:
+    log: null
+    log_freq: 100
@@ -4,6 +4,7 @@ defaults:
   - decoder: channels_last
   - processor: fno
   - trainer: default
+  - logging: wandb
   - _self_
 
 seed: 42
 
@@ -87,7 +87,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "combined_data[\"test\"].keys()"
+    "from autocast.logging import create_wandb_logger, maybe_watch_model\n",
+    "from autocast.logging.wandb import create_notebook_logger\n",
+    "\n",
+    "logger, watch = create_notebook_logger(\n",
+    "    project=\"autocast-notebooks\",\n",
+    "    name=f\"00_01_exploration_{simulation_name}\",\n",
+    "    tags=[\"notebook\", simulation_name]\n",
+    ")"
    ]
   },
   {
@@ -193,7 +200,7 @@
     "    hid_blocks=(2, 2, 2),\n",
     "    spatial=2,\n",
     "    periodic=False,\n",
-    ")\n",
+    "    )\n",
     "\n",
     "if processor_name == \"flow_matching\":\n",
     "    processor = FlowMatchingProcessor(\n",
@@ -203,7 +210,7 @@
     "        n_channels_out=n_channels,\n",
     "        stride=n_steps_output,\n",
     "        flow_ode_steps=4,\n",
-    "    )\n",
+    "        )\n",
     "else:\n",
     "    from autocast.processors.diffusion import DiffusionProcessor\n",
     "\n",
@@ -213,7 +220,7 @@
     "        n_steps_output=n_steps_output,\n",
     "        n_channels_out=n_channels,\n",
     "        stride=n_steps_output,\n",
-    "    )\n",
+    "        )\n",
     "\n",
     "encoder = IdentityEncoder()\n",
     "decoder = IdentityDecoder()\n",
@@ -224,7 +231,8 @@
     "    train_processor_only=True,\n",
     "    # learning_rate=1e-5,\n",
     "    learning_rate=1e-4,\n",
-    ")"
+    "    )\n",
+    "maybe_watch_model(logger, model, watch)"
    ]
   },
   {
@@ -256,7 +264,7 @@
     "\n",
     "device = \"mps\"  # \"cpu\"\n",
     "# device = \"cpu\"\n",
-    "trainer = L.Trainer(max_epochs=4, accelerator=device, log_every_n_steps=10)\n",
+    "trainer = L.Trainer(max_epochs=4, accelerator=device, log_every_n_steps=10, logger=logger)\n",
     "trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())\n",
     "trainer.save_checkpoint(f\"./{simulation_name}_{processor_name}_model.ckpt\")"
    ]
 
@@ -66,17 +66,41 @@
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "id": "3",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "from autocast.logging import create_wandb_logger, maybe_watch_model\n",
+    "\n",
+    "logging_cfg = {\n",
+    "    \"wandb\": {\n",
+    "        \"enabled\": True,  # Set to False to disable wandb for this run.\n",
+    "        \"project\": \"autocast-notebooks\",\n",
+    "        \"name\": \"00_exploration\",\n",
+    "        \"tags\": [\"notebook\", \"00-exploration\"],\n",
+    "    },\n",
+    "}\n",
+    "wandb_logger, wandb_watch = create_wandb_logger(\n",
+    "    logging_cfg,\n",
+    "    experiment_name=\"00_exploration\",\n",
+    "    job_type=\"notebook\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4",
+   "metadata": {},
    "source": [
     "### Read combined data into datamodule\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4",
+   "id": "5",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -97,7 +121,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "5",
+   "id": "6",
    "metadata": {},
    "source": [
     "### Example batch\n"
@@ -106,7 +130,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6",
+   "id": "7",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -118,7 +142,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7",
+   "id": "8",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -137,21 +161,22 @@
     "    hidden_channels=64,\n",
     "    stride=n_steps_output,\n",
     "    max_rollout_steps=100,\n",
-    ")\n",
+    "    )\n",
     "encoder = PermuteConcat(with_constants=False)\n",
     "decoder = ChannelsLast(output_channels=n_channels, time_steps=n_steps_output)\n",
     "\n",
     "model = EncoderProcessorDecoder(\n",
     "    encoder_decoder=EncoderDecoder(encoder=encoder, decoder=decoder),\n",
     "    processor=processor,\n",
     "    stride=stride,\n",
-    ")"
+    "    )\n",
+    "maybe_watch_model(wandb_logger, model, wandb_watch)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8",
+   "id": "9",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -160,7 +185,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "9",
+   "id": "10",
    "metadata": {},
    "source": [
     "### Run trainer\n"
@@ -169,21 +194,21 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "10",
+   "id": "11",
    "metadata": {},
    "outputs": [],
    "source": [
     "import lightning as L\n",
     "\n",
     "device = \"mps\"  # \"cpu\"\n",
     "# device = \"cpu\"\n",
-    "trainer = L.Trainer(max_epochs=1, accelerator=device, log_every_n_steps=10)\n",
+    "trainer = L.Trainer(max_epochs=1, accelerator=device, log_every_n_steps=10, logger=wandb_logger)\n",
     "trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "11",
+   "id": "12",
    "metadata": {},
    "source": [
     "### Run the evaluation\n"
@@ -192,7 +217,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "12",
+   "id": "13",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -201,7 +226,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "13",
+   "id": "14",
    "metadata": {},
    "source": [
     "### Example rollout\n"
@@ -210,7 +235,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "14",
+   "id": "15",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -221,7 +246,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "15",
+   "id": "16",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -234,7 +259,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "16",
+   "id": "17",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -245,7 +270,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "17",
+   "id": "18",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -259,7 +284,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "18",
+   "id": "19",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -269,7 +294,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "19",
+   "id": "20",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -280,7 +305,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "20",
+   "id": "21",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -293,7 +318,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "21",
+   "id": "22",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -311,7 +336,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "22",
+   "id": "23",
    "metadata": {},
    "outputs": [],
    "source": []