alan-turing-institute
diff --git a/notebooks/00_01_exploration_diffusion_reaction.ipynb b/notebooks/00_01_exploration_diffusion_reaction.ipynb
Lines changed: 98 additions & 84 deletions
diff --git a/src/autocast/data/__init__.py b/src/autocast/data/__init__.py
Lines changed: 9 additions & 0 deletions
@@ -36,115 +36,128 @@
     "from autocast.data.advection_diffusion import (\n",
     "    AdvectionDiffusion as AdvectionDiffusionMultichannel,\n",
     ")\n",
+    "from autocast.data.datamodule import SpatioTemporalDataModule, TheWellDataModule\n",
     "from autocast.metrics.spatiotemporal import MAE, MSE, RMSE\n",
     "\n",
-    "# simulation_name = \"reaction_diffusion\"\n",
-    "# simulation_name = \"advection_diffusion\"\n",
-    "simulation_name = \"advection_diffusion_multichannel\"\n",
-    "\n",
-    "if simulation_name == \"advection_diffusion_multichannel\":\n",
-    "    # Override to use multichannel version\n",
-    "    Sim = AdvectionDiffusionMultichannel\n",
-    "if simulation_name == \"reaction_diffusion\":\n",
-    "    Sim = ReactionDiffusion\n",
-    "if simulation_name == \"advection_diffusion\":\n",
-    "    Sim = AdvectionDiffusion\n",
-    "\n",
-    "sim = Sim(return_timeseries=True, log_level=\"error\")\n",
-    "\n",
-    "def generate_split(simulator, n_train: int = 200, n_valid: int = 20, n_test: int = 20):\n",
-    "    \"\"\"Generate training, validation, and test splits from the simulator.\"\"\"\n",
-    "    train = simulator.forward_samples_spatiotemporal(n_train)\n",
-    "    valid = simulator.forward_samples_spatiotemporal(n_valid)\n",
-    "    test = simulator.forward_samples_spatiotemporal(n_test)\n",
-    "    return {\"train\": train, \"valid\": valid, \"test\": test}\n",
-    "\n",
-    "\n",
-    "# Cache file path\n",
-    "cache_file = Path(f\"{simulation_name}_cache.pkl\")\n",
-    "\n",
-    "# Load from cache if it exists, otherwise generate and save\n",
-    "if cache_file.exists():\n",
-    "    print(f\"Loading cached simulation data from {cache_file}\")\n",
-    "    with open(cache_file, \"rb\") as f:\n",
-    "        combined_data = pickle.load(f)\n",
-    "        for key in ['data', 'constant_scalars', 'constant_fields']:\n",
-    "            combined_data[\"test\"][key] = (\n",
-    "                combined_data[\"test\"][key][:5]\n",
-    "                if combined_data[\"test\"][key] is not None else None\n",
-    "            )\n",
-    "else:\n",
-    "    print(\"Generating simulation data...\")\n",
-    "    combined_data = generate_split(sim)\n",
-    "    print(f\"Saving simulation data to {cache_file}\")\n",
-    "    with open(cache_file, \"wb\") as f:\n",
-    "        pickle.dump(combined_data, f)\n"
+    "THE_WELL = True\n",
+    "n_steps_input = 1\n",
+    "n_steps_output = 4\n",
+    "stride = n_steps_output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3",
+   "metadata": {},
+   "source": [
+    "### Read combined data into datamodule"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3",
+   "id": "4",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from autocast.logging import create_wandb_logger, maybe_watch_model\n",
-    "from autocast.logging.wandb import create_notebook_logger\n",
     "\n",
-    "logger, watch = create_notebook_logger(\n",
-    "    project=\"autocast-notebooks\",\n",
-    "    name=f\"00_01_exploration_{simulation_name}\",\n",
-    "    tags=[\"notebook\", simulation_name]\n",
-    ")"
+    "if not THE_WELL:\n",
+    "    # simulation_name = \"reaction_diffusion\"\n",
+    "    # simulation_name = \"advection_diffusion\"\n",
+    "    simulation_name = \"advection_diffusion_multichannel\"\n",
+    "\n",
+    "    if simulation_name == \"advection_diffusion_multichannel\":\n",
+    "        # Override to use multichannel version\n",
+    "        Sim = AdvectionDiffusionMultichannel\n",
+    "    if simulation_name == \"reaction_diffusion\":\n",
+    "        Sim = ReactionDiffusion\n",
+    "    if simulation_name == \"advection_diffusion\":\n",
+    "        Sim = AdvectionDiffusion\n",
+    "\n",
+    "    sim = Sim(return_timeseries=True, log_level=\"error\")\n",
+    "\n",
+    "    def generate_split(\n",
+    "        simulator, n_train: int = 200, n_valid: int = 20, n_test: int = 20\n",
+    "    ):\n",
+    "        \"\"\"Generate training, validation, and test splits from the simulator.\"\"\"\n",
+    "        train = simulator.forward_samples_spatiotemporal(n_train)\n",
+    "        valid = simulator.forward_samples_spatiotemporal(n_valid)\n",
+    "        test = simulator.forward_samples_spatiotemporal(n_test)\n",
+    "        return {\"train\": train, \"valid\": valid, \"test\": test}\n",
+    "\n",
+    "    # Cache file path\n",
+    "    cache_file = Path(f\"{simulation_name}_cache.pkl\")\n",
+    "\n",
+    "    # Load from cache if it exists, otherwise generate and save\n",
+    "    if cache_file.exists():\n",
+    "        print(f\"Loading cached simulation data from {cache_file}\")\n",
+    "        with open(cache_file, \"rb\") as f:\n",
+    "            combined_data = pickle.load(f)\n",
+    "            for key in [\"data\", \"constant_scalars\", \"constant_fields\"]:\n",
+    "                combined_data[\"test\"][key] = (\n",
+    "                    combined_data[\"test\"][key][:5]\n",
+    "                    if combined_data[\"test\"][key] is not None\n",
+    "                    else None\n",
+    "                )\n",
+    "    else:\n",
+    "        print(\"Generating simulation data...\")\n",
+    "        combined_data = generate_split(sim)\n",
+    "        print(f\"Saving simulation data to {cache_file}\")\n",
+    "        with open(cache_file, \"wb\") as f:\n",
+    "            pickle.dump(combined_data, f)\n",
+    "\n",
+    "    datamodule = SpatioTemporalDataModule(\n",
+    "        data=combined_data,\n",
+    "        data_path=None,\n",
+    "        n_steps_input=n_steps_input,\n",
+    "        n_steps_output=n_steps_output,\n",
+    "        stride=n_steps_output,\n",
+    "        batch_size=16,\n",
+    "    )\n",
+    "else:\n",
+    "    simulation_name = \"turbulent_radiative_layer_2D\"\n",
+    "    datamodule = TheWellDataModule(\n",
+    "        well_base_path=\"../../autocast/datasets/\",\n",
+    "        well_dataset_name=simulation_name,\n",
+    "        n_steps_input=n_steps_input,\n",
+    "        n_steps_output=n_steps_output,\n",
+    "        min_dt_stride=1,\n",
+    "        use_normalization=True,\n",
+    "    )\n"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "4",
+   "id": "5",
    "metadata": {},
    "source": [
-    "### Read combined data into datamodule\n"
+    "### Set up logging"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5",
+   "id": "6",
    "metadata": {},
    "outputs": [],
    "source": [
     "\n",
-    "from autocast.data.datamodule import SpatioTemporalDataModule\n",
+    "from autocast.logging import create_wandb_logger, maybe_watch_model\n",
+    "from autocast.logging.wandb import create_notebook_logger\n",
     "\n",
-    "n_steps_input = 1\n",
-    "n_steps_output = 4\n",
-    "stride = n_steps_output\n",
-    "datamodule = SpatioTemporalDataModule(\n",
-    "    data=combined_data,\n",
-    "    data_path=None,\n",
-    "    n_steps_input=n_steps_input,\n",
-    "    n_steps_output=n_steps_output,\n",
-    "    stride=n_steps_output,\n",
-    "    batch_size=16,\n",
+    "logger, watch = create_notebook_logger(\n",
+    "    project=\"autocast-notebooks\",\n",
+    "    name=f\"00_01_exploration_{simulation_name}\",\n",
+    "    tags=[\"notebook\", simulation_name],\n",
     ")"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "6",
-   "metadata": {},
-   "source": [
-    "### Example batch\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
    "id": "7",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "len(datamodule.train_dataset) / 50\n"
+    "### Example shape and batch\n"
    ]
   },
   {
@@ -164,7 +177,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
     "batch = next(iter(datamodule.train_dataloader()))\n",
     "\n",
     "batch.input_fields.shape"
@@ -201,7 +213,7 @@
     "    hid_blocks=(2, 2, 2),\n",
     "    spatial=2,\n",
     "    periodic=False,\n",
-    "    )\n",
+    ")\n",
     "\n",
     "if processor_name == \"flow_matching\":\n",
     "    processor = FlowMatchingProcessor(\n",
@@ -211,7 +223,7 @@
     "        n_channels_out=n_channels,\n",
     "        stride=n_steps_output,\n",
     "        flow_ode_steps=4,\n",
-    "        )\n",
+    "    )\n",
     "else:\n",
     "    from autocast.processors.diffusion import DiffusionProcessor\n",
     "\n",
@@ -221,7 +233,7 @@
     "        n_steps_output=n_steps_output,\n",
     "        n_channels_out=n_channels,\n",
     "        stride=n_steps_output,\n",
-    "        )\n",
+    "    )\n",
     "\n",
     "encoder = IdentityEncoder()\n",
     "decoder = IdentityDecoder()\n",
@@ -233,7 +245,7 @@
     "    # learning_rate=1e-5,\n",
     "    learning_rate=1e-4,\n",
     "    #test_metrics = [MSE(), MAE(), RMSE()]\n",
-    "    )\n",
+    ")\n",
     "maybe_watch_model(logger, model, watch)"
    ]
   },
@@ -266,7 +278,9 @@
     "\n",
     "device = \"mps\"  # \"cpu\"\n",
     "# device = \"cpu\"\n",
-    "trainer = L.Trainer(max_epochs=4, accelerator=device, log_every_n_steps=10, logger=logger)\n",
+    "trainer = L.Trainer(\n",
+    "    max_epochs=4, accelerator=device, log_every_n_steps=10, logger=logger\n",
+    ")\n",
     "trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())\n",
     "trainer.save_checkpoint(f\"./{simulation_name}_{processor_name}_model.ckpt\")"
    ]
@@ -379,13 +393,13 @@
     "\n",
     "batch_idx = 0\n",
     "if simulation_name == \"advection_diffusion_multichannel\":\n",
-    "    channel_names=[\"vorticity\", \"velocity_x\", \"velocity_y\", \"streamfunction\"]\n",
+    "    channel_names = [\"vorticity\", \"velocity_x\", \"velocity_y\", \"streamfunction\"]\n",
     "elif simulation_name == \"advection_diffusion\":\n",
-    "    channel_names=[\"vorticity\"]\n",
+    "    channel_names = [\"vorticity\"]\n",
     "elif simulation_name == \"reaction_diffusion\":\n",
-    "    channel_names=[\"U\", \"V\"]\n",
+    "    channel_names = [\"U\", \"V\"]\n",
     "else:\n",
-    "    channel_names=None\n",
+    "    channel_names = None\n",
     "\n",
     "anim = plot_spatiotemporal_video(\n",
     "    pred=preds,\n",
 
@@ -0,0 +1,9 @@
+from autocast.data.datamodule import SpatioTemporalDataModule, TheWellDataModule
+from autocast.data.dataset import SpatioTemporalDataset, TheWell
+
+__all__ = [  # public API: exactly the four names imported above
+    "SpatioTemporalDataModule",
+    "SpatioTemporalDataset",
+    "TheWell",
+    "TheWellDataModule",
+]