Project-MONAI
diff --git a/generation/maisi/configs/config_infer.json b/generation/maisi/configs/config_infer.json
Lines changed: 3 additions & 2 deletions
diff --git a/generation/maisi/configs/config_maisi3d-ddpm.json b/generation/maisi/configs/config_maisi3d-ddpm.json
Lines changed: 3 additions & 2 deletions
diff --git a/generation/maisi/configs/config_maisi3d-rflow.json b/generation/maisi/configs/config_maisi3d-rflow.json
Lines changed: 3 additions & 2 deletions
diff --git a/generation/maisi/configs/config_maisi_controlnet_train.json b/generation/maisi/configs/config_maisi_controlnet_train.json
Lines changed: 4 additions & 2 deletions
diff --git a/generation/maisi/configs/config_maisi_diff_model.json b/generation/maisi/configs/config_maisi_diff_model.json
Lines changed: 2 additions & 1 deletion
diff --git a/generation/maisi/maisi_inference_tutorial.ipynb b/generation/maisi/maisi_inference_tutorial.ipynb
Lines changed: 161 additions & 244 deletions
diff --git a/generation/maisi/maisi_train_controlnet_tutorial.ipynb b/generation/maisi/maisi_train_controlnet_tutorial.ipynb
Lines changed: 48 additions & 56 deletions
@@ -3,7 +3,7 @@
     "body_region": ["abdomen"],
     "anatomy_list": ["liver","hepatic tumor"],
     "controllable_anatomy_size": [],
-    "num_inference_steps": 1000,
+    "num_inference_steps": 30,
     "mask_generation_num_inference_steps": 1000,
     "output_size": [
         256,
@@ -23,5 +23,6 @@
     "diffusion_unet": "$@diffusion_unet_def",
     "autoencoder": "$@autoencoder_def",
     "mask_generation_autoencoder": "$@mask_generation_autoencoder_def",
-    "mask_generation_diffusion": "$@mask_generation_diffusion_def"
+    "mask_generation_diffusion": "$@mask_generation_diffusion_def",
+    "modality": 1
 }
@@ -2,6 +2,7 @@
     "spatial_dims": 3,
     "image_channels": 1,
     "latent_channels": 4,
+    "include_body_region": true,
     "mask_generation_latent_shape": [
         4,
         64,
@@ -60,8 +61,8 @@
         ],
         "num_res_blocks": 2,
         "use_flash_attention": true,
-        "include_top_region_index_input": true,
-        "include_bottom_region_index_input": true,
+        "include_top_region_index_input": "@include_body_region",
+        "include_bottom_region_index_input": "@include_body_region",
         "include_spacing_input": true
     },
     "controlnet_def": {
 
@@ -2,6 +2,7 @@
     "spatial_dims": 3,
     "image_channels": 1,
     "latent_channels": 4,
+    "include_body_region": false,
     "mask_generation_latent_shape": [
         4,
         64,
@@ -55,8 +56,8 @@
         ],
         "num_res_blocks": 2,
         "use_flash_attention": true,
-        "include_top_region_index_input": false,
-        "include_bottom_region_index_input": false,
+        "include_top_region_index_input": "@include_body_region",
+        "include_bottom_region_index_input": "@include_body_region",
         "include_spacing_input": true,
         "num_class_embeds": 128,
         "resblock_updown": true,
 
@@ -9,7 +9,9 @@
         "weighted_loss": 100
     },
     "controlnet_infer": {
-       "num_inference_steps": 1000,
-       "autoencoder_sliding_window_infer_size": [96, 96, 96]
+       "num_inference_steps": 10,
+       "autoencoder_sliding_window_infer_size": [80, 80, 80],
+       "autoencoder_sliding_window_infer_overlap": 0.4,
+       "modality": 1
     }
 }
@@ -29,6 +29,7 @@
             0
         ],
         "random_seed": 0,
-        "num_inference_steps": 10
+        "num_inference_steps": 10,
+        "modality": 1
     }
 }
@@ -141,20 +141,22 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:38:43.304][ INFO](notebook) - Using MAISI version maisi3d-ddpm. Will need body region as data input.\n"
+      "[2025-03-12 22:27:22.838][ INFO](notebook) - MAISI version is maisi3d-ddpm, whether to use body_region is True\n"
      ]
     }
    ],
    "source": [
     "maisi_version = \"maisi3d-ddpm\"\n",
     "if maisi_version == \"maisi3d-ddpm\":\n",
-    "    include_body_region = True\n",
-    "    logger.info(\"Using MAISI version maisi3d-ddpm. Will need body region as data input.\")\n",
+    "    model_def_path = \"./configs/config_maisi3d-ddpm.json\"\n",
     "elif maisi_version == \"maisi3d-rflow\":\n",
-    "    include_body_region = False\n",
-    "    logger.info(\"Using MAISI version maisi3d-rflow. Does not need body region as data input.\")\n",
+    "    model_def_path = \"./configs/config_maisi3d-rflow.json\"\n",
     "else:\n",
-    "    raise ValueError(f\"maisi_version has to be chosen from ['maisi3d-ddpm', 'maisi3d-rflow'], yet got {maisi_version}.\")"
+    "    raise ValueError(f\"maisi_version has to be chosen from ['maisi3d-ddpm', 'maisi3d-rflow'], yet got {maisi_version}.\")\n",
+    "with open(model_def_path, \"r\") as f:\n",
+    "    model_def = json.load(f)\n",
+    "include_body_region = model_def[\"include_body_region\"]\n",
+    "logger.info(f\"MAISI version is {maisi_version}, whether to use body_region is {include_body_region}\")"
    ]
   },
   {
@@ -246,9 +248,9 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:38:45.473][ INFO](notebook) - Generated simulated images.\n",
-      "[2025-03-11 23:38:45.474][ INFO](notebook) - img_emb shape: (64, 64, 32, 4)\n",
-      "[2025-03-11 23:38:45.475][ INFO](notebook) - label shape: (256, 256, 128)\n"
+      "[2025-03-12 22:27:25.046][ INFO](notebook) - Generated simulated images.\n",
+      "[2025-03-12 22:27:25.047][ INFO](notebook) - img_emb shape: (64, 64, 32, 4)\n",
+      "[2025-03-12 22:27:25.048][ INFO](notebook) - label shape: (256, 256, 128)\n"
      ]
     }
    ],
@@ -320,20 +322,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:38:45.489][ INFO](notebook) - files and folders under work_dir: ['config_maisi.json', 'models', 'config_maisi_controlnet_train.json', 'outputs', 'sim_dataroot', 'environment_maisi_controlnet_train.json', 'sim_datalist.json'].\n",
-      "[2025-03-11 23:38:45.490][ INFO](notebook) - number of GPUs: 1.\n"
+      "[2025-03-12 22:27:25.062][ INFO](notebook) - files and folders under work_dir: ['config_maisi.json', 'models', 'config_maisi_controlnet_train.json', 'outputs', 'sim_dataroot', 'environment_maisi_controlnet_train.json', 'sim_datalist.json'].\n",
+      "[2025-03-12 22:27:25.063][ INFO](notebook) - number of GPUs: 1.\n"
      ]
     }
    ],
    "source": [
     "env_config_path = \"./configs/environment_maisi_controlnet_train.json\"\n",
     "train_config_path = \"./configs/config_maisi_controlnet_train.json\"\n",
-    "if maisi_version == \"maisi3d-ddpm\":\n",
-    "    model_def_path = \"./configs/config_maisi3d-ddpm.json\"\n",
-    "elif maisi_version == \"maisi3d-rflow\":\n",
-    "    model_def_path = \"./configs/config_maisi3d-rflow.json\"\n",
-    "else:\n",
-    "    raise ValueError(f\"maisi_version has to be chosen from ['maisi3d-ddpm', 'maisi3d-rflow'], yet got {maisi_version}.\")\n",
     "\n",
     "# Load environment configuration, model configuration and model definition\n",
     "with open(env_config_path, \"r\") as f:\n",
@@ -472,29 +468,29 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:38:45.501][ INFO](notebook) - Training the model...\n"
+      "[2025-03-12 22:27:25.074][ INFO](notebook) - Training the model...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:38:54.835][ INFO](maisi.controlnet.training) - Number of GPUs: 8\n",
-      "[2025-03-11 23:38:54.835][ INFO](maisi.controlnet.training) - World_size: 1\n",
-      "[2025-03-11 23:38:56.401][ INFO](maisi.controlnet.training) - trained diffusion model is not loaded.\n",
-      "[2025-03-11 23:38:56.401][ INFO](maisi.controlnet.training) - set scale_factor -> 1.0.\n",
-      "2025-03-11 23:38:56,899 - INFO - 'dst' model updated: 158 of 206 variables.\n",
-      "[2025-03-11 23:38:56.903][ INFO](maisi.controlnet.training) - train controlnet model from scratch.\n",
-      "[2025-03-11 23:38:56.925][ INFO](maisi.controlnet.training) - total number of training steps: 4.0.\n",
-      "[2025-03-11 23:38:58.871][ INFO](maisi.controlnet.training) -\n",
-      "[Epoch 1/2] [Batch 1/2] [LR: 0.00000563] [loss: 0.7972] ETA: 0:00:01.944427\n",
-      "[2025-03-11 23:38:59.018][ INFO](maisi.controlnet.training) -\n",
-      "[Epoch 1/2] [Batch 2/2] [LR: 0.00000250] [loss: 0.7981] ETA: 0:00:00\n",
-      "[2025-03-11 23:38:59.775][ INFO](maisi.controlnet.training) - best loss -> 0.7976870536804199.\n",
-      "[2025-03-11 23:39:00.998][ INFO](maisi.controlnet.training) -\n",
-      "[Epoch 2/2] [Batch 1/2] [LR: 0.00000063] [loss: 0.7971] ETA: 0:00:01.979231\n",
-      "[2025-03-11 23:39:01.129][ INFO](maisi.controlnet.training) -\n",
-      "[Epoch 2/2] [Batch 2/2] [LR: 0.00000000] [loss: 0.7994] ETA: 0:00:00\n",
+      "[2025-03-12 22:27:33.707][ INFO](maisi.controlnet.training) - Number of GPUs: 8\n",
+      "[2025-03-12 22:27:33.708][ INFO](maisi.controlnet.training) - World_size: 1\n",
+      "[2025-03-12 22:27:35.410][ INFO](maisi.controlnet.training) - trained diffusion model is not loaded.\n",
+      "[2025-03-12 22:27:35.410][ INFO](maisi.controlnet.training) - set scale_factor -> 1.0.\n",
+      "2025-03-12 22:27:35,902 - INFO - 'dst' model updated: 158 of 206 variables.\n",
+      "[2025-03-12 22:27:35.907][ INFO](maisi.controlnet.training) - train controlnet model from scratch.\n",
+      "[2025-03-12 22:27:35.930][ INFO](maisi.controlnet.training) - total number of training steps: 4.0.\n",
+      "[2025-03-12 22:27:38.006][ INFO](maisi.controlnet.training) -\n",
+      "[Epoch 1/2] [Batch 1/2] [LR: 0.00000563] [loss: 0.7976] ETA: 0:00:02.073507\n",
+      "[2025-03-12 22:27:38.147][ INFO](maisi.controlnet.training) -\n",
+      "[Epoch 1/2] [Batch 2/2] [LR: 0.00000250] [loss: 0.7985] ETA: 0:00:00\n",
+      "[2025-03-12 22:27:38.683][ INFO](maisi.controlnet.training) - best loss -> 0.7980280518531799.\n",
+      "[2025-03-12 22:27:39.955][ INFO](maisi.controlnet.training) -\n",
+      "[Epoch 2/2] [Batch 1/2] [LR: 0.00000063] [loss: 0.7992] ETA: 0:00:01.807460\n",
+      "[2025-03-12 22:27:40.086][ INFO](maisi.controlnet.training) -\n",
+      "[Epoch 2/2] [Batch 2/2] [LR: 0.00000000] [loss: 0.7980] ETA: 0:00:00\n",
       "\n"
      ]
     }
@@ -512,8 +508,6 @@
     "    \"--training-config\",\n",
     "    train_config_filepath,\n",
     "]\n",
-    "if include_body_region:\n",
-    "    module_args.append(\"--include_body_region\")\n",
     "\n",
     "run_torchrun(module, module_args, num_gpus=num_gpus)"
    ]
@@ -539,32 +533,32 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:39:03.635][ INFO](notebook) - Inference...\n"
+      "[2025-03-12 22:27:42.632][ INFO](notebook) - Inference...\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[2025-03-11 23:39:13.628][ INFO](maisi.controlnet.infer) - Number of GPUs: 8\n",
-      "[2025-03-11 23:39:13.628][ INFO](maisi.controlnet.infer) - World_size: 1\n",
-      "[2025-03-11 23:39:14.205][ INFO](maisi.controlnet.infer) - trained autoencoder model is not loaded.\n",
-      "[2025-03-11 23:39:15.418][ INFO](maisi.controlnet.infer) - trained diffusion model is not loaded.\n",
-      "[2025-03-11 23:39:15.418][ INFO](maisi.controlnet.infer) - set scale_factor -> 1.0.\n",
-      "2025-03-11 23:39:15,917 - INFO - 'dst' model updated: 158 of 206 variables.\n",
-      "[2025-03-11 23:39:15.922][ INFO](maisi.controlnet.infer) - trained controlnet is not loaded.\n",
-      "[2025-03-11 23:39:16.582][ INFO](root) - `controllable_anatomy_size` is not provided.\n",
-      "[2025-03-11 23:39:16.584][ INFO](root) - ---- Start generating latent features... ----\n",
-      "[2025-03-11 23:39:17.178][ INFO](root) - ---- Latent features generation time: 0.5939664840698242 seconds ----\n",
-      "[2025-03-11 23:39:17.180][ INFO](root) - ---- Start decoding latent features into images... ----\n",
-      "[2025-03-11 23:39:18.003][ INFO](root) - ---- Image decoding time: 0.8231167793273926 seconds ----\n",
-      "2025-03-11 23:39:18,299 INFO image_writer.py:197 - writing: temp_work_dir_controlnet_train_demo/outputs/sample_20250311_233918_283950_image.nii.gz\n",
-      "2025-03-11 23:39:18,649 INFO image_writer.py:197 - writing: temp_work_dir_controlnet_train_demo/outputs/sample_20250311_233918_283950_label.nii.gz\n",
+      "[2025-03-12 22:27:53.399][ INFO](maisi.controlnet.infer) - Number of GPUs: 8\n",
+      "[2025-03-12 22:27:53.400][ INFO](maisi.controlnet.infer) - World_size: 1\n",
+      "[2025-03-12 22:27:54.101][ INFO](maisi.controlnet.infer) - trained autoencoder model is not loaded.\n",
+      "[2025-03-12 22:27:55.286][ INFO](maisi.controlnet.infer) - trained diffusion model is not loaded.\n",
+      "[2025-03-12 22:27:55.286][ INFO](maisi.controlnet.infer) - set scale_factor -> 1.0.\n",
+      "2025-03-12 22:27:55,756 - INFO - 'dst' model updated: 158 of 206 variables.\n",
+      "[2025-03-12 22:27:55.761][ INFO](maisi.controlnet.infer) - trained controlnet is not loaded.\n",
+      "[2025-03-12 22:27:56.340][ INFO](root) - `controllable_anatomy_size` is not provided.\n",
+      "[2025-03-12 22:27:56.344][ INFO](root) - ---- Start generating latent features... ----\n",
+      "[2025-03-12 22:27:58.065][ INFO](root) - ---- Latent features generation time: 1.7215001583099365 seconds ----\n",
+      "[2025-03-12 22:27:58.066][ INFO](root) - ---- Start decoding latent features into images... ----\n",
+      "[2025-03-12 22:27:58.838][ INFO](root) - ---- Image decoding time: 0.7712326049804688 seconds ----\n",
+      "2025-03-12 22:27:59,142 INFO image_writer.py:197 - writing: temp_work_dir_controlnet_train_demo/outputs/sample_20250312_222759_124463_image.nii.gz\n",
+      "2025-03-12 22:27:59,487 INFO image_writer.py:197 - writing: temp_work_dir_controlnet_train_demo/outputs/sample_20250312_222759_124463_label.nii.gz\n",
       "\n",
       "\n",
-      "  0%|                                                                                   | 0/1 [00:00<?, ?it/s]\n",
-      "100%|███████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s]\n",
-      "100%|███████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.92it/s]\n",
+      "  0%|          | 0/1 [00:00<?, ?it/s]\n",
+      "100%|██████████| 1/1 [00:01<00:00,  1.62s/it]\n",
+      "100%|██████████| 1/1 [00:01<00:00,  1.62s/it]\n",
       "\n"
      ]
     }
@@ -582,8 +576,6 @@
     "    \"--training-config\",\n",
     "    train_config_filepath,\n",
     "]\n",
-    "if include_body_region:\n",
-    "    module_args.append(\"--include_body_region\")\n",
     "\n",
     "run_torchrun(module, module_args, num_gpus=num_gpus)"
    ]
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,9 @@`
`9`	`9`	`"weighted_loss": 100`
`10`	`10`	`},`
`11`	`11`	`"controlnet_infer": {`
`12`		`- "num_inference_steps": 1000,`
`13`		`- "autoencoder_sliding_window_infer_size": [96, 96, 96]`
	`12`	`+ "num_inference_steps": 10,`
	`13`	`+ "autoencoder_sliding_window_infer_size": [80, 80, 80],`
	`14`	`+ "autoencoder_sliding_window_infer_overlap": 0.4,`
	`15`	`+ "modality": 1`
`14`	`16`	`}`
`15`	`17`	`}`
Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,7 @@`
`29`	`29`	`0`
`30`	`30`	`],`
`31`	`31`	`"random_seed": 0,`
`32`		`- "num_inference_steps": 10`
	`32`	`+ "num_inference_steps": 10,`
	`33`	`+ "modality": 1`
`33`	`34`	`}`
`34`	`35`	`}`