Add benchmark & test files for the compiled clay encoder

srmsoumya · srmsoumya · commit c403c47acf12 · 2024-07-18T17:37:52.000+05:30
diff --git a/src/benchmark_encoder.py b/src/benchmark_encoder.py
@@ -0,0 +1,80 @@
+import argparse
+import time
+import warnings
+
+import torch
+
+warnings.filterwarnings("ignore")
+
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def get_data():
+    """
+    Generate random data tensors for model input.
+    """
+    cube = torch.randn(128, 3, 256, 256).to(DEVICE)
+    timestep = torch.randn(128, 4).to(DEVICE)
+    latlon = torch.randn(128, 4).to(DEVICE)
+    waves = torch.randn(3).to(DEVICE)
+    gsd = torch.randn(1).to(DEVICE)
+    return cube, timestep, latlon, waves, gsd
+
+
+def load_exported_model(eager=True):
+    """
+    Load the exported model from a file.
+
+    Args:
+        eager (bool): Flag to decide whether to use eager mode or compiled mode.
+    """
+    print("Loading exported model")
+    ep = torch.export.load("checkpoints/compiled/encoder.pt")
+    if eager:
+        model = ep.module()
+    else:
+        model = torch.compile(ep.module(), backend="inductor")
+    return model
+
+
+def benchmark_model(model):
+    """
+    Benchmark the model by running inference on randomly generated data.
+
+    Args:
+        model: The model to benchmark.
+    """
+    print("Benchmarking model")
+    start = time.time()
+    for i in range(20):
+        cube, timestep, latlon, waves, gsd = get_data()
+        with torch.inference_mode():
+            out = model(cube, timestep, latlon, waves, gsd)
+            print(
+                f"Iteration {i}: Output shapes - {out[0].shape}, {out[1].shape}, {out[2].shape}, {out[3].shape}"  # noqa E501
+            )
+    print("Time taken for inference: ", time.time() - start)
+
+
+def run(eager=True):
+    """
+    Run the exported model and benchmark it.
+
+    Args:
+        eager (bool): Flag to decide whether to use eager mode or compiled mode.
+    """
+    print("Running model")
+    model = load_exported_model(eager=eager)
+    benchmark_model(model)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Run benchmark for the exported model."
+    )
+    parser.add_argument(
+        "--eager", action="store_true", help="Use eager mode for running the model."
+    )
+    args = parser.parse_args()
+
+    run(args.eager)
diff --git a/src/export.py b/src/export.py
@@ -1,42 +1,52 @@
+import warnings
 from pathlib import Path
 
 import torch
 from torch.export import Dim
 
 from src.model import ClayMAEModule
 
+warnings.filterwarnings("ignore")
+
 CHECKPOINT_PATH = "checkpoints/clay-v1-base.ckpt"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# device = torch.device("cpu")
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+CHIP_SIZE = 256
 
 
 def get_data():
-    # Load data
-    cube = torch.randn(128, 3, 224, 224).to(device)
-    time = torch.randn(128, 4).to(device)
-    latlon = torch.randn(128, 4).to(device)
-    waves = torch.randn(3).to(device)
-    gsd = torch.randn(1).to(device)
-    return cube, time, latlon, waves, gsd
+    """
+    Generate random data tensors for model input.
+    """
+    cube = torch.randn(128, 3, CHIP_SIZE, CHIP_SIZE).to(DEVICE)
+    timestep = torch.randn(128, 4).to(DEVICE)
+    latlon = torch.randn(128, 4).to(DEVICE)
+    waves = torch.randn(3).to(DEVICE)
+    gsd = torch.randn(1).to(DEVICE)
+    return cube, timestep, latlon, waves, gsd
 
 
 def load_model():
-    module = ClayMAEModule.load_from_checkpoint(CHECKPOINT_PATH)
-    encoder = module.model.encoder  # Get the encoder
-    encoder = encoder.to(device)  # Move to device
+    """
+    Load the model from a checkpoint and prepare it for evaluation.
+    """
+    module = ClayMAEModule.load_from_checkpoint(
+        CHECKPOINT_PATH, shuffle=False, mask_ratio=0.0
+    )
+    encoder = module.model.encoder.eval()  # Get the encoder in eval mode
+    encoder = encoder.to(DEVICE)  # Move to the appropriate device
     return encoder
 
 
-def main():
-    # Load data
-    cube, time, latlon, waves, gsd = get_data()
-
-    # Load model
+def export_model():
+    """
+    Export the model with dynamic shapes for deployment.
+    """
+    cube, timestep, latlon, waves, gsd = get_data()
     encoder = load_model()
 
     # Define dynamic shapes for model export
-    batch_size = Dim("batch_size", min=2, max=128)  # Define batch size range
-    channel_bands = Dim("channel_bands", min=1, max=12)  # Define channel bands range
+    batch_size = Dim("batch_size", min=32, max=1200)
+    channel_bands = Dim("channel_bands", min=1, max=10)
 
     dynamic_shapes = {
         "cube": {0: batch_size, 1: channel_bands},
@@ -47,22 +57,17 @@ def main():
     }
 
     # Export model
-    exp_compiled_encoder = torch.export.export(
+    ep = torch.export.export(
         mod=encoder,
-        args=(cube, time, latlon, waves, gsd),
+        args=(cube, timestep, latlon, waves, gsd),
         dynamic_shapes=dynamic_shapes,
-        strict=False,
+        strict=True,
     )
 
-    # tensortrt compiled model
-    # trt_encoder = torch_tensorrt.dynamo.compile(
-    #     exp_compiled_encoder, [cube, time, latlon, waves, gsd]
-    # )
-
-    # Save model
+    # Save the exported model
     Path("checkpoints/compiled").mkdir(parents=True, exist_ok=True)
-    torch.export.save(exp_compiled_encoder, "checkpoints/compiled/encoder.pt")
+    torch.export.save(ep, "checkpoints/compiled/encoder.pt")
 
 
 if __name__ == "__main__":
-    main()
+    export_model()
diff --git a/src/model.py b/src/model.py
@@ -39,6 +39,10 @@ def __init__(  # noqa: PLR0913
         self.dim = dim
         self.cls_token = nn.Parameter(torch.randn(1, 1, dim) * 0.02)
 
+        # Required to compile & export the model
+        self.grid_size = 256 // 8
+        self.num_patches = self.grid_size**2
+
         self.patch_embedding = DynamicEmbedding(
             wave_dim=128,
             num_latent_tokens=128,
@@ -64,8 +68,9 @@ def add_encodings(self, patches, time, latlon, gsd):
         """Add position encoding to the patches"""
         B, L, D = patches.shape
 
-        grid_size = int(math.sqrt(L))
-        self.num_patches = grid_size**2
+        # grid_size = int(math.sqrt(L))
+        # self.num_patches = grid_size**2
+        grid_size = self.grid_size
 
         pos_encoding = (
             posemb_sincos_2d_with_gsd(
diff --git a/src/test_encoder.py b/src/test_encoder.py
@@ -0,0 +1,86 @@
+import torch
+
+from src.datamodule import ClayDataModule
+
+# Load the pre-trained Clay encoder model
+clay_encoder = torch.export.load("checkpoints/compiled/encoder.pt").module()
+
+
+def load_batch():
+    # Initialize the data module with appropriate parameters
+    dm = ClayDataModule(
+        data_dir="/home/ubuntu/data",
+        size=256,
+        metadata_path="configs/metadata.yaml",
+        batch_size=1,
+        num_workers=1,
+    )
+
+    # Setup the data module for the 'fit' stage
+    dm.setup(stage="fit")
+    metadata = dm.metadata
+
+    # Get the training data loader and create an iterator
+    trn_dl = dm.train_dataloader()
+    iter_dl = iter(trn_dl)
+
+    return iter_dl, metadata
+
+
+def prepare_data(sensor, metadata, device):
+    """
+    Load data from the sensor and transfer it to the specified device.
+
+    Args:
+    - sensor (dict): Sensor data containing 'pixels', 'time', 'latlon', and 'platform'.
+    - metadata (dict): Metadata information for different platforms.
+    - device (torch.device): The device to which the data should be transferred.
+
+    Returns:
+    - tuple: Transferred cube, timestep, latlon, waves, and gsd tensors.
+    """
+    cube = sensor["pixels"]
+    timestep = sensor["time"]
+    latlon = sensor["latlon"]
+    platform = sensor["platform"][0]
+
+    # Get wavelengths and ground sampling distance (gsd) from metadata
+    waves = torch.tensor(list(metadata[platform].bands.wavelength.values()))
+    gsd = torch.tensor([metadata[platform].gsd])
+
+    # Transfer data to the specified device
+    cube, timestep, latlon, waves, gsd = map(
+        lambda x: x.to(device), (cube, timestep, latlon, waves, gsd)
+    )
+    return cube, timestep, latlon, waves, gsd
+
+
+def main():
+    dl, metadata = load_batch()
+
+    # Fetch samples from the data loader
+    l8_c2l1 = next(dl)
+    l8_c2l2 = next(dl)
+    linz = next(dl)
+    naip = next(dl)
+    s1 = next(dl)
+    s2 = next(dl)
+
+    # Perform inference with the Clay encoder model
+    with torch.no_grad():
+        for sensor in (l8_c2l1, l8_c2l2, linz, naip, s1, s2):
+            # Load data and transfer to GPU
+            batch = prepare_data(sensor, metadata, torch.device("cuda"))
+
+            # Get patch embeddings from the encoder model
+            patch_embeddings, *_ = clay_encoder(*batch)
+
+            # Extract the class (CLS) embedding
+            cls_embedding = patch_embeddings[:, 0, :]
+
+            # Print the platform and the shape of the CLS embedding
+            print(sensor["platform"][0], cls_embedding.shape)
+
+
+if __name__ == "__main__":
+    main()