diff --git a/vla/benchmarks/class_distribution_using_segmentation/DinoV2Features.py b/vla/benchmarks/class_distribution_using_segmentation/DinoV2Features.py new file mode 100644 index 0000000..bb1630c --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/DinoV2Features.py @@ -0,0 +1,31 @@ +import cv2 +import torch +import numpy as np +import torchvision.transforms as T + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# ===================================================================== +# Load DINOv2 +# ===================================================================== +dinov2 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14').eval().to(device) + +# ===================================================================== +# Preprocessing +# ===================================================================== +preprocessor = T.Compose([ + T.ToTensor(), + T.Resize((504, 504)), + T.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), +]) + + +def extract_full_features(img): + with torch.no_grad(): + full_feats = dinov2.get_intermediate_layers(img, n=1)[0] + return full_feats + +class Dinov2Features_full: + def encode(self, img): + return extract_full_features(img) \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/__init__.py b/vla/benchmarks/class_distribution_using_segmentation/__init__.py new file mode 100644 index 0000000..294ff01 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/__init__.py @@ -0,0 +1 @@ +from .train import benchmark diff --git a/vla/benchmarks/class_distribution_using_segmentation/builders.py b/vla/benchmarks/class_distribution_using_segmentation/builders.py new file mode 100644 index 0000000..948817d --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/builders.py @@ -0,0 +1,97 @@ +import json +import os +import random +from abc import ABC, abstractmethod +from pathlib import Path +from 
typing import List, Tuple, Dict, Any, Optional +from PIL import Image +import numpy as np +from tqdm import tqdm + +# Helpers + + +def safe_load_json(path: str) -> Optional[dict]: + if not os.path.exists(path): + return None + try: + with open(path, "r") as f: + return json.load(f) + except Exception: + return None + + +# Base Builder (Abstract) + + +class DatasetBuilder(ABC): + def __init__(self, directory: str, dataset_size: int = 200, train_split: float = 0.25, random_seed=None, full_folder=False): + self.directory = directory + self.dataset_size = dataset_size + self.train_split = train_split + self.random_seed = random_seed + self.full_folder = full_folder + if self.random_seed is not None: + random.seed(self.random_seed) + + @abstractmethod + def extract_samples(self) -> List[Tuple[List, str]]: + """ + Must return: + [([class_distribution_probs], image_path)] + """ + ... + + @abstractmethod + def build(self, json_name: str) -> Tuple[str, str, list, list]: + """ + Must return: + (train_json_path, test_json_path) + """ + ... 
+
+# Class Distribution Builder
+
+class ClassDistributionDatasetBuilder(DatasetBuilder):
+
+    def extract_samples(self) -> List[Tuple[List, str]]:
+        """Gather (class_probs, image_stem_path)."""
+        paths = []
+        basepath = Path(self.directory)
+        images_subfolders = list(basepath.rglob("images"))
+
+        for img_folder_path in images_subfolders:
+            distrs_path = img_folder_path.parent / "distributions"
+            images_folder = list(img_folder_path.rglob("*.png"))
+            for img_path in images_folder:
+                distr_path = (distrs_path / img_path.stem).with_suffix(".npy")
+                if not distr_path.exists():
+                    continue
+                paths.append((img_path, distr_path))
+        return paths
+
+    def build(self, json_name: str) -> Tuple[str, str, list, list]:
+        samples = self.extract_samples()
+        if not samples:
+            raise Exception("No valid samples found in dataset.")
+
+        if self.full_folder:
+            train_split = 0
+        else:
+            train_split = int(self.train_split * len(samples))
+
+        random.shuffle(samples)
+
+        train_out = samples[:train_split]
+        test_out = samples[train_split:]
+
+        if self.random_seed is not None:
+            return "", "", train_out, test_out
+
+        train_file = f"train_{json_name}.json"
+        test_file = f"test_{json_name}.json"
+
+        with open(train_file, "w") as f: json.dump(train_out, f, default=str)  # default=str: samples hold pathlib.Path objects
+        with open(test_file, "w") as f: json.dump(test_out, f, default=str)  # with-block also closes the previously leaked handle
+
+        return train_file, test_file, [], []
diff --git a/vla/benchmarks/class_distribution_using_segmentation/config.py b/vla/benchmarks/class_distribution_using_segmentation/config.py
new file mode 100644
index 0000000..1d54b36
--- /dev/null
+++ b/vla/benchmarks/class_distribution_using_segmentation/config.py
@@ -0,0 +1,32 @@
+from enum import Enum
+import warnings
+
+class ConfigPaths:
+    path_to_raw_data = None
+    feature_store_path = "./precomputed"
+
+class TrainConfig:
+    learning_rate = 1e-3
+    momentum = 0.9
+    epochs = 10
+    output_activation = "none"  # "none", "softmax", "sigmoid"
+    num_layers = 1
+    layers_sizes = [512]
+
+def construct_configs(path_to_raw_data=None, feature_store_path=None,
+                      learning_rate=1e-3,
momentum=0.9, epochs=10, output_activation="none", num_layers=1, layers_sizes=[512]): + TrainConfig.learning_rate = learning_rate + TrainConfig.momentum = momentum + TrainConfig.epochs = epochs + TrainConfig.output_activation = output_activation + TrainConfig.num_layers = num_layers + TrainConfig.layers_sizes = layers_sizes + if not path_to_raw_data: + raise ValueError("Path to raw data must be set") + else: + ConfigPaths.path_to_raw_data = path_to_raw_data + if not feature_store_path: + warnings.warn("Feature store path is not set by config. If benchmark will be ran with use_precomputed_features " + "default folder ./precomputed will be used to store features") + else: + ConfigPaths.feature_store_path = feature_store_path \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/dataset.py b/vla/benchmarks/class_distribution_using_segmentation/dataset.py new file mode 100644 index 0000000..ac36877 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/dataset.py @@ -0,0 +1,34 @@ +import torch +from torch.utils.data import Dataset, DataLoader +import numpy as np + +from PIL import Image + +# PyTorch Dataset + + +class LOSSegmDataset(Dataset): + def __init__(self, items, transform=None, feature_store=None): + self.items = items + self.transform = transform + self.feature_store = feature_store + + def __len__(self): + return len(self.items) + + def load_image(self, img_path): + image = Image.open(img_path).convert("RGB") + + if self.transform: + image = self.transform(image) + return image + + def __getitem__(self, idx): + img_path, distr_path = self.items[idx] + distribution = torch.from_numpy(np.load(distr_path)) + feature_path = str(img_path).strip(".png").replace("/", "_") + if self.feature_store and self.feature_store.exists(feature_path): + features = self.feature_store.load(feature_path) + return distribution, features + + return distribution, self.load_image(img_path) diff --git 
a/vla/benchmarks/class_distribution_using_segmentation/example_config.json b/vla/benchmarks/class_distribution_using_segmentation/example_config.json new file mode 100644 index 0000000..f310443 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/example_config.json @@ -0,0 +1,10 @@ +{ + "path_to_raw_data" : "./2026_LOS_SEGM", + "feature_store_path" : "./precomputed", + "learning_rate" : 1e-3, + "momentum" : 0.9, + "epochs" : 20, + "output_activation" : "none", + "num_layers" : 2, + "layers_sizes" : [128, 128] +} diff --git a/vla/benchmarks/class_distribution_using_segmentation/features.py b/vla/benchmarks/class_distribution_using_segmentation/features.py new file mode 100644 index 0000000..fdd3b22 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/features.py @@ -0,0 +1,25 @@ +import os +import torch +from pathlib import Path +from tqdm import tqdm + +class FeatureStore: + """ + Saves and loads precomputed features to avoid running DINO inside dataloaders. 
+ """ + + def __init__(self, root="./precomputed_features"): + self.root = Path(root) + self.root.mkdir(parents=True, exist_ok=True) + + def feature_path(self, img_stem: str) -> Path: + return self.root / f"{Path(img_stem).name}.pt" + + def exists(self, img_stem: str) -> bool: + return self.feature_path(img_stem).exists() + + def save(self, img_stem: str, tensor: torch.Tensor): + torch.save(tensor.cpu(), self.feature_path(img_stem)) + + def load(self, img_stem: str) -> torch.Tensor: + return torch.load(self.feature_path(img_stem)) \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/images_to_distribution.py b/vla/benchmarks/class_distribution_using_segmentation/images_to_distribution.py new file mode 100644 index 0000000..7a701d0 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/images_to_distribution.py @@ -0,0 +1,37 @@ +# This file is used to convert segmented images to npy files which contains class distribution +import numpy as np +from pathlib import Path +from PIL import Image + +paths = [] +unique_colors_list = [] +basepath = Path("./2026_LOS_SEGM") +segmentation_subfolders = list(basepath.rglob("segmentation")) + +stub = 0 + +for segm_path in segmentation_subfolders: + images_folder = list(segm_path.rglob("*.png")) + for segm_image_path in images_folder: + segm_img = np.array(Image.open(segm_image_path)) + unique = list(np.unique(segm_img.reshape(-1, segm_img.shape[2]), axis=0)) + for unique_item in unique: + unique_item = list(unique_item) + if unique_item not in unique_colors_list: + unique_colors_list.append(unique_item) + +unique_colors_dict = {str(value): index for index, value in enumerate(unique_colors_list)} +samples = [] +num_classes = len(unique_colors_dict) +for segm_path in segmentation_subfolders: + distribution_path = segm_path.parent / 'distributions' + distribution_path.mkdir(parents=True, exist_ok=True) + images_folder = list(segm_path.rglob("*.png")) + for segm_image_path in 
images_folder:
+        segm_img = np.array(Image.open(segm_image_path))
+        class_probs = [0] * num_classes
+        color_list = list(segm_img.reshape(-1, segm_img.shape[2]))
+        for color in color_list:
+            class_probs[unique_colors_dict[str(list(color))]] += 1
+        class_probs = np.array(class_probs) / len(color_list)
+        np.save(f'{str(distribution_path)}/{segm_image_path.stem}.npy', class_probs)
diff --git a/vla/benchmarks/class_distribution_using_segmentation/manager.py b/vla/benchmarks/class_distribution_using_segmentation/manager.py
new file mode 100644
index 0000000..707a800
--- /dev/null
+++ b/vla/benchmarks/class_distribution_using_segmentation/manager.py
@@ -0,0 +1,57 @@
+import json
+from torch.utils.data import DataLoader
+from builders import ClassDistributionDatasetBuilder
+from dataset import LOSSegmDataset
+import numpy as np
+
+
+class DatasetManager:
+    def __init__(self, directory, dataset_size=200, train_split=0.25,
+                 random_seed=None, full_folder=False):
+        self.random_seed = random_seed
+
+        self.builder = ClassDistributionDatasetBuilder(directory, dataset_size=dataset_size,
+                                                       train_split=train_split, random_seed=self.random_seed,
+                                                       full_folder=full_folder)
+
+    def create(self, name):
+        if self.random_seed is not None:
+            _, _, train_set, test_set = self.builder.build(name)
+            return train_set, test_set
+        train_file, test_file, _, _ = self.builder.build(name)  # was "[], [] =": legal empty-unpack targets, but fragile and obscure
+        print(f"Train set: {train_file}")
+        print(f"Test set: {test_file}")
+        return train_file, test_file
+
+
+def load_json(path):
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def make_dataloaders(train_json, test_json, transform=None, feature_store=None, batch=32, workers=2, dataset_manager=None,
+                     generalization_dataset_manager=None):
+
+    if dataset_manager is not None:
+        train_items, test_items = dataset_manager.create("")
+    else:
+        train_items = load_json(train_json)
+        test_items = load_json(test_json)
+
+    generalization_items = []
+    if generalization_dataset_manager is not None:
+        _,
generalization_items = generalization_dataset_manager.create("") + + nc = len(np.load(train_items[0][1])) + + train_ds = LOSSegmDataset(train_items, transform, feature_store) + test_ds = LOSSegmDataset(test_items, transform, feature_store) + generalization_ds = LOSSegmDataset(generalization_items, transform, feature_store) + + return ( + DataLoader(train_ds, batch_size=batch, shuffle=True, num_workers=workers), + DataLoader(test_ds, batch_size=batch, shuffle=False, num_workers=workers), + DataLoader(test_ds, batch_size=1, shuffle=False), + DataLoader(generalization_ds, batch_size=1, shuffle=False), + nc + ) diff --git a/vla/benchmarks/class_distribution_using_segmentation/metrics.py b/vla/benchmarks/class_distribution_using_segmentation/metrics.py new file mode 100644 index 0000000..ce400fb --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/metrics.py @@ -0,0 +1,72 @@ +import torch +from tqdm import tqdm +import torch.nn.functional as F + +def infer_model(classifier, device, img, loader, model, ttype): + if loader.dataset.feature_store is None: + feats = model.encode(img.to(device)) + else: + feats = img.to(device) + feats = feats.to(ttype) + out = classifier(feats) + return out + +def score_loss(model, classifier, loader, device, ttype): + criterion = torch.nn.KLDivLoss(reduction='batchmean') + with torch.no_grad(): + sum_loss = 0 + for y, img in tqdm(loader): + out = infer_model(classifier, device, img, loader, model, ttype) + log_probs = F.log_softmax(out, dim=1) + loss = criterion(log_probs, y.to(device)) + sum_loss += loss.item() + sum_loss /= len(loader) + + print("Average loss:", sum_loss) + + +def symmetric_topk_mass_recall(logits, targets, k=10): + + probs = F.softmax(logits, dim=-1) + + topk_pred_idx = probs.topk(k, dim=-1).indices + target_mass_in_pred_topk = torch.gather(targets, dim=-1, index=topk_pred_idx) + recall_p_to_q = target_mass_in_pred_topk.sum(dim=-1) + + topk_target_idx = targets.topk(k, dim=-1).indices + 
pred_mass_in_target_topk = torch.gather(probs, dim=-1, index=topk_target_idx) + recall_q_to_p = pred_mass_in_target_topk.sum(dim=-1) + + symmetric_recall = 0.5 * (recall_p_to_q + recall_q_to_p) + + return symmetric_recall.mean() + +def symmetric_topk_recall_score(model, classifier, loader, device, ttype): + with torch.no_grad(): + average_recall = 0 + for y, img in tqdm(loader): + out = infer_model(classifier, device, img, loader, model, ttype).squeeze() + y = y.squeeze().to(device) + recall = symmetric_topk_mass_recall(out, y, k=17) + average_recall += recall.item() + average_recall /= len(loader) + print("Average symmetric top-k recall:", average_recall) + +def distribution_iou(logits, targets): + probs = F.softmax(logits, dim=-1) + + intersection = torch.minimum(probs, targets).sum(dim=-1) + union = torch.maximum(probs, targets).sum(dim=-1) + + return (intersection / (union + 1e-8)).mean() + +def IoU_score(model, classifier, loader, device, ttype): + with torch.no_grad(): + average_recall = 0 + for y, img in tqdm(loader): + out = infer_model(classifier, device, img, loader, model, ttype).squeeze() + y = y.squeeze().to(device) + recall = distribution_iou(out, y) + average_recall += recall.item() + average_recall /= len(loader) + print("Average IoU score:", average_recall) \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/model.py b/vla/benchmarks/class_distribution_using_segmentation/model.py new file mode 100644 index 0000000..3806053 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/model.py @@ -0,0 +1,57 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class SimpleClassifier(nn.Module): + def __init__( + self, + num_features, + output_size, + output_activation, + hidden_size=[512], + num_hidden_layers=1 + ): + """ + num_features — input feature size + output_size — number of classes OR 1 (regression) + hidden_size — size of each hidden layer + num_hidden_layers — 0, 
1, 2, ... + """ + super().__init__() + + self.output_activation = output_activation + + layers = [] + + if num_hidden_layers == 0: + # direct linear mapping + layers.append(nn.Linear(num_features, output_size)) + else: + # first layer + layers.append(nn.Linear(num_features, hidden_size[0])) + layers.append(nn.ReLU()) + + # intermediate hidden layers + for i in range(num_hidden_layers - 1): + layers.append(nn.Linear(hidden_size[i], hidden_size[i+1])) + layers.append(nn.ReLU()) + + # output layer + layers.append(nn.Linear(hidden_size[-1], output_size)) + + self.model = nn.Sequential(*layers) + + def forward(self, x): + x = torch.flatten(x, 1) + x = self.model(x) + + # activation logic + if self.output_activation == "softmax": + return F.softmax(x, dim=1) + elif self.output_activation == "sigmoid": + return torch.sigmoid(x) + elif self.output_activation == "none": + return x + else: + raise ValueError(f"Unknown output activation: {self.output_activation}") \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/precompute.py b/vla/benchmarks/class_distribution_using_segmentation/precompute.py new file mode 100644 index 0000000..542ee1f --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/precompute.py @@ -0,0 +1,30 @@ +import torch +from tqdm import tqdm + +def precompute_features(model, dataset, store, device): + """ + Run the model on all images once and cache features. 
+ dataset.items = list of (label, path_stem) + """ + + # model = model.to(device) + # model.eval() + + print("\n[Precompute] Starting feature extraction…\n") + + with torch.no_grad(): + for img_path, _ in tqdm(dataset.items): + feature_path = str(img_path).strip(".png").replace("/", "_") + if store.exists(feature_path): + continue + + # load image + img = dataset.load_image(img_path).to(device) + + # run feature extractor + features = model.encode(img.unsqueeze(0)).squeeze() + + # save features + store.save(feature_path, features) + + print("\n[Precompute] Feature extraction complete.\n") diff --git a/vla/benchmarks/class_distribution_using_segmentation/test_dino_example.py b/vla/benchmarks/class_distribution_using_segmentation/test_dino_example.py new file mode 100644 index 0000000..fe8d8c8 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/test_dino_example.py @@ -0,0 +1,14 @@ +from DinoV2Features import Dinov2Features_full, preprocessor +from train import benchmark + + +if __name__ == "__main__": + + benchmark( + Dinov2Features_full(), + preprocessor, + random_seed=10, + generalization_set_folder="./2026_LOS_SEGM/Night_clear/Mountain_Range", + config_path="example_config.json", + use_precomputed_features=True + ) diff --git a/vla/benchmarks/class_distribution_using_segmentation/test_eagle_example.py b/vla/benchmarks/class_distribution_using_segmentation/test_eagle_example.py new file mode 100755 index 0000000..9d1b913 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/test_eagle_example.py @@ -0,0 +1,40 @@ +from transformers import AutoProcessor, AutoModel +import torch +import numpy as np +from train import benchmark + +def preprocess(frame): + frame = np.resize(np.asarray(frame).astype(np.float32), (504, 504, 3)) + frame = torch.tensor(frame) + return frame + +model_id = "./models/Eagle2.5-8B" + +class ModelWrapper: + def __init__(self): + self.model = None + self.processor = None + def encode(self, x): + # Load on 
demand + if self.model is None: + self.model = AutoModel.from_pretrained(model_id, + trust_remote_code=True, torch_dtype=torch.bfloat16) + #attn_implementation="sdpa") + self.model = self.model.to("cuda") + self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, use_fast=True) + self.processor.tokenizer.padding_side = "left" + text_list = ['<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|im_end|>\n<|im_start|>assistant\n'] + inputs = self.processor(text=text_list, images=[x], return_tensors="pt", padding=True) + with torch.no_grad(): + features = self.model.vision_model(inputs['pixel_values']).last_hidden_state + return features + + +benchmark( + ModelWrapper(), + preprocess, + random_seed=10, + generalization_set_folder="./2026_LOS_SEGM/Night_clear/Mountain_Range", + config_path="example_config.json", + use_precomputed_features=False +) diff --git a/vla/benchmarks/class_distribution_using_segmentation/test_llava_example.py b/vla/benchmarks/class_distribution_using_segmentation/test_llava_example.py new file mode 100755 index 0000000..949bec4 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/test_llava_example.py @@ -0,0 +1,44 @@ +import torch +import numpy as np +from transformers import LlavaNextVideoProcessor, LlavaNextVideoForConditionalGeneration +from train import benchmark +import json +import os +os.environ["TOKENIZERS_PARALLELISM"] = "true" + +def preprocess(frame): + frame = np.resize(np.asarray(frame).astype(np.float32), (320, 240, 3)) + frame = torch.tensor(frame) + return frame + +model_id = "./models/LLaVA-NeXT-Video-7B-hf" + +class ModelWrapper: + def __init__(self): + self.model = None + self.processor = None + def encode(self, x): + # Load on demand + if self.model is None: + self.model = LlavaNextVideoForConditionalGeneration.from_pretrained( + model_id, + torch_dtype=torch.float16, + low_cpu_mem_usage=True, + ) + self.model = self.model.to("cuda") + self.processor = 
LlavaNextVideoProcessor.from_pretrained(model_id) + prompt = "USER: \nASSISTANT:" + inputs = self.processor(text=prompt, images=[x], padding=True, return_tensors="pt").to(self.model.device) + #inputs = inputs.to("cuda") + with torch.no_grad(): + features = torch.stack(self.model.get_image_features(inputs['pixel_values'], image_sizes=inputs['image_sizes'])) + return features + +benchmark( + ModelWrapper(), + preprocess, + random_seed=10, + generalization_set_folder="./2026_LOS_SEGM/Night_clear/Mountain_Range", + config_path="example_config.json", + use_precomputed_features=True +) diff --git a/vla/benchmarks/class_distribution_using_segmentation/test_resnet_example.py b/vla/benchmarks/class_distribution_using_segmentation/test_resnet_example.py new file mode 100644 index 0000000..9dda7dc --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/test_resnet_example.py @@ -0,0 +1,37 @@ +from train import benchmark + +import numpy as np +import torchvision.transforms as T +import torchvision.models as models +import torch + +def preprocess(frame): + transform = T.Compose([ + T.ToTensor(), + T.Resize((640, 640)), + ]) + return transform(np.asarray(frame).astype(np.float32)) / 255.0 + +class ResnetWrapper: + def __init__(self): + self.model = None + def encode(self, x): + # Load on demand + if self.model is None: + self.model = models.resnet152(pretrained=True) + self.model.fc = torch.nn.Identity() + self.model.cuda().eval() + with torch.no_grad(): + features = self.model(x) + return features + +if __name__ == "__main__": + + benchmark( + ResnetWrapper(), + preprocess, + random_seed=10, + generalization_set_folder="./2026_LOS_SEGM/Night_clear/Mountain_Range", + config_path="example_config.json", + use_precomputed_features=True + ) \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/test_siglip_example.py b/vla/benchmarks/class_distribution_using_segmentation/test_siglip_example.py new file mode 100644 index 
0000000..d955338 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/test_siglip_example.py @@ -0,0 +1,35 @@ +from train import benchmark + +import torch +from transformers import AutoModel, AutoProcessor + +ckpt = "./models/siglip2-so400m-patch14-384" + +processor = AutoProcessor.from_pretrained(ckpt) + +def preprocess(frame): + inputs = processor(images=[frame], return_tensors="pt").to("cuda") + return inputs.data["pixel_values"].squeeze() + +class SiglipWrapper: + def __init__(self): + self.model = None + def encode(self, x): + # Load on demand + if self.model is None: + self.model = AutoModel.from_pretrained(ckpt, device_map="auto").cuda().eval() + with torch.no_grad(): + features = self.model.get_image_features(x) + return features + + +if __name__ == "__main__": + + benchmark( + SiglipWrapper(), + preprocess, + random_seed=10, + generalization_set_folder="./2026_LOS_SEGM/Night_clear/Mountain_Range", + config_path="example_config.json", + use_precomputed_features=True + ) \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/test_yolo_example.py b/vla/benchmarks/class_distribution_using_segmentation/test_yolo_example.py new file mode 100644 index 0000000..77d47db --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/test_yolo_example.py @@ -0,0 +1,34 @@ +from train import benchmark +from ultralytics import YOLO +import numpy as np +import torchvision.transforms as T +import torch + +def preprocess(frame): + transform = T.Compose([ + T.ToTensor(), + T.Resize((640, 640)) + ]) + return transform(np.asarray(frame).astype(np.float32)) / 255.0 + +class Yolo11Wrapper: + def __init__(self): + self.model = None + def encode(self, x): + # Load on demand + if self.model is None: + self.model = YOLO("./models/YOLO11x-cls/yolo11x-cls.pt").model.model[:10].to("cuda") + with torch.no_grad(): + features = self.model(x) + return features + +if __name__ == "__main__": + + benchmark( + 
Yolo11Wrapper(), + preprocess, + random_seed=10, + generalization_set_folder="./2026_LOS_SEGM/Night_clear/Mountain_Range", + config_path="example_config.json", + use_precomputed_features=True + ) \ No newline at end of file diff --git a/vla/benchmarks/class_distribution_using_segmentation/train.py b/vla/benchmarks/class_distribution_using_segmentation/train.py new file mode 100644 index 0000000..fd70659 --- /dev/null +++ b/vla/benchmarks/class_distribution_using_segmentation/train.py @@ -0,0 +1,190 @@ +import os +import json +import torch +from torch import nn +import torch.nn.functional as F +from tqdm import tqdm +from config import TrainConfig, ConfigPaths, construct_configs +from manager import DatasetManager, make_dataloaders +from model import SimpleClassifier +from features import FeatureStore +from precompute import precompute_features +from metrics import score_loss, IoU_score, symmetric_topk_recall_score + +ttype = torch.float32 + +def extract_features_size(model, train_loader, device): + for _, data in enumerate(train_loader, 0): + _, img = data + img = img[0, :, :, :].unsqueeze(0).to(device) + features = model.encode(img) + return features.flatten().size()[0] + + +def train_classifier( + model, + classifier, + train_loader, + test_loader, + device, + save_dir="./checkpoints" +): + os.makedirs(save_dir, exist_ok=True) + + optimizer = torch.optim.SGD( + classifier.parameters(), + lr=TrainConfig.learning_rate, + momentum=TrainConfig.momentum + ) + + criterion = nn.KLDivLoss(reduction='batchmean') + + best_loss = float("inf") + best_path = os.path.join(save_dir, "best_classifier.pt") + last_path = os.path.join(save_dir, "last_classifier.pt") + + for epoch in range(TrainConfig.epochs): + classifier.train() + total_loss = 0 + torch.cuda.empty_cache() + for labels, images in tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False): + images, labels = images.to(device), labels.to(ttype).to(device) + optimizer.zero_grad() + + if 
train_loader.dataset.feature_store is None: + feats = model.encode(images) + else: + feats = images + feats = feats.to(ttype) + out = classifier(feats) + + log_probs = F.log_softmax(out, dim=1) + loss = criterion(log_probs, labels) + loss.backward() + optimizer.step() + + total_loss += loss.item() + + optimizer.zero_grad() + torch.cuda.empty_cache() + avg_train = total_loss / len(train_loader) + print(f"[Epoch {epoch+1}] Train Loss: {avg_train}") + + if epoch % 5 == 0: + print(f"Testing, epoch {epoch+1}") + classifier.eval() + total_test = 0 + with torch.no_grad(): + for labels, images in test_loader: + images, labels = images.to(device), labels.to(device) + if test_loader.dataset.feature_store is None: + feats = model.encode(images) + else: + feats = images + feats = feats.to(ttype) + out = classifier(feats) + log_probs = F.log_softmax(out, dim=1) + total_test += criterion(log_probs, labels).item() + + avg_test = total_test / len(test_loader) + print(f"[Epoch {epoch+1}] Test Loss: {avg_test}") + + if avg_test < best_loss: + best_loss = avg_test + torch.save({ + "epoch": epoch, + "model_state": classifier.state_dict(), + "optimizer_state": optimizer.state_dict(), + "test_loss": best_loss + }, best_path) + print(f"Saved BEST checkpoint at epoch {epoch+1} → {best_path}") + + # Save last epoch (always) + torch.save({ + "epoch": epoch, + "model_state": classifier.state_dict(), + "optimizer_state": optimizer.state_dict(), + "test_loss": avg_test + }, last_path) + + print(f"Training finished. 
Best test loss = {best_loss}") + print(f"Best checkpoint: {best_path}") + print(f"Last checkpoint: {last_path}") + + return classifier + +def benchmark(model, preprocessor, train_json="train_los_dataset.json", test_json="test_los_dataset.json", + use_precomputed_features=True, random_seed=None, + generalization_set_folder="", config_path="example_config.json"): + + with open(config_path, 'r') as file: + config_dict = json.load(file) + construct_configs(**config_dict) + print(f"{TrainConfig.learning_rate=}; {TrainConfig.momentum=}; {TrainConfig.epochs=}; " + f"{TrainConfig.output_activation=}; {TrainConfig.num_layers=}; {TrainConfig.layers_sizes=}\n") + dataset_manager = None + if random_seed is not None: + dataset_manager = DatasetManager(ConfigPaths.path_to_raw_data, random_seed=random_seed) + + generalization_dataset_manager = None + if generalization_set_folder != "": + generalization_dataset_manager = DatasetManager(generalization_set_folder, random_seed=random_seed, + full_folder=True) + + store = FeatureStore(os.path.join(ConfigPaths.feature_store_path, type(model).__name__)) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + train_loader, test_loader, score_loader, generalization_loader, num_classes = \ + make_dataloaders( + train_json, + test_json, + preprocessor, + feature_store=(store if use_precomputed_features else None), + workers=8, + dataset_manager=dataset_manager, + generalization_dataset_manager=generalization_dataset_manager, + batch=32 + ) + + if use_precomputed_features: + print("[Benchmark] Precomputing features...") + precompute_features( + model=model, + dataset=train_loader.dataset, + store=store, + device=device + ) + precompute_features( + model=model, + dataset=test_loader.dataset, + store=store, + device=device + ) + if generalization_dataset_manager is not None: + precompute_features( + model=model, + dataset=generalization_loader.dataset, + store=store, + device=device + ) + + if use_precomputed_features: + 
feat_dim = train_loader.dataset[0][1].flatten().shape[0] + else: + feat_dim = extract_features_size(model, train_loader, device) + + classifier = SimpleClassifier(feat_dim, num_classes, TrainConfig.output_activation, + num_hidden_layers=TrainConfig.num_layers, + hidden_size=TrainConfig.layers_sizes).to(device).to(ttype) + + classifier = train_classifier(model, classifier, train_loader, test_loader, device) + print("Computing score on test set:\n") + symmetric_topk_recall_score(model, classifier, score_loader, device, ttype) + IoU_score(model, classifier, score_loader, device, ttype) + score_loss(model, classifier, score_loader, device, ttype) + if generalization_dataset_manager is not None: + print("Computing score on generalization set:\n") + symmetric_topk_recall_score(model, classifier, generalization_loader, device, ttype) + IoU_score(model, classifier, generalization_loader, device, ttype) + score_loss(model, classifier, generalization_loader, device, ttype)