Skip to content

Commit 414d56c

Browse files
authored
Merge pull request #21 from DaddyWesker/main
Class distribution benchmark added
2 parents 904c6ce + d2dd603 commit 414d56c

19 files changed

+877
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import cv2
2+
import torch
3+
import numpy as np
4+
import torchvision.transforms as T
5+
6+
# Device selection: prefer CUDA when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# =====================================================================
# Load DINOv2
# =====================================================================
# Loads the DINOv2 ViT-S/14 backbone from torch.hub, switches it to eval
# mode and moves it to the selected device.
# NOTE(review): this runs at import time and needs network access on the
# first call — confirm that is acceptable for library consumers.
dinov2 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14').eval().to(device)

# =====================================================================
# Preprocessing
# =====================================================================
# Standard ImageNet mean/std normalization. 504 = 36 * 14, presumably a
# multiple of the ViT-S/14 patch size so the image tiles evenly into
# patches — TODO confirm.
preprocessor = T.Compose([
    T.ToTensor(),
    T.Resize((504, 504)),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
])
22+
23+
24+
def extract_full_features(img):
    """Return the patch-token features of *img* from DINOv2.

    Runs the backbone without gradient tracking and returns the first (and
    only) entry of ``get_intermediate_layers(img, n=1)``.
    """
    with torch.no_grad():
        return dinov2.get_intermediate_layers(img, n=1)[0]
28+
29+
class Dinov2Features_full:
    """Thin adapter exposing full DINOv2 feature extraction via ``encode``."""

    def encode(self, img):
        """Delegate to :func:`extract_full_features` for *img*."""
        features = extract_full_features(img)
        return features
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .train import benchmark
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import json
2+
import os
3+
import random
4+
from abc import ABC, abstractmethod
5+
from pathlib import Path
6+
from typing import List, Tuple, Dict, Any, Optional
7+
from PIL import Image
8+
import numpy as np
9+
from tqdm import tqdm
10+
11+
# Helpers
12+
13+
14+
def safe_load_json(path: str) -> Optional[dict]:
    """Load a JSON file, returning ``None`` instead of raising on failure.

    Args:
        path: Filesystem path of the JSON document.

    Returns:
        The parsed JSON object, or ``None`` when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    # Cheap fast path, kept from the original behavior.
    if not os.path.exists(path):
        return None
    try:
        with open(path, "r") as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
        # Narrowed from a bare ``except Exception`` so genuine programming
        # errors (e.g. TypeError from a bad argument) are no longer
        # silently swallowed.
        return None
22+
23+
24+
# Base Builder (Abstract)
25+
26+
27+
class DatasetBuilder(ABC):
    """Abstract base for dataset builders that split samples into train/test.

    Concrete subclasses implement sample discovery (``extract_samples``)
    and the split/serialization step (``build``).
    """

    def __init__(self, directory: str, dataset_size: int = 200, train_split: float = 0.25, random_seed=None, full_folder=False):
        # directory: root folder scanned for samples.
        # dataset_size: target sample count (not enforced here; presumably
        #   consumed by subclasses — TODO confirm).
        # train_split: fraction of samples assigned to the train set.
        # random_seed: when set, seeds the RNG and switches build() to
        #   return in-memory splits instead of writing JSON files.
        # full_folder: when True, every sample goes to the test split.
        self.directory = directory
        self.dataset_size = dataset_size
        self.train_split = train_split
        self.random_seed = random_seed
        self.full_folder = full_folder
        if self.random_seed is not None:
            # NOTE(review): this seeds the process-wide global RNG, not a
            # private random.Random instance — confirm that is intended.
            random.seed(self.random_seed)

    @abstractmethod
    def extract_samples(self) -> List[Tuple[List, str]]:
        """Discover samples under ``self.directory``.

        Returns a list of sample tuples; see the concrete subclass for the
        exact element layout. (NOTE(review): the annotation does not match
        what ``ClassDistributionDatasetBuilder`` actually yields —
        ``(image_path, distribution_path)`` pairs.)
        """
        ...

    @abstractmethod
    def build(self, json_name: str) -> Tuple[str, str, list, list]:
        """Split samples and persist or return them.

        Returns a 4-tuple ``(train_json_path, test_json_path, train_items,
        test_items)``: file paths with empty lists when splits are written
        to disk, or empty path strings with in-memory lists when
        ``random_seed`` is set.
        """
        ...
52+
53+
# Class Distribution Builder
54+
55+
class ClassDistributionDatasetBuilder(DatasetBuilder):
    """Builds train/test splits of (image, class-distribution) pairs.

    Expects a directory tree containing ``images`` folders with ``*.png``
    files and sibling ``distributions`` folders holding one matching
    ``*.npy`` class-distribution file per image stem.
    """

    def extract_samples(self) -> List[Tuple[List, str]]:
        """Collect ``(image_path, distribution_path)`` pairs.

        Images without a matching ``.npy`` distribution file are skipped.
        """
        paths = []
        basepath = Path(self.directory)
        images_subfolders = list(basepath.rglob("images"))

        for img_folder_path in images_subfolders:
            distrs_path = img_folder_path.parent / "distributions"
            for img_path in img_folder_path.rglob("*.png"):
                distr_path = (distrs_path / img_path.stem).with_suffix(".npy")
                if not distr_path.exists():
                    continue
                paths.append((img_path, distr_path))
        return paths

    def build(self, json_name: str) -> Tuple[str, str, list, list]:
        """Shuffle the discovered samples and split them into train/test.

        Returns:
            ``("", "", train_items, test_items)`` when ``random_seed`` is
            set (in-memory mode), otherwise
            ``(train_json_path, test_json_path, [], [])`` after writing the
            two split files.

        Raises:
            ValueError: when no valid samples were found.
        """
        samples = self.extract_samples()
        if not samples:
            # ValueError subclasses Exception, so existing callers that
            # caught Exception keep working.
            raise ValueError("No valid samples for type dataset.")

        if self.full_folder:
            # full_folder mode: everything goes to the test split.
            train_split = 0
        else:
            train_split = int(self.train_split * len(samples))

        random.shuffle(samples)

        train_out = samples[:train_split]
        test_out = samples[train_split:]

        if self.random_seed is not None:
            return "", "", train_out, test_out

        train_file = f"train_{json_name}.json"
        test_file = f"test_{json_name}.json"

        # Path objects are not JSON serializable, so stringify each pair
        # before dumping (the original passed Path objects straight to
        # json.dump, which raises TypeError), and use context managers so
        # the file handles are always closed.
        def _stringified(pairs):
            return [(str(img), str(distr)) for img, distr in pairs]

        with open(train_file, "w") as f:
            json.dump(_stringified(train_out), f)
        with open(test_file, "w") as f:
            json.dump(_stringified(test_out), f)

        return train_file, test_file, [], []
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from enum import Enum
2+
import warnings
3+
4+
class ConfigPaths:
    """Process-wide filesystem locations, populated by ``construct_configs``."""

    # Root of the raw dataset; must be set before running a benchmark.
    path_to_raw_data = None
    # Folder where precomputed features are cached.
    feature_store_path = "./precomputed"
7+
8+
class TrainConfig:
    """Training hyperparameters, stored as class-level globals and
    overwritten by ``construct_configs``."""

    learning_rate = 1e-3
    momentum = 0.9
    epochs = 10
    output_activation = "none" # "none", "softmax", "sigmoid"
    num_layers = 1
    # Hidden-layer widths; presumably its length matches num_layers — TODO confirm.
    layers_sizes = [512]
15+
16+
def construct_configs(path_to_raw_data=None, feature_store_path=None,
                      learning_rate=1e-3, momentum=0.9, epochs=10, output_activation="none", num_layers=1, layers_sizes=None):
    """Populate the global ``TrainConfig`` and ``ConfigPaths`` classes.

    Args:
        path_to_raw_data: Root of the raw dataset (required).
        feature_store_path: Cache folder for precomputed features; when not
            given, the default ``./precomputed`` is kept and a warning is issued.
        learning_rate: Optimizer learning rate.
        momentum: Optimizer momentum.
        epochs: Number of training epochs.
        output_activation: One of "none", "softmax", "sigmoid".
        num_layers: Number of head layers.
        layers_sizes: Hidden-layer widths; defaults to ``[512]``.

    Raises:
        ValueError: when ``path_to_raw_data`` is falsy.
    """
    # Validate *before* touching any global state so a failed call does not
    # leave TrainConfig half-updated (the original mutated first, raised later).
    if not path_to_raw_data:
        raise ValueError("Path to raw data must be set")

    # ``None`` sentinel instead of a mutable ``[512]`` default argument, so
    # callers can never share (and accidentally mutate) one list across calls.
    if layers_sizes is None:
        layers_sizes = [512]

    TrainConfig.learning_rate = learning_rate
    TrainConfig.momentum = momentum
    TrainConfig.epochs = epochs
    TrainConfig.output_activation = output_activation
    TrainConfig.num_layers = num_layers
    TrainConfig.layers_sizes = layers_sizes

    ConfigPaths.path_to_raw_data = path_to_raw_data
    if not feature_store_path:
        warnings.warn("Feature store path is not set by config. If benchmark will be ran with use_precomputed_features "
                      "default folder ./precomputed will be used to store features")
    else:
        ConfigPaths.feature_store_path = feature_store_path
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import torch
2+
from torch.utils.data import Dataset, DataLoader
3+
import numpy as np
4+
5+
from PIL import Image
6+
7+
# PyTorch Dataset
8+
9+
10+
class LOSSegmDataset(Dataset):
    """Dataset yielding ``(class_distribution, image_or_features)`` pairs.

    Each item is an ``(image_path, distribution_path)`` pair. The
    distribution is loaded from a ``.npy`` vector of class probabilities.
    When a feature store holds a precomputed tensor for the image, that
    tensor is returned instead of the raw image, so the backbone never has
    to run inside the dataloader.
    """

    def __init__(self, items, transform=None, feature_store=None):
        self.items = items                  # list of (image_path, distr_path)
        self.transform = transform          # optional torchvision transform
        self.feature_store = feature_store  # optional FeatureStore cache

    def __len__(self):
        return len(self.items)

    def load_image(self, img_path):
        """Open *img_path* as RGB and apply the transform, if any."""
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)
        return image

    def __getitem__(self, idx):
        img_path, distr_path = self.items[idx]
        distribution = torch.from_numpy(np.load(distr_path))
        # Build a flat cache key from the image path. ``removesuffix`` only
        # drops a literal ".png" ending; the original ``str.strip(".png")``
        # stripped any of the characters '.', 'p', 'n', 'g' from BOTH ends,
        # corrupting keys for stems such as "map" -> "ma".
        feature_path = str(img_path).removesuffix(".png").replace("/", "_")
        if self.feature_store and self.feature_store.exists(feature_path):
            features = self.feature_store.load(feature_path)
            return distribution, features

        return distribution, self.load_image(img_path)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"path_to_raw_data" : "./2026_LOS_SEGM",
3+
"feature_store_path" : "./precomputed",
4+
"learning_rate" : 1e-3,
5+
"momentum" : 0.9,
6+
"epochs" : 20,
7+
"output_activation" : "none",
8+
"num_layers" : 2,
9+
"layers_sizes" : [128, 128]
10+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
import torch
3+
from pathlib import Path
4+
from tqdm import tqdm
5+
6+
class FeatureStore:
    """Disk cache of precomputed feature tensors.

    Saves and loads precomputed features to avoid running DINO inside
    dataloaders. Each entry lives at ``<root>/<stem>.pt``.
    """

    def __init__(self, root="./precomputed_features"):
        root_path = Path(root)
        root_path.mkdir(parents=True, exist_ok=True)
        self.root = root_path

    def feature_path(self, img_stem: str) -> Path:
        """Map an image stem to its ``.pt`` file inside the store root."""
        stem_name = Path(img_stem).name
        return self.root / f"{stem_name}.pt"

    def exists(self, img_stem: str) -> bool:
        """Return True when a cached tensor is already on disk for this stem."""
        return self.feature_path(img_stem).exists()

    def save(self, img_stem: str, tensor: torch.Tensor):
        """Persist *tensor* (moved to CPU first) under the stem's path."""
        target = self.feature_path(img_stem)
        torch.save(tensor.cpu(), target)

    def load(self, img_stem: str) -> torch.Tensor:
        """Read the cached tensor for *img_stem* back from disk."""
        return torch.load(self.feature_path(img_stem))
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# This file is used to convert segmented images to npy files which contains class distribution
import numpy as np
from pathlib import Path
from PIL import Image

basepath = Path("./2026_LOS_SEGM")
segmentation_subfolders = list(basepath.rglob("segmentation"))

# ---------------------------------------------------------------------------
# Pass 1: build the global color -> class-index mapping in discovery order.
# A dict is used directly (O(1) membership) instead of the original
# O(n^2) ``color not in list`` scan; the unused ``stub`` variable is gone.
# ---------------------------------------------------------------------------
unique_colors_dict = {}
for segm_path in segmentation_subfolders:
    for segm_image_path in segm_path.rglob("*.png"):
        segm_img = np.array(Image.open(segm_image_path))
        # NOTE(review): assumes the PNG decodes with a channel axis
        # (RGB/RGBA); a palette/grayscale PNG would be 2-D and break the
        # reshape — confirm the dataset format.
        for color in np.unique(segm_img.reshape(-1, segm_img.shape[2]), axis=0):
            key = str(list(color))
            if key not in unique_colors_dict:
                unique_colors_dict[key] = len(unique_colors_dict)

num_classes = len(unique_colors_dict)

# ---------------------------------------------------------------------------
# Pass 2: for every segmentation image, compute the per-class pixel
# fraction and save it as <parent>/distributions/<stem>.npy.
# ``np.unique(..., return_counts=True)`` replaces the original per-pixel
# Python loop — one C-level pass instead of millions of dict lookups.
# ---------------------------------------------------------------------------
for segm_path in segmentation_subfolders:
    distribution_path = segm_path.parent / 'distributions'
    distribution_path.mkdir(parents=True, exist_ok=True)
    for segm_image_path in segm_path.rglob("*.png"):
        segm_img = np.array(Image.open(segm_image_path))
        pixels = segm_img.reshape(-1, segm_img.shape[2])
        class_probs = np.zeros(num_classes)
        colors, counts = np.unique(pixels, axis=0, return_counts=True)
        for color, count in zip(colors, counts):
            class_probs[unique_colors_dict[str(list(color))]] += count
        class_probs /= pixels.shape[0]
        np.save(f'{str(distribution_path)}/{segm_image_path.stem}.npy', class_probs)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import json
2+
from torch.utils.data import DataLoader
3+
from builders import ClassDistributionDatasetBuilder
4+
from dataset import LOSSegmDataset
5+
import numpy as np
6+
7+
8+
class DatasetManager:
    """Facade around ``ClassDistributionDatasetBuilder`` for creating splits."""

    def __init__(self, directory, dataset_size=200, train_split=0.25,
                 random_seed=None, full_folder=False):
        self.random_seed = random_seed

        self.builder = ClassDistributionDatasetBuilder(directory, dataset_size=dataset_size,
                                                       train_split=train_split, random_seed=self.random_seed,
                                                       full_folder=full_folder)

    def create(self, name):
        """Build the dataset split named *name*.

        Returns:
            ``(train_items, test_items)`` in-memory lists when
            ``random_seed`` is set, otherwise the
            ``(train_file, test_file)`` paths of the JSON files written by
            the builder.
        """
        if self.random_seed is not None:
            _, _, train_set, test_set = self.builder.build(name)
            return train_set, test_set
        # Fixed a SyntaxError in the original, which tried to unpack into
        # list literals: ``train_file, test_file, [], [] = ...``.
        train_file, test_file, _, _ = self.builder.build(name)
        print(f"Train set: {train_file}")
        print(f"Test set: {test_file}")
        return train_file, test_file
25+
26+
27+
def load_json(path):
    """Parse the UTF-8 JSON file at *path* and return its contents."""
    with open(path, "r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed
30+
31+
32+
def make_dataloaders(train_json, test_json, transform=None, feature_store=None, batch=32, workers=2, dataset_manager=None,
                     generalization_dataset_manager=None):
    """Create train/test/generalization dataloaders plus the class count.

    Items come either from an in-memory ``dataset_manager`` (when given) or
    from the two JSON split files.

    Returns:
        A 5-tuple ``(train_loader, test_loader, per_sample_test_loader,
        per_sample_generalization_loader, num_classes)``.

    Raises:
        ValueError: when neither split contains any samples, so the number
        of classes cannot be inferred.
    """
    if dataset_manager is not None:
        train_items, test_items = dataset_manager.create("")
    else:
        train_items = load_json(train_json)
        test_items = load_json(test_json)

    generalization_items = []
    if generalization_dataset_manager is not None:
        _, generalization_items = generalization_dataset_manager.create("")

    # Number of classes = length of one distribution vector. The original
    # indexed train_items[0] unconditionally, which raises IndexError in
    # full_folder mode where the train split is empty — fall back to the
    # test items in that case.
    probe_items = train_items if train_items else test_items
    if not probe_items:
        raise ValueError("No samples available to infer the number of classes.")
    nc = len(np.load(probe_items[0][1]))

    train_ds = LOSSegmDataset(train_items, transform, feature_store)
    test_ds = LOSSegmDataset(test_items, transform, feature_store)
    generalization_ds = LOSSegmDataset(generalization_items, transform, feature_store)

    return (
        DataLoader(train_ds, batch_size=batch, shuffle=True, num_workers=workers),
        DataLoader(test_ds, batch_size=batch, shuffle=False, num_workers=workers),
        # NOTE(review): a second loader over the *test* set with
        # batch_size=1 (per-sample evaluation) — confirm intentional.
        DataLoader(test_ds, batch_size=1, shuffle=False),
        DataLoader(generalization_ds, batch_size=1, shuffle=False),
        nc
    )

0 commit comments

Comments
 (0)