Flaw2Appaulse/utils.py at main · CUHK-AIM-Group/Flaw2Appaulse · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
import numpy as np
import torch
from medpy import metric
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import kl
from matplotlib import pyplot as plt
from scipy.optimize import linear_sum_assignment

def multi_annotator_loss(pred_masks, label_four, temperature=0.1, alpha=0.5):
    """
    Multi-annotator loss: Hungarian matching + segmentation + consistency terms.

    All Dice values come from ``dice_score_cal``, which binarises its first
    argument with ``> 0`` and uses its second argument as given.

    Args:
        pred_masks: predicted masks, documented as (B, N, H, W) where N is the
            number of predictions per sample. N >= 1 and B >= 1 are expected.
        label_four: annotator labels, documented as (B, 4, H, W); any number
            of annotators along dim 1 is accepted.
        temperature: kept for interface compatibility; not used by the
            computation.
        alpha: weight of the matching term; the segmentation term is weighted
            by ``1 - alpha``.

    Returns:
        A tuple ``(total_loss, stats)`` where ``total_loss`` is
        ``alpha * matching + (1 - alpha) * segmentation + 0.1 * consistency``
        and ``stats`` is a dict of Python floats with the keys
        ``'matching_loss'``, ``'segmentation_loss'`` and ``'consistency_loss'``.
    """
    B, N, H, W = pred_masks.shape
    _, num_annotators, _, _ = label_four.shape

    # 1. Dice between every prediction and every annotator, computed per
    #    sample so that each sample gets its own assignment problem below.
    dice_matrix = torch.zeros(B, N, num_annotators, device=pred_masks.device)
    for b in range(B):
        for i in range(N):
            for j in range(num_annotators):
                dice_matrix[b, i, j] = dice_score_cal(pred_masks[b, i], label_four[b, j])

    # 2. Optimal prediction/annotator matching (Hungarian algorithm).
    # NOTE(review): dice_score_cal hard-thresholds the prediction, so these two
    # terms presumably carry no gradient w.r.t. pred_masks - confirm intended.
    total_matching_loss = 0
    total_segmentation_loss = 0

    for b in range(B):
        dice_mat = dice_matrix[b]  # (N, num_annotators)

        # Cost is 1 - Dice; scipy needs a detached CPU array.
        cost_matrix = (1 - dice_mat).detach().cpu().numpy()
        row_ind, col_ind = linear_sum_assignment(cost_matrix)

        rows = torch.as_tensor(row_ind, dtype=torch.long, device=dice_mat.device)
        cols = torch.as_tensor(col_ind, dtype=torch.long, device=dice_mat.device)
        matched_dice = dice_mat[rows, cols]
        matching_loss = 1 - matched_dice.mean()

        # Segmentation term: every prediction against the annotator that took
        # part in the best-scoring matched pair. The required Dice values are
        # already stored in this sample's matrix column.
        best_annotator_idx = int(col_ind[int(matched_dice.argmax())])
        seg_loss = 1 - dice_mat[:, best_annotator_idx].mean()

        total_matching_loss += matching_loss
        total_segmentation_loss += seg_loss

    # 3. Consistency between the predictions of the same sample.
    # Only the first argument is binarised by dice_score_cal; the second
    # prediction enters the formula with its raw values.
    consistency_loss = 0
    for b in range(B):
        preds = pred_masks[b]  # (N, H, W)
        for i in range(N):
            for j in range(i + 1, N):
                consistency_loss += (1 - dice_score_cal(preds[i], preds[j]))

    num_pairs = B * N * (N - 1) / 2
    if num_pairs > 0:
        # With a single prediction there are no pairs; the term stays 0
        # instead of dividing by zero.
        consistency_loss = consistency_loss / num_pairs

    # 4. Weighted combination.
    matching_term = total_matching_loss / B
    segmentation_term = total_segmentation_loss / B
    total_loss = alpha * matching_term + (1 - alpha) * segmentation_term + 0.1 * consistency_loss

    # float() accepts both 0-dim tensors and plain Python numbers
    # (dice_score_cal returns the float 1.0 for two empty masks).
    return total_loss, {
        'matching_loss': float(matching_term),
        'segmentation_loss': float(segmentation_term),
        'consistency_loss': float(consistency_loss)
    }

def cross_attention_consistency_loss(enhanced_sparse, enhanced_dense, original_sparse, original_dense, label_four):
    """
    Consistency loss keeping cross-attention-enhanced features close to the originals.

    Sums three MSE terms: sparse features, dense features (flattened from
    dim 1), and - weighted by 0.1 - the dense features multiplied by each
    label in ``label_four`` (averaged over dim 1 of ``label_four``).

    NOTE(review): each label is unsqueezed to (B, 1, ...) and multiplied with
    the dense features, so the two must be broadcast-compatible - confirm the
    spatial sizes match at the call site.
    """
    # 1. Feature consistency.
    sparse_term = F.mse_loss(enhanced_sparse, original_sparse)
    dense_term = F.mse_loss(enhanced_dense.flatten(1), original_dense.flatten(1))

    # 2. Label-guided consistency: compare features inside each label region.
    num_labels = label_four.shape[1]
    masked_terms = []
    for idx in range(num_labels):
        region = label_four[:, idx].unsqueeze(1)
        masked_terms.append(F.mse_loss(enhanced_dense * region, original_dense * region))
    semantic_term = sum(masked_terms) / num_labels

    return sparse_term + dense_term + 0.1 * semantic_term

def diversity_loss(pred_masks, temperature=0.1):
    """
    Diversity loss: mean pairwise Dice similarity between predictions.

    For every ordered pair of distinct prediction slots (i, j) one Dice value
    is computed by ``dice_score_cal`` over the whole batch and written to all
    batch entries of a (B, N, N) matrix whose diagonal stays zero. The mean
    of that matrix is returned, so a lower value means more diverse outputs.

    ``temperature`` is accepted but not used by the computation.
    """
    B, N, H, W = pred_masks.shape

    pairwise = torch.zeros(B, N, N).to(pred_masks.device)
    for row in range(N):
        for col in range(N):
            if row == col:
                continue
            pairwise[:, row, col] = dice_score_cal(pred_masks[:, row], pred_masks[:, col])

    # Minimising the average similarity pushes the predictions apart.
    return pairwise.mean()

class FocalLoss(nn.Module):
    """Multi-class focal loss over per-pixel logits.

    ``alpha`` is either a list with one weight per class, or a scalar below 1
    that becomes the weight of class 0 while every other class gets
    ``1 - alpha``. ``gamma`` is the focusing exponent. ``size_average``
    selects mean (True) or sum (False) reduction.
    """

    def __init__(self, alpha=0.25, gamma=2, num_classes=3, size_average=True):
        super().__init__()
        self.size_average = size_average
        if not isinstance(alpha, list):
            assert alpha < 1
            print(f'Focal loss alpha={alpha}, will shrink the impact in background')
            class_weights = torch.zeros(num_classes)
            class_weights[0] = alpha
            class_weights[1:] = 1 - alpha
            self.alpha = class_weights
        else:
            assert len(alpha) == num_classes
            print(f'Focal loss alpha={alpha}, will assign alpha values for each class')
            self.alpha = torch.Tensor(alpha)
        self.gamma = gamma
        self.num_classes = num_classes

    def forward(self, preds, labels):
        """Return the focal loss for logits ``preds`` and targets ``labels``.

        ``preds`` is permuted with (0, 2, 3, 1), so it is expected as
        (B, num_classes, H, W); ``labels`` must be (B, H, W). The labels are
        used as gather indices - presumably integer class ids (long tensor).
        """
        # Keep the class weights on the same device as the logits.
        self.alpha = self.alpha.to(preds.device)

        logits = preds.permute(0, 2, 3, 1).contiguous()
        logits = logits.view(-1, logits.size(-1))
        B, H, W = labels.shape
        assert B * H * W == logits.shape[0]
        assert logits.shape[-1] == self.num_classes

        log_probs = F.log_softmax(logits, dim=1)
        probs = torch.exp(log_probs)

        # Select the (log-)probability assigned to the true class of each pixel.
        target_index = labels.view(-1, 1)
        true_prob = probs.gather(1, target_index)
        true_log_prob = log_probs.gather(1, target_index)
        pixel_weight = self.alpha.gather(0, labels.view(-1))

        focal = -torch.mul(torch.pow((1 - true_prob), self.gamma), true_log_prob)
        # focal is (M, 1); transpose to (1, M) so it lines up with the (M,) weights.
        weighted = torch.mul(pixel_weight, focal.t())

        return weighted.mean() if self.size_average else weighted.sum()

class DiceLoss(nn.Module):
    """Multi-class soft Dice loss averaged over ``n_classes`` channels."""

    def __init__(self, n_classes):
        super(DiceLoss, self).__init__()
        self.n_classes = n_classes

    def _one_hot_encoder(self, input_tensor):
        """Turn a label map into a float one-hot tensor with classes on dim 1."""
        class_masks = [(input_tensor == i).unsqueeze(1) for i in range(self.n_classes)]
        return torch.cat(class_masks, dim=1).float()

    def _dice_loss(self, score, target):
        """Return ``1 - Dice`` for one class, using squared sums in the denominator."""
        target = target.float()
        smooth = 1e-5  # keeps the ratio defined when both inputs are all zero
        intersect = torch.sum(score * target)
        y_sum = torch.sum(target * target)
        z_sum = torch.sum(score * score)
        return 1 - (2 * intersect + smooth) / (z_sum + y_sum + smooth)

    def forward(self, inputs, target, weight=None, softmax=False):
        """Compute the weighted mean Dice loss.

        Args:
            inputs: per-class scores with classes on dim 1; must have the same
                size as the one-hot encoded ``target``.
            target: label map of class indices (one-hot encoded internally).
            weight: optional per-class weights; defaults to 1 for every class.
            softmax: apply ``torch.softmax`` over dim 1 to ``inputs`` first.

        Returns:
            Sum of the weighted per-class losses divided by ``n_classes``.
        """
        if softmax:
            inputs = torch.softmax(inputs, dim=1)
        target = self._one_hot_encoder(target)
        if weight is None:
            weight = [1] * self.n_classes
        assert inputs.size() == target.size(), 'predict {} & target {} shape do not match'.format(inputs.size(), target.size())
        loss = 0.0
        for i in range(self.n_classes):
            # No per-class .item() bookkeeping: the old list was never read
            # and forced a device-to-host sync for every class.
            loss += self._dice_loss(inputs[:, i], target[:, i]) * weight[i]
        return loss / self.n_classes

def calculate_metric_percase(pred, gt):
    """Return ``(dice, hd95)`` for one binarised prediction / ground-truth pair.

    Every value ``> 0`` is treated as foreground. The metrics come from
    ``medpy.metric.binary`` when both masks contain foreground. Otherwise a
    fixed pair is returned: ``(1, 0)`` when only the prediction has
    foreground, ``(0, 0)`` in all remaining cases.

    The inputs are copied before binarisation, so the caller's arrays are
    left unmodified.
    """
    # Copy first: the in-place writes below must not leak to the caller.
    pred = np.array(pred)
    gt = np.array(gt)
    pred[pred > 0] = 1
    gt[gt > 0] = 1

    pred_sum = pred.sum()
    gt_sum = gt.sum()
    if pred_sum > 0 and gt_sum > 0:
        dice = metric.binary.dc(pred, gt)
        hd95 = metric.binary.hd95(pred, gt)
        return dice, hd95
    elif pred_sum > 0 and gt_sum == 0:
        # NOTE(review): a non-empty prediction against an empty ground truth
        # is scored as Dice 1; kept unchanged - confirm this is intended.
        return 1, 0
    else:
        return 0, 0

def l2_regularisation(m):
    """Return the sum of the 2-norms of all parameters of ``m``.

    Returns ``None`` when ``m`` has no parameters.
    """
    norms = [param.norm(2) for param in m.parameters()]
    if not norms:
        return None
    total = norms[0]
    for norm in norms[1:]:
        total = total + norm
    return total

def iou_score_cal(prediction, groundtruth):
    """Intersection-over-union of two tensors, treating non-zero as foreground.

    Both tensors are detached and moved to NumPy on the CPU. Returns the
    integer ``1`` when the union is empty, otherwise the IoU ratio.
    """
    pred_np = prediction.detach().cpu().numpy()
    gt_np = groundtruth.detach().cpu().numpy()

    overlap = np.logical_and(gt_np, pred_np)
    combined = np.logical_or(gt_np, pred_np)

    union_area = np.sum(combined)
    if union_area == 0:
        # Two empty masks are defined to match perfectly.
        return 1
    return np.sum(overlap) / union_area

def mask_IoU(prediction, groundtruth):
    """IoU between two mask tensors (non-zero entries count as foreground).

    The tensors are detached, copied to the CPU and converted to NumPy
    arrays. An empty union yields the integer ``1``.
    """
    pred_arr, gt_arr = (t.detach().cpu().numpy() for t in (prediction, groundtruth))
    both = np.logical_and(gt_arr, pred_arr)
    either = np.logical_or(gt_arr, pred_arr)
    either_count = np.sum(either)
    return 1 if either_count == 0 else np.sum(both) / either_count

def generalized_energy_distance_iou(predictions, masks):
    """Generalised energy distance between predictions and masks using ``1 - IoU``.

    Computes ``2 * E[d(pred, mask)] - E[d(pred, pred')] - E[d(mask, mask')]``
    with ``d = 1 - mask_IoU`` and the expectations taken over all index pairs
    along dim 0 (self-pairs included).

    Returns:
        ``(ed, scores)`` where ``scores`` is the IoU of the first prediction
        with the first mask.
    """
    n = predictions.shape[0]
    m = masks.shape[0]

    # Summed distances: cross term, prediction-prediction, mask-mask.
    cross = sum(1 - mask_IoU(predictions[i], masks[j]) for i in range(n) for j in range(m))
    within_pred = sum(1 - mask_IoU(predictions[i], predictions[j]) for i in range(n) for j in range(n))
    within_mask = sum(1 - mask_IoU(masks[i], masks[j]) for i in range(m) for j in range(m))

    # Turn the sums into (weighted) means.
    cross = cross * (2 / (n * m))
    within_pred = within_pred * (1 / (n * n))
    within_mask = within_mask * (1 / (m * m))

    ed = cross - within_pred - within_mask
    scores = mask_IoU(predictions[0], masks[0])
    return ed, scores

def dice_score_cal(pred, targs):
    """Dice score between a thresholded prediction and a target.

    ``pred`` is binarised with ``> 0``; ``targs`` is used as given. The sums
    run over all elements, so a single scalar is produced. Returns the Python
    float ``1.0`` when both sums are zero, otherwise a 0-dim tensor.
    """
    binary_pred = pred.gt(0).float()
    overlap = (binary_pred * targs).sum()
    total = binary_pred.sum() + targs.sum()
    # Two empty masks count as a perfect match.
    return 1.0 if total == 0 else 2. * overlap / total

def dice_coef_cal(output, target):
    """Smoothed Dice coefficient of two tensors, computed with NumPy on the CPU.

    Both inputs are flattened and multiplied element-wise; no thresholding is
    applied. ``reshape`` is used for flattening, so non-contiguous tensors
    (for example transposed or sliced views) are accepted as well.

    Returns:
        ``(2 * sum(output * target) + 1e-5) / (sum(output) + sum(target) + 1e-5)``
        as a NumPy scalar.
    """
    smooth = 1e-5  # avoids 0/0 when both inputs are empty
    output = output.detach().reshape(-1).cpu().numpy()
    target = target.detach().reshape(-1).cpu().numpy()
    intersection = (output * target).sum()
    return (2. * intersection + smooth) / (output.sum() + target.sum() + smooth)

def iou(pred, true):
    """Intersection-over-union of two tensors after casting them to bool.

    Returns the integer ``1`` when both the union and the intersection are
    empty, otherwise a 0-dim CPU tensor.
    """
    pred_mask = pred.bool().detach().cpu()
    true_mask = true.bool().detach().cpu()
    intersection = torch.logical_and(pred_mask, true_mask).float().sum()
    union = torch.logical_or(pred_mask, true_mask).float().sum()
    if union != 0 or intersection != 0:
        return intersection / union
    # Both masks are empty: treat as a perfect match.
    return 1

def hm_iou_cal(preds, trues):
    """Hungarian-matched IoU: mean IoU over the optimal one-to-one assignment.

    An IoU matrix is built for every (prediction, ground truth) pair with
    ``iou``; ``linear_sum_assignment`` then minimises the total ``1 - IoU``
    cost. The IoU values of the matched pairs are read back from the matrix
    instead of being recomputed.

    Args:
        preds: sequence of prediction tensors.
        trues: sequence of ground-truth tensors.

    Returns:
        The mean matched IoU as a Python float.
    """
    num_preds = len(preds)
    num_trues = len(trues)

    iou_matrix = torch.zeros((num_preds, num_trues))
    for i, pred in enumerate(preds):
        for j, true in enumerate(trues):
            iou_matrix[i, j] = iou(pred, true)

    cost_matrix = 1 - iou_matrix
    row_ind, col_ind = linear_sum_assignment(cost_matrix.numpy())

    rows = torch.as_tensor(row_ind, dtype=torch.long)
    cols = torch.as_tensor(col_ind, dtype=torch.long)
    return iou_matrix[rows, cols].mean().item()

def calculate_dice_loss(inputs, targets, num_masks=5):
    """Sigmoid Dice loss summed over masks and divided by ``num_masks``.

    ``inputs`` are logits (a sigmoid is applied here). The Dice ratio is
    computed along the last dimension with +1 smoothing in both numerator
    and denominator.
    """
    probs = torch.sigmoid(inputs)
    overlap = (probs * targets).sum(-1)
    total = probs.sum(-1) + targets.sum(-1)
    per_mask_loss = 1 - (2 * overlap + 1) / (total + 1)
    return per_mask_loss.sum() / num_masks

def calculate_sigmoid_focal_loss(inputs, targets, num_masks=5, alpha: float = 0.25, gamma: float = 2):
    """Sigmoid focal loss on logits, averaged over dim 1 and normalised by ``num_masks``.

    Args:
        inputs: logits; must have at least two dimensions (``mean(1)`` is taken).
        targets: binary targets with the same shape as ``inputs``.
        num_masks: divisor applied to the summed per-row losses.
        alpha: weight of positive targets (``1 - alpha`` for negatives);
            a negative value disables the weighting.
        gamma: exponent of the modulating factor ``(1 - p_t)``.
    """
    probs = torch.sigmoid(inputs)
    bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")

    # Probability assigned to the correct class of every element.
    p_correct = probs * targets + (1 - probs) * (1 - targets)
    focal = bce * ((1 - p_correct) ** gamma)

    if alpha >= 0:
        class_weight = alpha * targets + (1 - alpha) * (1 - targets)
        focal = class_weight * focal

    per_row = focal.mean(1)
    return per_row.sum() / num_masks

def dice_max_cal1(pred_eval, label_four):
    """Average, over predictions, of each prediction's best Dice against any label.

    Every prediction (indexed along dim 0 of ``pred_eval``) is scored with
    ``dice_score_cal`` against every label (dim 0 of ``label_four``); the
    highest score per prediction is kept and the mean of those is returned.
    """
    num_preds = pred_eval.shape[0]
    num_labels = label_four.shape[0]

    running_total = 0
    for p_idx in range(num_preds):
        pred = pred_eval[p_idx].to(dtype=torch.float).squeeze().cpu().detach()
        scores = [
            dice_score_cal(pred, label_four[l_idx].squeeze(0).cpu().detach())
            for l_idx in range(num_labels)
        ]
        # The first score is the baseline; later ones replace it only when
        # strictly larger. Without labels the contribution is 0.
        running_total += max(scores, default=0)
    return running_total / num_preds

def dice_max_cal2(pred_eval, label_four):
    """Best Dice score over all (prediction, label) pairs.

    Scores come from ``dice_score_cal``. The search starts from ``-1``, which
    is also what is returned when there are no pairs to compare.
    """
    # Seed the candidates with the initial value so max() reproduces the
    # "replace only when strictly greater" scan.
    candidates = [-1]
    for p_idx in range(pred_eval.shape[0]):
        pred = pred_eval[p_idx].to(dtype=torch.float).squeeze().cpu().detach()
        for l_idx in range(label_four.shape[0]):
            label = label_four[l_idx].squeeze(0).cpu().detach()
            candidates.append(dice_score_cal(pred, label))
    return max(candidates)

def dice_avg_cal(pred_list, label_four):
    """Mean Dice of the averaged prediction against every label.

    The tensors in ``pred_list`` are stacked and averaged; the mean is
    thresholded with ``> 0`` into a 0/1 mask, which is scored with
    ``dice_score_cal`` against each label along dim 0 of ``label_four``.
    """
    mean_pred = torch.stack(pred_list).mean(dim=0)
    fused = (mean_pred > 0).cpu().detach()
    fused = torch.where(fused, torch.tensor(1), torch.tensor(0))
    # The fused mask is the same for every label, so prepare it once.
    fused = fused.to(dtype=torch.float).squeeze().cpu().detach()

    num_labels = label_four.shape[0]
    total = sum(
        dice_score_cal(fused, label_four[k].squeeze(0).cpu().detach())
        for k in range(num_labels)
    )
    return total / num_labels

def kl_divergence(posterior_latent_space, prior_latent_space, analytic=True, calculate_posterior=False, z_posterior=None):
    """KL divergence between a posterior and a prior distribution.

    Args:
        posterior_latent_space: posterior distribution object.
        prior_latent_space: prior distribution object.
        analytic: if True, use ``torch.distributions.kl.kl_divergence``.
        calculate_posterior: only used when ``analytic`` is False; draw the
            evaluation point with ``rsample()`` from the posterior.
        z_posterior: evaluation point used when ``analytic`` is False and
            ``calculate_posterior`` is False.

    Returns:
        The analytic KL, or the single-point estimate
        ``log q(z) - log p(z)``.
    """
    if analytic:
        return kl.kl_divergence(posterior_latent_space, prior_latent_space)

    sample = posterior_latent_space.rsample() if calculate_posterior else z_posterior
    log_q = posterior_latent_space.log_prob(sample)
    log_p = prior_latent_space.log_prob(sample)
    return log_q - log_p

def show_mask(mask, ax, color):
    """Draw ``mask`` on ``ax`` as an RGBA overlay.

    ``color`` is a list of three RGB components; an alpha of 0.5 is appended.
    The colour channels are the squeezed mask scaled by each component, and
    the alpha channel is 0.5 wherever the mask is ``> 0``.
    """
    height, width = mask.shape[-2:]
    rgba = np.array(color + [0.5])
    flat_mask = mask.squeeze()

    overlay = np.zeros((height, width, 4))
    for channel, intensity in enumerate(rgba[:3]):
        overlay[:, :, channel] = flat_mask * intensity
    overlay[:, :, 3] = (flat_mask > 0) * rgba[3]

    ax.imshow(overlay)

def show_box(box, ax, color):
    """Add an unfilled rectangle for ``box`` to ``ax``.

    ``box`` is indexed as ``(x0, y0, x1, y1)``; the rectangle is drawn with
    edge colour ``color`` and a line width of 2.
    """
    origin = (box[0], box[1])
    width = box[2] - box[0]
    height = box[3] - box[1]
    outline = plt.Rectangle(origin, width, height, edgecolor=color, facecolor='none', lw=2)
    ax.add_patch(outline)

def truncated_normal_(tensor, mean=0, std=1):
    """Fill ``tensor`` in place with approximately truncated-normal values.

    Four standard-normal candidates are drawn per element and the first one
    inside (-2, 2) is selected; the result is then scaled by ``std`` and
    shifted by ``mean``.

    NOTE(review): when none of the four candidates is in range, the index
    returned by ``max`` over an all-False mask decides which candidate is
    used, so an out-of-range value can slip through - presumably an accepted
    approximation; confirm.
    """
    shape = tensor.shape
    candidates = tensor.new_empty(shape + (4,)).normal_()
    in_range = (candidates > -2) & (candidates < 2)
    # Position of the maximum of the boolean mask along the candidate axis.
    pick = in_range.max(-1, keepdim=True)[1]
    chosen = candidates.gather(-1, pick).squeeze(-1)
    tensor.data.copy_(chosen)
    tensor.data.mul_(std).add_(mean)

def init_weights(m):
    """Initialise a ``Conv2d`` / ``ConvTranspose2d`` module in place.

    The weight gets Kaiming-normal initialisation (fan-in, ReLU); the bias,
    when the layer has one, is filled by ``truncated_normal_`` with mean 0
    and std 0.001. Other module types are left untouched, which makes the
    function usable with ``Module.apply``.
    """
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
        # Layers created with bias=False expose m.bias as None.
        if m.bias is not None:
            truncated_normal_(m.bias, mean=0, std=0.001)

def init_weights_orthogonal_normal(m):
    """Initialise a ``Conv2d`` / ``ConvTranspose2d`` module in place.

    The weight gets orthogonal initialisation; the bias, when the layer has
    one, is filled by ``truncated_normal_`` with mean 0 and std 0.001. Other
    module types are left untouched.
    """
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.orthogonal_(m.weight)
        # Layers created with bias=False expose m.bias as None.
        if m.bias is not None:
            truncated_normal_(m.bias, mean=0, std=0.001)