vision/model_vet.py at master · crasanders/vision · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pickle
import numpy as np
import pandas as pd
import itertools
from scipy.spatial.distance import cdist
from scipy.special import logsumexp

# Load the two pre-computed feature tables (presumably ResNet-50 activations,
# judging by the file names -- confirm against the script that wrote them).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point these paths at trusted, locally generated pickles.
with open('resnet50_features_cleaned_scaled.pkl', 'rb') as clean_handle:
    clean_features = pickle.load(clean_handle)
with open('resnet50_features_scaled.pkl', 'rb') as unclean_handle:
    unclean_features = pickle.load(unclean_handle)

# Indexed as features[background][stimulus_name][layer]: the boolean
# `background` flag selects the uncleaned (True) or cleaned (False) table.
features = {True: unclean_features, False: clean_features}


def model(row, layer, metric, c, background, aggregation, feature_sets=None):
    """Simulate one three-alternative trial with an exemplar-similarity model.

    The six study exemplars named in ``row`` are compared against the three
    response choices (target plus two distractors) in the feature space of
    ``layer``.  Each exemplar/choice distance ``d`` is turned into a
    similarity ``exp(-c * d)``; similarities are summed over exemplars and
    normalised over the choices to give a choice probability.

    Args:
        row: Mapping-like trial record (e.g. a pandas Series) providing
            ``exemplar1`` .. ``exemplar6``, ``tarname``, ``dist1name``,
            ``dist2name`` and ``tarloc`` (1-based position of the target
            among the three choices).
        layer: Key selecting which layer's feature vector to use.
        metric: Distance metric name passed to ``scipy.spatial.distance.cdist``.
        c: Positive sensitivity parameter scaling the distances.
        background: Key selecting the feature table in ``feature_sets``.
        aggregation: ``'none'`` keeps the six exemplars separate; ``'mean'``,
            ``'sum'`` and ``'max'`` first pool them element-wise into a
            single prototype vector.
        feature_sets: Optional mapping
            ``background -> {stimulus name -> {layer -> feature vector}}``.
            Defaults to the module-level ``features`` table.

    Returns:
        A dict holding every entry of ``row`` plus ``Distance{i},{j}``
        (exemplar i, choice j), ``Prob{j}``, ``prob_acc`` (probability
        assigned to the target), ``det_acc`` (whether the most probable
        choice is the target) and the parameter settings ``Layer``,
        ``Metric``, ``Aggregation``, ``c``, ``log_c`` and ``Background``.

    Raises:
        ValueError: If ``aggregation`` is not one of the supported names.
    """
    # Reject typos up front: previously an unknown name silently behaved like
    # 'none' while the result was still labelled with the bogus name.
    if aggregation not in ('none', 'mean', 'sum', 'max'):
        raise ValueError('unknown aggregation: {!r}'.format(aggregation))

    if feature_sets is None:
        feature_sets = features
    stimuli = feature_sets[background]

    trial_exemplars = np.array(
        [stimuli[row['exemplar{}'.format(j)]][layer] for j in range(1, 7)])

    # Pool the exemplars into one prototype row when requested.
    if aggregation == 'mean':
        trial_exemplars = trial_exemplars.mean(axis=0).reshape(1, -1)
    elif aggregation == 'sum':
        trial_exemplars = trial_exemplars.sum(axis=0).reshape(1, -1)
    elif aggregation == 'max':
        trial_exemplars = trial_exemplars.max(axis=0).reshape(1, -1)

    # tarloc is 1-based in the trial record; convert to a 0-based index.
    tarloc = row['tarloc'] - 1

    # Distractors keep their relative order; the target is slotted into the
    # position it occupied on screen.
    trial_choices = [stimuli[row['dist1name']][layer],
                     stimuli[row['dist2name']][layer]]
    trial_choices.insert(tarloc, stimuli[row['tarname']][layer])

    # distances[i][j]: distance from exemplar (or prototype) i to choice j.
    distances = cdist(trial_exemplars, trial_choices, metric=metric)

    # Equivalent to sims = exp(-c * d); probs = sims.sum(0) / sims.sum(),
    # evaluated in log space so large c * d does not underflow to 0 / 0.
    scaled_dist = distances * -c
    probs = np.exp(logsumexp(scaled_dist, axis=0) - logsumexp(scaled_dist))

    acc = probs[tarloc]
    det_acc = np.argmax(probs) == tarloc

    result = dict(row)
    result.update({'Distance{},{}'.format(i + 1, j + 1): dist
                   for (i, j), dist in np.ndenumerate(distances)})
    result.update({'Prob{}'.format(j): prob
                   for j, prob in enumerate(probs, start=1)})
    result.update({'prob_acc': acc, 'det_acc': det_acc})
    result.update({'Layer': layer, 'Metric': metric, 'Aggregation': aggregation,
                   'c': c, 'log_c': np.log10(c)})
    result.update({'Background': background})

    return result


# Only trials whose Task column equals "Different" are simulated.
vet_trials = pd.read_csv('vet_trials.csv').query('Task == "Different"')

# Parameter grid swept by the simulation.
layers = range(50)
metrics = ['euclidean', 'cityblock', 'cosine', 'jaccard']
aggregations = ['none', 'mean', 'max']
cs = np.logspace(0, 3, 20)
backgrounds = [True, False]

# Number of result rows the full sweep produces (informational; not used below).
nrows = len(layers) * len(metrics) * len(cs) * len(backgrounds) * len(aggregations) * len(vet_trials)

# iterrows() constructs a new Series for every row each time it is iterated.
# The trials are the same for every parameter combination, so build the row
# objects once instead of once per combination.
trial_rows = [row for _, row in vet_trials.iterrows()]

results = []
for layer, metric, c, background, aggregation in itertools.product(layers, metrics, cs, backgrounds, aggregations):
    print(layer, metric, c, background, aggregation)
    results.extend(
        model(row, layer, metric, c, background, aggregation)
        for row in trial_rows)

results = pd.DataFrame(results)
# Convert object-dtype columns to the pandas categorical dtype before saving.
for col in results.columns:
    if results[col].dtype == 'object':
        results[col] = results[col].astype('category')

results.to_pickle('model_simulations.pkl')

# results.to_csv('model_simulations.csv', index=False)