-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgoldilocks_dpm.py
More file actions
110 lines (88 loc) · 2.99 KB
/
goldilocks_dpm.py
File metadata and controls
110 lines (88 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
#
# Goldilocks-DPM framework
#
# Neil Oxtoby, UCL, 2023
#
from abc import ABC, abstractmethod
# from tqdm.auto import tqdm
import numpy as np
import scipy.stats as stats
from matplotlib import pyplot as plt
import statsmodels.api as sm
# import matplotlib.colors as mcolors
# from pathlib import Path
# import pickle
# import csv
# import os
# import multiprocessing
# from functools import partial, partialmethod
#
# import time
# import pathos
#= Abstractions: define your own implementations
#************
class dpm_data(ABC):
    """Abstract interface for the data object handed to ``goldilocks_dpm``.

    Define your own implementation by subclassing and overriding every
    abstract method below; the class cannot be instantiated until all
    three are provided.
    """

    @abstractmethod
    def __init__(self):
        """Set up the concrete data container (implementation-defined)."""

    @abstractmethod
    def get_num_samples(self):
        """Report the number of samples, as defined by the implementer."""

    @abstractmethod
    def get_num_biomarkers(self):
        """Report the number of biomarkers, as defined by the implementer."""
#************
class goldilocks_dpm(ABC):
    """Base class of the Goldilocks-DPM framework.

    Stores the input data object together with the run configuration, and
    offers static helpers that z-score biomarker data against a control
    group.
    """

    def __init__(self,
                 dpmData,
                 classes,
                 output_folder,
                 robust_zscores=True,
                 case_label=1,
                 ctrl_label=0,
                 direction_abnormal=None,
                 biomarker_labels=None):
        """Store the data object and configuration on the instance.

        Parameters
        ----------
        dpmData
            An instance of (a subclass of) ``dpm_data``.
        classes, output_folder, robust_zscores, case_label, ctrl_label,
        direction_abnormal, biomarker_labels
            Stored unchanged as same-named attributes. Nothing in this
            class reads them yet.
            NOTE(review): ``robust_zscores`` presumably selects between
            ``zscore_robust`` and ``zscore`` -- confirm in the concrete
            ``run_goldilocks`` implementation.

        Raises
        ------
        TypeError
            If ``dpmData`` is not a ``dpm_data`` instance.
        """
        # Explicit check rather than ``assert``: asserts vanish under -O.
        if not isinstance(dpmData, dpm_data):
            raise TypeError(
                "dpmData must be an instance of dpm_data, got "
                + type(dpmData).__name__
            )
        self.__dpmData = dpmData
        self.classes = classes
        self.output_folder = output_folder
        self.robust_zscores = robust_zscores
        self.case_label = case_label
        self.ctrl_label = ctrl_label
        self.direction_abnormal = direction_abnormal
        self.biomarker_labels = biomarker_labels

    def run_goldilocks(self, plot=True, plot_format="png", verbose=True, **kwargs):
        """Placeholder entry point; currently does nothing and returns None."""
        pass

    # ********************* STATIC METHODS
    @staticmethod
    def zscore_robust(X, y, ctrl_label, abnormal_direction):
        """Z-score ``X`` against the controls using median and MAD.

        Parameters
        ----------
        X
            2-D array indexed as ``X[row, column]``.
        y
            Labels, one per row of ``X``; rows where ``y == ctrl_label``
            form the control group.
        ctrl_label
            Label value identifying control rows.
        abnormal_direction
            Multiplier applied to the standardised values (scalar, or
            anything that broadcasts against ``X``).

        Returns
        -------
        Z, avg, spread
            ``Z = abnormal_direction * (X - avg) / spread``; ``avg`` and
            ``spread`` are the per-column control median and (unscaled)
            median absolute deviation, each tiled to the shape of ``X``.

        NaNs among the controls are ignored for both the median and the
        MAD. A column whose control MAD is zero still divides by zero.
        """
        X = np.asarray(X)
        is_ctrl = np.asarray(y) == ctrl_label
        controls = X[is_ctrl, :]
        avg = np.nanmedian(controls, axis=0)
        # nan_policy="omit" keeps the spread consistent with nanmedian above;
        # the default ("propagate") would turn the whole column into NaN.
        spread = stats.median_abs_deviation(controls, axis=0, nan_policy="omit")
        avg = np.tile(avg, (X.shape[0], 1))
        spread = np.tile(spread, (X.shape[0], 1))
        Z = abnormal_direction * (X - avg) / spread
        return Z, avg, spread

    @staticmethod
    def zscore(X, y, ctrl_label, abnormal_direction):
        """Z-score ``X`` against the controls using mean and std.

        Parameters
        ----------
        X
            2-D array indexed as ``X[row, column]``.
        y
            Labels, one per row of ``X``; rows where ``y == ctrl_label``
            form the control group.
        ctrl_label
            Label value identifying control rows.
        abnormal_direction
            Multiplier applied to the standardised values (scalar, or
            anything that broadcasts against ``X``).

        Returns
        -------
        Z, avg, spread
            ``Z = abnormal_direction * (X - avg) / spread``; ``avg`` and
            ``spread`` are the per-column control mean and population
            standard deviation (ddof=0), each tiled to the shape of ``X``.

        NaNs among the controls are ignored for both the mean and the
        standard deviation. A column with zero control spread still
        divides by zero.
        """
        X = np.asarray(X)
        is_ctrl = np.asarray(y) == ctrl_label
        controls = X[is_ctrl, :]
        avg = np.nanmean(controls, axis=0)
        # nanstd (not std) so one missing control value does not wipe out
        # the whole column, matching the nanmean used for the centre.
        spread = np.nanstd(controls, axis=0)
        avg = np.tile(avg, (X.shape[0], 1))
        spread = np.tile(spread, (X.shape[0], 1))
        Z = abnormal_direction * (X - avg) / spread
        return Z, avg, spread
# Synthesize z-score data
# - Sample uniformly across multivariate space => should produce uniform staging in SuStaIn subtypes
# Start with default waypoints z=1,2,3
# Calculate biomarker Goldilocks Zones and new waypoints
# => z_min, z_max from Goldilocks event horizons
# => z_mid recommendations:
# 1. Geometric Midpoint
# 2. Vogel-style mixture modelling (as optional input parameter for the goldilocks() function)
# Move the waypoints and output as Z matrix for pySuStaIn