forked from neurodata/r-mgc
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdiscrStat.R
More file actions
110 lines (104 loc) · 4.39 KB
/
discrStat.R
File metadata and controls
110 lines (104 loc) · 4.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#' Reliability Density Function
#'
#' A function for computing the reliability density function of a dataset.
#'
#' @param D \code{[n, n]} a distance matrix for n samples.
#' @param ids \code{[n]} a vector containing the label ids for each sample.
#' @return \code{[n]} vector of the reliability per sample.
#' @author Eric Bridgeford
discr.rdf <- function(D, ids) {
N <- dim(D)[1]
if (is.null((N))) {
stop('Invalid datatype for N')
}
uniqids <- unique(as.character(ids))
countvec <- vector(mode="numeric",length=length(uniqids))
for (i in 1:length(uniqids)) {
countvec[i] <- sum(uniqids[i] == ids) # total number of scans for the particular id
}
scans <- max(countvec) # assume that we will worst case have the most
rdf <- array(NaN, N*(scans-1)) # initialize empty ra
count <- 1
for (i in 1:N) {
ind <- which(ids[i] == ids) # all the indices that are the same subject, but different scan
for (j in ind) {
if (!isTRUE(all.equal(j, i))) { # if j != i, then we want j to have a close distance to i, and estimate where it ranks
di <- D[i,] # get the entire ra for the particular scan
di[ind] <- Inf # don't want to consider the particular scan itself
d <- D[i,j] # the distance between the particular scan of a subject and another scan of the subject
rdf[count] <- 1 - (sum(di[!is.nan(di)] < d) + 0.5*sum(di[!is.nan(di)] == d)) / (N-length(ind)) # 1 for less than, .5 if equal, then average
count <- count + 1
}
}
}
return(rdf[1:count-1]) # return only the occupied portion
}
#' Mean Normalized Rank
#'
#' A function for computing the mnr from an rdf.
#'
#' @param rdf \code{[n]} the reliability density function for \code{n} samples.
#' @param remove_outliers boolean indicating whether to ignore samples with rdf below a certain cutoff. Defaults to \code{FALSE}.
#' @param thresh the threshold below for \code{rdf} which to ignore samples. Defaults to \code{0}.
#' @param output a boolean indicating whether to ignore output. Defaults to \code{False}.
#' @return \code{discr} the discriminability statistic.
#' @author Eric Bridgeford
discr.mnr <- function(rdf, remove_outliers=FALSE, thresh=0, output=FALSE) {
if (remove_outliers) {
discr <- mean(rdf[which(rdf[!is.nan(rdf)] > thresh)]) # mean of the rdf
ol <- length(which(rdf<thresh))
if (output) {
print(paste('Graphs with reliability <',thresh,'(outliers):', ol))
}
} else {
ol <- 0
discr <- mean(rdf[!is.nan(rdf)])
}
nopair <- length(rdf[is.nan(rdf)])
if (output) {
print(paste('Graphs with unique ids:',nopair))
print(paste('Graphs available for reliability analysis:', length(rdf)-ol-nopair))
print(paste('discr:', discr))
}
return(discr)
}
#' Discriminability Statistic
#'
#' A function for computing the discriminability from a distance matrix and a set of associated labels.
#'
#' @param X is interpreted as:
#' \describe{
#' \item{a \code{[n x n]} distance matrix}{X is a square matrix with zeros on diagonal for \code{n} samples.}
#' \item{a \code{[n x d]} data matrix}{X is a data matrix with \code{n} samples in \code{d} dimensions.}
#' }
#' @param ids \code{[n]} a vector containing the labels for our \code{n} samples.
#' @param remove_outliers boolean indicating whether to ignore observations with rdf below a certain cutoff. Defaults to \code{FALSE}.
#' @param thresh the threshold below which to ignore observations. If thresh > 0, ignores observations where the rdf is < thresh in the discriminability computation. Defaults to \code{0}.
#' @param verbose a boolean indicating whether to print output. Defaults to \code{FALSE}.
#' @return discr the discriminability statistic.
#'
#' @section Details:
#' For more details see the help vignette:
#' \code{vignette("discriminability", package = "mgc")}
#'
#' @examples
#'
#' nsrc <- 5
#' nobs <- 10
#' d <- 20
#' set.seed(12345)
#' src_id <- array(1:nsrc)
#' labs <- sample(rep(src_id, nobs))
#' dat <- t(sapply(labs, function(lab) rnorm(d, mean=lab, sd=1)))
#' discr.stat(dat, labs)
#'
#' @author Eric Bridgeford
#' @export
discr.stat <- function(X, ids, remove_outliers=FALSE, thresh=0, verbose=FALSE) {
X <- as.matrix(X)
# Use the data size and diagonal element to determine if the given data is a distance matrix or not
if (nrow(X) != ncol(X) | sum(diag(X)^2) > 0){
X <- discr.distance(X)
}
return(discr.mnr(discr.rdf(X, ids), remove_outliers=remove_outliers, thresh=thresh, output=(verbose)))
}