-
Notifications
You must be signed in to change notification settings - Fork 61
Expand file tree
/
Copy pathanalyzeSampling.m
More file actions
executable file
·173 lines (157 loc) · 5.89 KB
/
Copy pathanalyzeSampling.m
File metadata and controls
executable file
·173 lines (157 loc) · 5.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
function scores=analyzeSampling(Tex, df, solutionsA, solutionsB, printResults)
% analyzeSampling
% Compares the significance of change in flux between two conditions with
% the significance of change in gene expression
%
% Tex a vector of t-scores for the change in gene expression
% for each reaction. This score could be the Student t
% between the two conditions, or you can calculate it from
% a p-value (by computing the inverse of the so called error
% function). If you choose the second alternative you should
% be aware that the transcripts that increased in expression
% level should have positive values and those who decreased
% in expression level should have negative values (the
% p-values only tell you if the fluxes changed or not but
% not in which direction)
% df the degrees of freedom in the t-test
% solutionsA random solutions for the reference condition (as
% generated by randomSampling)
% solutionsB random solutions for the test condition (as generated
% by randomSampling)
% printResults prints the most significant reactions in each category
% (optional, default false)
%
% scores a Nx3 column matrix with the probabilities of a reaction:
% 1) changing both in flux and expression in the same direction
% 2) changing in expression but not in flux
% 3) changing in flux but not in expression or changing
% in opposed directions in flux and expression.
%
% Usage: scores=analyzeSampling(Tex, df, solutionsA, solutionsB, printResults)
if nargin<5
printResults=false;
end
nRxns=numel(Tex);
pM=zeros(nRxns,1);
pH=zeros(nRxns,1);
pR=zeros(nRxns,1);
%Check that the number of reactions is the same in both expression and flux
if nRxns~=size(solutionsA,1)
EM='The number of reactions must be the same in Tex as in solutionsA';
dispEM(EM);
end
%Get the Z-score and mean for the solutions
mA=mean(solutionsA,2);
mB=mean(solutionsB,2);
Zf=getFluxZ(solutionsA, solutionsB);
%Clear up the tex if there are elements that are NaN or +/- Inf.
I=isnan(Tex) | isinf(Tex);
if any(I)
EM='There are t-scores that are NaN or +/- Inf. These values are changed to 0.0';
dispEM(EM,false);
end
Tex(I)=0;
for i=1:nRxns
%Check how the flux has changed. The means are needed because to
%differentiate between a positive flux changing to a smaller flux and a
%negative flux changing to a more negative flux (which is a larger
%flux)
I=mB(i)/mA(i);
if I<0
pM(i)=erf(abs(Zf(i)));
pH(i)=(1-pM(i))*(2*tcdf(abs(Tex(i)),df)-1);
pR(i)=0;
else
if mB(i)<0
Zf(i)=Zf(i)*-1;
end
end
I=Zf(i)/Tex(i);
if I<0
pM(i)=erf(abs(Zf(i)));
pH(i)=(1-pM(i))*(2*tcdf(abs(Tex(i)),df)-1);
pR(i)=0;
else
pR(i)=erf(abs(Zf(i)))*(2*tcdf(abs(Tex(i)),df)-1);
pH(i)=(2*tcdf(abs(Tex(i)),df)-1)*(1-erf(abs(Zf(i))));
pM(i)=erf(abs(Zf(i)))*(1-(2*tcdf(abs(Tex(i)),df)-1));
end
end
scores=[pR pH pM];
if printResults==true
fprintf('TOP SCORING REACTIONS\n\n');
%The top 10 hits in the first category
[I, J]=sort(pR,'descend');
fprintf('Reactions which change both in flux and expression in the same direction\nReaction\tProbability\n');
for i=1:10
fprintf([num2str(J(i)) '\t' num2str(I(i)) '\n']);
end
%The top 10 hits in the first category
[I, J]=sort(pH,'descend');
fprintf('\nReactions which change in expression but not in flux\nReaction\tProbability\n');
for i=1:10
fprintf([num2str(J(i)) '\t' num2str(I(i)) '\n']);
end
%The top 10 hits in the first category
[I, J]=sort(pM,'descend');
fprintf('\nReactions which change in flux but not in expression, or in opposed directions in flux and expression\nReaction\tProbability\n');
for i=1:10
fprintf([num2str(J(i)) '\t' num2str(I(i)) '\n']);
end
end
end
function p = tcdf(t,v,uflag)
% TCDF Student's T cumulative distribution function (cdf).
%
% P = TCDF(X,V) computes the cdf for Student's T distribution
% with V degrees of freedom, at the values in X. V must be a
% scalar or have the same size as T.
%
% P = TCDF(X,V,'upper') computes it for the upper tail instead
% of the lower tail.
%
% This is an alternative to the TCDF function that is implemented
% in the Matlab statistics toolbox. This version originates from
% http://www.statsci.org/matlab/statbox.html and originally was called TP.
% It has been renamed to TCDF for drop-in compatibility with the Matlab
% version.
%
% Gordon Smyth, University of Queensland, gks@maths.uq.edu.au
% 3 Apr 97
%
% NaN compatible - Markus Bauer and Eric Maris, FCDC
% 27 Jan 2005
%
% fixed bug concerning NaN compatibility
% 21 Aug 2006, Markus Siegel
%
% added support for upper tail, see http://bugzilla.fieldtriptoolbox.org/show_bug.cgi?id=3045
% 13 Jan 2016, Robert Oostenveld
%
% taken from https://github.com/fieldtrip/fieldtrip/blob/master/external/stats/tcdf.m
% 05 May 2022
if v <= 0, error('Degrees of freedom must be positive.'); end
% resize v if necessary
if all(size(v)==1)
v = ones(size(t))*v;
end
%check for NaN's - don't do calculations on them, give those out as NaNs
if any( not(isfinite(t(:))) | not(isfinite(v(:))) )
sel = find(isfinite(t) & isfinite(v));
x=nan(size(t));
p=nan(size(t));
x(sel) = t(sel).^2 ./ (v(sel) + t(sel).^2) ;
p(sel) = 0.5 .* ( 1 + sign(t(sel)) .* betainc( x(sel), 0.5, 0.5*v(sel) ) );
else
x = t.^2 ./ (v + t.^2) ;
p = 0.5 .* ( 1 + sign(t) .* betainc( x, 0.5, 0.5*v ) );
end
if nargin==3
if strcmp(uflag, 'upper')
% compute it for the upper tail instead of the lower tail
p = 1-p;
else
error('incorrect specification of uflag');
end
end
end