Skip to content

Commit 19d7eb1

Browse files
authored
Merge pull request #82 from netZoo/devel
Devel
2 parents fe08fd1 + 9d8718d commit 19d7eb1

9 files changed

+196
-574
lines changed

.travis.yml

+3-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ matrix:
3737
- octave -q --eval "pkg install io-2.4.12;pkg install nan-3.1.4;pkg install statistics-1.4.1;"
3838
- cd netZooM
3939
- os: osx
40+
osx_image: xcode9.3
4041
before_install:
4142
- brew update
4243
- brew install octave > /dev/null
@@ -48,8 +49,8 @@ matrix:
4849
- rm -rf MOcov;
4950
- git clone https://github.com/MOcov/MOcov.git;
5051
- make -C MOcov install; #Install Octave coverage report
51-
- wget https://github.com/arq5x/bedtools2/releases/download/v2.28.0/bedtools-2.28.0.tar.gz
52-
- tar -zxvf bedtools-2.28.0.tar.gz
52+
- curl -OL https://github.com/arq5x/bedtools2/releases/download/v2.28.0/bedtools-2.28.0.tar.gz
53+
- tar -xzf bedtools-2.28.0.tar.gz
5354
- cd bedtools2
5455
- make
5556
- cd ..

netZooM/panda/panda_run.m

+9-44
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function AgNet=panda_run(lib_path, exp_file, motif_file, ppi_file, panda_out, save_temp, alpha, save_pairs)
1+
function AgNet=panda_run(lib_path, exp_file, motif_file, ppi_file, panda_out, save_temp, alpha, save_pairs, modeProcess)
22
% Description:
33
% Using PANDA to infer gene regulatory network.
44
% 1. Reading in input data (expression data, motif prior, TF PPI data)
@@ -22,6 +22,10 @@
2222
% save_pairs: (Optional) boolean parameter
2323
% 1: the final network will be saved .pairs format where each line has a TF-gene edge (Cytoscape compatible)
2424
% 0: the final network will not be saved in .pairs format
25+
% modeProcess: Refers to the procedure to filter input data.
26+
% 'legacy' old deprecated behavior of netZooM <= 0.5
27+
% (Default)'union' fills missing genes and TFs with zero rows
28+
% 'intersection' removes missing genes and TFs
2529
%
2630
% Outputs:
2731
% AgNet : Predicted complete TF-gene regulatory network using PANDA as a matrix of size (t,g).
@@ -38,6 +42,9 @@
3842
disp(datestr(now));
3943

4044
% Set default parameters
45+
if nargin < 9
46+
modeProcess='union';
47+
end
4148
if nargin < 8
4249
save_pairs=0;
4350
end
@@ -56,49 +63,7 @@
5663
%% ============================================================================
5764
%% Read in Data
5865
%% ============================================================================
59-
disp('Reading in expression data!');
60-
tic
61-
fid = fopen(exp_file, 'r');
62-
headings = fgetl(fid);
63-
n = length(regexp(headings, '\t'));
64-
frewind(fid);
65-
%Exp = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t', 'CommentStyle', '#');
66-
Exp = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t'); % tiny speed-up by not checking for comments
67-
fclose(fid);
68-
GeneNames = Exp{1};
69-
Exp = cat(2, Exp{2:end});
70-
[NumGenes, NumConditions] = size(Exp);
71-
fprintf('%d genes and %d conditions!\n', NumGenes, NumConditions);
72-
Exp = Exp'; % transpose expression matrix from gene-by-sample to sample-by-gene
73-
toc
74-
75-
disp('Reading in motif data!');
76-
tic
77-
[TF, gene, weight] = textread(motif_file, '%s%s%f');
78-
TFNames = unique(TF);
79-
NumTFs = length(TFNames);
80-
[~,i] = ismember(TF, TFNames);
81-
[~,j] = ismember(gene, GeneNames);
82-
RegNet = zeros(NumTFs, NumGenes);
83-
RegNet(sub2ind([NumTFs, NumGenes], i, j)) = weight;
84-
fprintf('%d TFs and %d edges!\n', NumTFs, length(weight));
85-
toc
86-
87-
disp('Reading in ppi data!');
88-
tic
89-
TFCoop = eye(NumTFs);
90-
if(~isempty(ppi_file))
91-
[TF1, TF2, weight] = textread(ppi_file, '%s%s%f');
92-
[~,i] = ismember(TF1, TFNames);
93-
[~,j] = ismember(TF2, TFNames);
94-
TFCoop(sub2ind([NumTFs, NumTFs], i, j)) = weight;
95-
TFCoop(sub2ind([NumTFs, NumTFs], j, i)) = weight;
96-
fprintf('%d PPIs!\n', length(weight));
97-
end
98-
toc
99-
100-
% Clean up variables to release memory
101-
clear headings n TF gene TF1 TF2 weight;
66+
[Exp,RegNet,TFCoop,TFNames,GeneNames]=processData(exp_file,motif_file,ppi_file,modeProcess);
10267

10368
%% ============================================================================
10469
%% Run PANDA

netZooM/tools/NormalizeNetwork.m

+17-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,21 @@
1111
% Authors:
1212
% Kimberley Glass
1313

14-
Z1 = zscore(X, 1, 1);
15-
Z2 = zscore(X, 1, 2);
16-
normMat = (Z1 + Z2) / sqrt(2);
14+
mu0=mean(X(:));
15+
std0=std(X(:));
16+
mu1=mean(X);
17+
std1=std(X,1);
18+
mu2=mean(X,2);
19+
std2=std(X,1,2);
20+
21+
Z1=(X-repmat(mu1, size(X,1), 1))./repmat(std1, size(X,1), 1);
22+
Z2=(X-repmat(mu2, 1, size(X,2)))./repmat(std2, 1, size(X,2));
23+
normMat=Z1/sqrt(2)+Z2/sqrt(2);
24+
25+
% checks and defaults for missing data
26+
Z0=(X-mu0)/std0;
27+
f1=isnan(Z1); f2=isnan(Z2);
28+
normMat(f1)=Z2(f1)/sqrt(2)+Z0(f1)/sqrt(2);
29+
normMat(f2)=Z1(f2)/sqrt(2)+Z0(f2)/sqrt(2);
30+
normMat(f1 & f2)=2*Z0(f1 & f2)/sqrt(2);
1731
end

netZooM/tools/processData.m

+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
function [Exp,RegNet,TFCoop,TFNames,GeneNames]=processData(exp_file,motif_file,ppi_file,modeProcess)
% Description:
%               processData reads the expression, motif and PPI inputs and aligns
%               them on a common set of genes and TFs before running PANDA.
%               Three alignment modes are available.
%
% Inputs:
%               exp_file   : path to a tab-delimited expression file. The first column holds
%                            gene names, the remaining columns hold numeric values
%                            (one row per gene, one column per condition).
%               motif_file : path to the motif prior, one "TF gene weight" triple per line.
%               ppi_file   : path to the TF-TF interaction file, one "TF1 TF2 weight" triple per line.
%                            In 'legacy' mode an empty value yields an identity TFCoop.
%               modeProcess: (Optional) 'legacy'       old deprecated behavior of netZooM <= 0.5
%                            (Default)  'union'        fills missing genes and TFs with zero rows
%                                       'intersection' removes missing genes and TFs
% Outputs:
%               Exp        : aligned expression matrix, transposed to (conditions x genes)
%               RegNet     : aligned motif prior matrix of size (t,g)
%               TFCoop     : aligned PPI matrix of size (t,t)
%               TFNames    : transcription factor names
%               GeneNames  : gene names
%
% Errors:
%               Raises an error when modeProcess is not one of the three supported
%               values, or (legacy mode) when the expression file cannot be opened.
%
% Author: Marouen Ben Guebila 12/2019

    % Apply the documented default when the mode is omitted or empty.
    if nargin < 4 || isempty(modeProcess)
        modeProcess = 'union';
    end

    if isequal(modeProcess,'legacy')
        disp('Reading in expression data!');
        tic
            fid = fopen(exp_file, 'r');
            if fid < 0
                error('Could not open expression file: %s', exp_file);
            end
            % The first line is only inspected to count the tab-separated
            % numeric columns; it is data, so rewind before parsing.
            headings = fgetl(fid);
            n = length(regexp(headings, '\t'));
            frewind(fid);
            %Exp = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t', 'CommentStyle', '#');
            Exp = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t'); % tiny speed-up by not checking for comments
            fclose(fid);
            GeneNames = Exp{1};
            Exp = cat(2, Exp{2:end});
            [NumGenes, NumConditions] = size(Exp);
            fprintf('%d genes and %d conditions!\n', NumGenes, NumConditions);
            Exp = Exp'; % transpose expression matrix from gene-by-sample to sample-by-gene
        toc

        disp('Reading in motif data!');
        tic
            [TF, gene, weight] = textread(motif_file, '%s%s%f');
            TFNames = unique(TF);
            NumTFs = length(TFNames);
            [~,i] = ismember(TF, TFNames);
            % Legacy behavior: every motif gene is expected to be present in
            % the expression file; a missing gene gives j == 0 and sub2ind errors.
            [~,j] = ismember(gene, GeneNames);
            RegNet = zeros(NumTFs, NumGenes);
            RegNet(sub2ind([NumTFs, NumGenes], i, j)) = weight;
            fprintf('%d TFs and %d edges!\n', NumTFs, length(weight));
        toc

        disp('Reading in ppi data!');
        tic
            TFCoop = eye(NumTFs);
            if(~isempty(ppi_file))
                [TF1, TF2, weight] = textread(ppi_file, '%s%s%f');
                [~,i] = ismember(TF1, TFNames);
                [~,j] = ismember(TF2, TFNames);
                % Write both (i,j) and (j,i) so the matrix stays symmetric.
                TFCoop(sub2ind([NumTFs, NumTFs], i, j)) = weight;
                TFCoop(sub2ind([NumTFs, NumTFs], j, i)) = weight;
                fprintf('%d PPIs!\n', length(weight));
            end
        toc
    elseif isequal(modeProcess,'union') || isequal(modeProcess,'intersection')
        % Both modes read the same raw data and differ only in how the
        % final gene/TF universe is derived from it.
        [GeneMotif,GeneNamesExp,TfMotif,TFNamesInit,NumConditions,...
            ExpInit,TF,gene,weightMotif,weightPPI,TF1,TF2]=...
            readData(exp_file,motif_file,ppi_file);
        if isequal(modeProcess,'union')
            GeneNames=unique(union(GeneMotif,GeneNamesExp));
            TFNames  =unique(union(TfMotif,TFNamesInit));
        else
            GeneNames=intersect(GeneMotif,GeneNamesExp);
            TFNames  =intersect(TfMotif,TFNamesInit);
        end
        [Exp,RegNet,TFCoop]=populateData(GeneNames,TFNames,NumConditions,...
            GeneNamesExp,ExpInit,TF,gene,weightMotif,weightPPI,TF1,TF2);
    else
        % Without this branch the outputs would be left unassigned and the
        % caller would only see an unrelated "output not assigned" error.
        error('processData:unknownMode', ...
            'Unknown modeProcess. Expected ''legacy'', ''union'' or ''intersection''.');
    end
end
80+
81+
function [Exp,RegNet,TFCoop]=populateData(GeneNames,TFNames,NumConditions,...
    GeneNamesExp,ExpInit,TF,gene,weightMotif,weightPPI,TF1,TF2)
% Description:
%               Builds the expression, motif and PPI matrices on the gene and TF
%               universe given by GeneNames and TFNames. Entries that refer to a
%               gene or TF outside that universe are dropped; genes/TFs of the
%               universe without data keep zero rows/columns.
%
% Inputs:
%               GeneNames    : cell array of gene names defining the gene axis
%               TFNames      : cell array of TF names defining the TF axis
%               NumConditions: number of columns of ExpInit
%               GeneNamesExp : gene name of each row of ExpInit
%               ExpInit      : expression values, one row per entry of GeneNamesExp
%               TF, gene, weightMotif : motif edges (TF name, gene name, weight)
%               TF1, TF2, weightPPI   : PPI edges (TF name, TF name, weight)
% Outputs:
%               Exp    : expression matrix of size (NumConditions, NumGenes)
%               RegNet : motif matrix of size (NumTFs, NumGenes)
%               TFCoop : symmetric PPI matrix of size (NumTFs, NumTFs)

    NumTFs   = length(TFNames);
    NumGenes = length(GeneNames);

    %Initialize result
    RegNet = zeros(NumTFs,NumGenes);
    Exp    = zeros(NumGenes,NumConditions);

    %Gene expression
    % ig(k) is the row of GeneNames matching GeneNamesExp{k} (0 when absent).
    % Each source row k must be written to destination row ig(k); copying to
    % row k instead misaligns genes whenever the two orders differ and can
    % grow Exp beyond NumGenes rows.
    [~,ig]  = ismember(GeneNamesExp,GeneNames);
    present = ig > 0;
    Exp(ig(present),:) = ExpInit(present,:);
    Exp = Exp';

    %Motif
    [~,i] = ismember(TF, TFNames);
    [~,j] = ismember(gene, GeneNames);
    keepMotif   = (i > 0) & (j > 0);   % keep edges whose TF and gene are both known
    i           = i(keepMotif);
    j           = j(keepMotif);
    weightMotif = weightMotif(keepMotif);
    RegNet(sub2ind([NumTFs, NumGenes], i, j)) = weightMotif;
    fprintf('%d TFs and %d edges!\n', NumTFs, length(weightMotif));

    %PPI
    if isempty(TF1)
        % No PPI edges supplied: use the identity, as the legacy reader
        % does (TFCoop = eye(NumTFs)), instead of an all-zero matrix.
        TFCoop = eye(NumTFs);
    else
        TFCoop = zeros(NumTFs,NumTFs);
        [~,i] = ismember(TF1, TFNames);
        [~,j] = ismember(TF2, TFNames);
        keepPPI   = (i > 0) & (j > 0);
        i         = i(keepPPI);
        j         = j(keepPPI);
        weightPPI = weightPPI(keepPPI);
        % Write both orientations so the matrix stays symmetric.
        TFCoop(sub2ind([NumTFs, NumTFs], i, j)) = weightPPI;
        TFCoop(sub2ind([NumTFs, NumTFs], j, i)) = weightPPI;
    end
end
112+
113+
function [GeneMotif,GeneNamesExp,TfMotif,TFNamesInit,NumConditions,...
    ExpInit,TF,gene,weightMotif,weightPPI,TF1,TF2]=...
    readData(exp_file,motif_file,ppi_file)
% Description:
%               Reads the raw expression, motif and PPI files without aligning them.
%
% Inputs:
%               exp_file  : path to a tab-delimited expression file (gene name, then numeric columns)
%               motif_file: path to the motif prior, one "TF gene weight" triple per line
%               ppi_file  : path to the PPI file, one "TF1 TF2 weight" triple per line;
%                           may be empty, in which case no PPI edges are returned
% Outputs:
%               GeneMotif    : unique gene names found in the motif file
%               GeneNamesExp : gene names of the expression file, in file order
%               TfMotif      : unique TF names found in the motif file
%               TFNamesInit  : unique TF names found in the PPI file; falls back to
%                              TfMotif when no PPI file is given
%               NumConditions: number of numeric columns in the expression file
%               ExpInit      : expression values, one row per entry of GeneNamesExp
%               TF, gene, weightMotif : motif edges as read
%               TF1, TF2, weightPPI   : PPI edges as read (empty when no PPI file)
%
% Errors:
%               Raises an error when the expression file cannot be opened or is
%               empty, when it contains duplicate gene names, or when the sets of
%               TFs in the two PPI columns differ.

    % Read expression
    disp('Reading in expression data!');
    tic
        fid = fopen(exp_file, 'r');
        if fid < 0
            error('Could not open expression file: %s', exp_file);
        end
        % The first line is only used to count tab-separated columns; it is
        % data as well, so the file is rewound before parsing.
        headings = fgetl(fid);
        if ~ischar(headings)
            fclose(fid);
            error('Expression file is empty: %s', exp_file);
        end
        n = length(regexp(headings, '\t'));
        frewind(fid);
        ExpInit = textscan(fid, ['%s', repmat('%f', 1, n)], 'delimiter', '\t'); % tiny speed-up by not checking for comments
        fclose(fid);
        GeneNamesExp = ExpInit{1};
        ExpInit = cat(2, ExpInit{2:end});
        [~, NumConditions] = size(ExpInit);
    toc
    if length(unique(GeneNamesExp)) ~= length(GeneNamesExp)
        error('There are duplicate genes in the expression matrix.')
    end

    % Read motif
    disp('Reading in motif data!');
    [TF, gene, weightMotif] = textread(motif_file, '%s%s%f');
    TfMotif  = unique(TF);
    GeneMotif= unique(gene);

    % Read PPI
    disp('Reading in ppi data!');
    if(~isempty(ppi_file))
        [TF1, TF2, weightPPI] = textread(ppi_file, '%s%s%f');
        TFNamesInit=unique(TF1);
        if ~isequal(TFNamesInit,unique(TF2))
            error('PPI data has missing information.')
        end
    else
        % No PPI file: return empty edge lists instead of leaving the
        % outputs undefined, and let the motif TFs define the TF set so
        % that neither union nor intersection ends up without TFs.
        TF1 = cell(0,1);
        TF2 = cell(0,1);
        weightPPI = zeros(0,1);
        TFNamesInit = TfMotif;
    end
end

0 commit comments

Comments
 (0)