1818import org .omixer .utils .Constants ;
1919
2020public class ModuleTaxonomyMatrixWriter extends MatrixWriter {
21-
22- public void writeMatrix (Map <String , Modules > moduleInference , File outfile , Function <Module , Double > f ) throws IOException {
21+
22+ /*
23+ * (non-Javadoc)
24+ * @see org.omixer.rpm.model.io.MatrixWriter#writeMatrix(java.util.Map, java.io.File, java.util.function.Function)
25+ */
26+ public void writeMatrix (Map <String , Modules > moduleInference , File outfile , Function <Module , Double > f )
27+ throws IOException {
2328 // Map of all observed combinations of taxa and modules
2429 Map <String , Set <String >> taxaModules = new HashMap <>();
25-
26- /**
27- * As the module space is very small i.e max 120 modules. For a 1000
28- * samples we have 120000 objects to store which is nothing. So add all
29- * the Modules to a list => put species_ko into hash => iterate and save
30- * sample/features
31- */
32- // generate the Observed taxonModules
30+ // Map of modules by taxon and moduleId for a quick lookup
31+ Map <String , Map <String , Module >> sampleTaxonModules = new HashMap <>();
32+ // generate the Observed taxonModules, as row names for the matrix
3333 for (Entry <String , Modules > entry : moduleInference .entrySet ()) {
34+ // map modules by taxon and moduleId
35+ Map <String , Module > taxonMods = new HashMap <>();
36+ sampleTaxonModules .put (entry .getKey (), taxonMods );
37+ // retain above cutoff modules
3438 List <Module > modules = entry .getValue ().toAboveCutoffList ();
35- // all modules are above cutoff and there is no need to
36- // filter them anymore
39+ // set the new modules
3740 entry .getValue ().setModules (modules );
3841 for (Module module : modules ) {
3942 String taxon = module .getTaxon ();
@@ -47,47 +50,119 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
4750 taxaModules .put (taxon , taxonModules );
4851 }
4952 taxonModules .add (module .getModuleId ());
53+ taxonMods .put (taxon + module .getModuleId () , module );
5054 }
5155 }
5256
5357 /*
54- * The number of samples is known, so write header For each observed
55- * entry < find observed value
58+ * The number of samples is known, so write header For each observed entry <
59+ * find observed value
5660 */
5761 try (BufferedWriter out = new BufferedWriter (new FileWriter (outfile ))) {
5862 List <String > samples = moduleInference .keySet ().stream ().collect (Collectors .toList ());
5963 // output header
6064 String header = samples .stream ().reduce ("Taxon\t Module" , (a , b ) -> (a + Constants .TAB + b ));
6165 out .write (header + Constants .NEW_LINE );
62- /**
63- * Think of another way to optimize. - Could also reduce the search
64- * space after each iteration by removing matched object - Or sort
65- * and compare based on sort to ensure next objext is the closets to
66- * top object
67- */
68-
69- // output for each features
7066 // each taxon
67+ // TODO remove each entry after iteration
7168 for (Entry <String , Set <String >> taxonModules : taxaModules .entrySet ()) {
7269 // each module
7370 for (String observedModule : taxonModules .getValue ()) {
74- String outputString = taxonModules .getKey () + Constants .TAB + observedModule ;
71+ String countOutputString = taxonModules .getKey () + Constants .TAB + observedModule ;
7572 // each sample
76- for (Entry <String , Modules > entry : moduleInference .entrySet ()) {
73+ for (String sample : samples ) {
74+ Module module = sampleTaxonModules .get (sample ).remove (taxonModules .getKey () + observedModule );
7775 Double count = Constants .ZERO ;
78- for (Module module : entry .getValue ().getModules ()) {
79- if (taxonModules .getKey ().equals (module .getTaxon ())
80- && module .getModuleId ().equals (observedModule )) {
81- count = f .apply (module );
82- break ;
83- }
76+
77+ if (module != null ) {
78+ count = f .apply (module );
8479 }
85- outputString += Constants .TAB + count ;
80+ countOutputString += Constants .TAB + count ;
8681 }
87- out .write (outputString );
82+ out .write (countOutputString );
8883 out .newLine ();
8984 }
9085 }
9186 }
9287 }
88+
89+ /*
90+ * (non-Javadoc)
91+ *
92+ * @see org.omixer.rpm.model.io.MatrixWriter#exportModules(java.util.Map,
93+ * java.io.File, java.io.File)
94+ */
95+ @ Override
96+ public void exportModules (Map <String , Modules > moduleInference , File outCounts , File outCoverage )
97+ throws IOException {
98+ // Map of all observed combinations of taxa and modules
99+ Map <String , Set <String >> taxaModules = new HashMap <>();
100+ // Map of modules by taxon and moduleId for a quick lookup
101+ Map <String , Map <String , Module >> sampleTaxonModules = new HashMap <>();
102+ // generate the Observed taxonModules, as row names for the matrix
103+ for (Entry <String , Modules > entry : moduleInference .entrySet ()) {
104+ // map modules by taxon and moduleId
105+ Map <String , Module > taxonMods = new HashMap <>();
106+ sampleTaxonModules .put (entry .getKey (), taxonMods );
107+ // retain above cutoff modules
108+ List <Module > modules = entry .getValue ().toAboveCutoffList ();
109+ // set the new modules
110+ entry .getValue ().setModules (modules );
111+ for (Module module : modules ) {
112+ String taxon = module .getTaxon ();
113+ // make sure it is not null
114+ if (taxon == null ) {
115+ taxon = Constants .EMPTY_STRING ;
116+ }
117+ Set <String > taxonModules = taxaModules .get (taxon );
118+ if (taxonModules == null ) {
119+ taxonModules = new HashSet <>();
120+ taxaModules .put (taxon , taxonModules );
121+ }
122+ taxonModules .add (module .getModuleId ());
123+ taxonMods .put (taxon + module .getModuleId () , module );
124+ }
125+ }
126+
127+ /*
128+ * The number of samples is known, so write header For each observed entry <
129+ * find observed value
130+ */
131+ try (BufferedWriter countOut = new BufferedWriter (new FileWriter (outCounts ));
132+ BufferedWriter coverageOut = new BufferedWriter (new FileWriter (outCoverage ))) {
133+ List <String > samples = moduleInference .keySet ().stream ().collect (Collectors .toList ());
134+ // output header
135+ String header = samples .stream ().reduce ("Taxon\t Module" , (a , b ) -> (a + Constants .TAB + b ));
136+ countOut .write (header + Constants .NEW_LINE );
137+ coverageOut .write (header + Constants .NEW_LINE );
138+ // each taxon
139+ // TODO remove each entry after iteration
140+ for (Entry <String , Set <String >> taxonModules : taxaModules .entrySet ()) {
141+ // each module
142+ for (String observedModule : taxonModules .getValue ()) {
143+ String countOutputString = taxonModules .getKey () + Constants .TAB + observedModule ;
144+ String coverageOutputString = countOutputString ;
145+ // each sample
146+ for (String sample : samples ) {
147+ Module module = sampleTaxonModules .get (sample ).remove (taxonModules .getKey () + observedModule );
148+ Double count = Constants .ZERO ;
149+ Double coverage = Constants .ZERO ;
150+
151+ if (module != null ) {
152+ count = module .getCount ();
153+ coverage = module .getCoverage ();
154+ }
155+
156+ countOutputString += Constants .TAB + count ;
157+ coverageOutputString += Constants .TAB + coverage ;
158+ }
159+ countOut .write (countOutputString );
160+ countOut .newLine ();
161+
162+ coverageOut .write (coverageOutputString );
163+ coverageOut .newLine ();
164+ }
165+ }
166+ }
167+ }
93168}
0 commit comments