Skip to content

Commit 969a326

Browse files
authored
Merge pull request #2 from raeslab/develop
- Addition of abundance based on ortholog Sum or Minimum - Dramatic performance and slight memory improvements (~30-fold for large datasets, from 50 minutes to 1.5 minutes)
2 parents f93be53 + c6c5a62 commit 969a326

18 files changed

+444
-157
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ java -jar [omixer-rpm.jar](../../releases/latest) [-a <ANNOTATION>] [-c <COVERA
2424
the number of observed reactions
2525
-o,--output-dir <DIRECTORY> Path to the output directory
2626
-s,--score-estimator <SCORE-ESTIMATOR> The score estimator.
27-
Accepted values are [median|average].
27+
Accepted values are [median|average|sum|min].
2828
Defaults to median
2929
-t,--threads <THREADS> Number of threads to use when mapping the modules.
3030
Defaults to 1

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ plugins {
99
}
1010

1111
group = "org.omixer"
12-
version = '1.0'
12+
version = '1.1'
1313

1414
repositories {
1515
jcenter()

src/main/java/org/omixer/rpm/core/InferenceApp.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.omixer.rpm.model.ModuleCoverageDistribution;
2222
import org.omixer.rpm.model.ModuleInferenceOptions;
2323
import org.omixer.rpm.model.Modules;
24+
import org.omixer.rpm.model.enums.ModuleInferenceOptimizers;
2425
import org.omixer.rpm.model.enums.ScalingMethod;
2526
import org.omixer.rpm.model.io.MatrixWriter;
2627
import org.omixer.rpm.model.io.ModuleMatrixWriter;
@@ -41,7 +42,7 @@ public class InferenceApp extends AbstractInferenceApp {
4142
public static final String EXEC_COMMAND = "java -jar " + TOOL_NAME + " ";
4243
public static final String HEADER = "\n\nDESCRIPTION\n"
4344
+ " Omixer-RPM\n A Reference Pathways Mapper for turning metagenomic functional profiles into pathway/module profiles\n\n"
44-
+ "VERSION: 1.0 (13 June 2018)\n" + "AUTHOR: Youssef Darzi <[email protected]>\n\n"
45+
+ "VERSION: 1.1\n" + "AUTHOR: Youssef Darzi <[email protected]>\n\n"
4546
+ "ARGUMENTS (Options starting with -X are non-standard and subject to change without notice.)\n\n";
4647

4748
public static final String FOOTER = "\nLicensed under an Academic Non-commercial Software License Agreement, https://github.com/raeslab/omixer-rpm/blob/master/LICENSE";
@@ -58,7 +59,7 @@ public static void main(String[] args) {
5859
.hasArg().argName("COVERAGE").build());
5960

6061
options.addOption(Option.builder("s").longOpt("score-estimator")
61-
.desc("The score estimatore.\nAccepted values are [median|average].\nDefaults to median").hasArg()
62+
.desc("The score estimatore.\nAccepted values are [median|average|sum|min].\nDefaults to median").hasArg()
6263
.argName("SCORE-ESTIMATOR").build());
6364

6465
options.addOption(Option.builder("n").longOpt("normalize-by-length")
@@ -149,7 +150,11 @@ public static void main(String[] args) {
149150
String estimator = line.getOptionValue("score-estimator");
150151

151152
if ("average".equals(estimator)) {
152-
algorithm = "ABUNDANCE_COVERAGE_REACTION_BASED";
153+
algorithm = ModuleInferenceOptimizers.ABUNDANCE_COVERAGE_REACTION_BASED.displayName();
154+
} else if ("sum".equals(estimator)) {
155+
algorithm = ModuleInferenceOptimizers.SUM.displayName();
156+
} else if ("min".equals(estimator)) {
157+
algorithm = ModuleInferenceOptimizers.MIN.displayName();
153158
} else if (!"median".equals(estimator)) {
154159
throw new IllegalArgumentException(estimator
155160
+ " is not a valid value for score calculation. Please chose between median or average");
@@ -277,9 +282,8 @@ public static void main(String[] args) {
277282
File outCounts = new File(outputDir, "modules.tsv");
278283
File outCoverage = new File(outputDir, "modules-coverage.tsv");
279284
MatrixWriter matrixWriter = (annotation.equals("2") && isPerTaxon) ? new ModuleTaxonomyMatrixWriter() : new ModuleMatrixWriter();
280-
matrixWriter.writeCounts(moduleInference, outCounts);
281-
matrixWriter.writeCoverage(moduleInference, outCoverage);
282-
285+
// exportModules in one go instead of iterating uselessly twice
286+
matrixWriter.exportModules(moduleInference, outCounts, outCoverage);
283287
}
284288
} catch (IOException | IncorrectNumberOfEntriesException e) {
285289
app.log.error("Exception while reading input data: " + e.getMessage());

src/main/java/org/omixer/rpm/model/BasicFeature.java

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
/**
99
* A basic feature with name, taxon, score, and one other function so far
10-
* TODO add an annotation Map to allow for more that one annotation
10+
* TODO add an annotation Map to allow for more than 1 annotation
1111
*
1212
* @author <a href="mailto:[email protected]">Youssef Darzi</a>
1313
*
@@ -22,7 +22,6 @@ public class BasicFeature {
2222
*
2323
*/
2424
private Long id;
25-
private String featureId;
2625
private String function;
2726
private Double count;
2827
private String taxon;
@@ -35,9 +34,8 @@ public class BasicFeature {
3534
* @param count
3635
* @param taxon
3736
*/
38-
public BasicFeature(String featureId, String taxon, String function, Double count) {
37+
public BasicFeature(String taxon, String function, Double count) {
3938
super();
40-
this.featureId = featureId;
4139
this.function = function;
4240
this.count = count;
4341
this.taxon = taxon;
@@ -62,13 +60,6 @@ public String getFunction() {
6260
return function;
6361
}
6462

65-
/**
66-
* @return the id
67-
*/
68-
public String getFeatureId() {
69-
return featureId;
70-
}
71-
7263
/**
7364
* @return the taxon
7465
*/
@@ -95,14 +86,6 @@ public void setFunction(String function) {
9586
this.function = function;
9687
}
9788

98-
/**
99-
* @param featureId
100-
* the id to set
101-
*/
102-
public void setFeatureId(String featureId) {
103-
this.featureId = featureId;
104-
}
105-
10689
/**
10790
* @param count
10891
* the score to set
@@ -136,7 +119,6 @@ public boolean haveValidFunction() {
136119
public String toString() {
137120
ToStringBuilder tsb = new ToStringBuilder(this,
138121
ToStringStyle.SHORT_PREFIX_STYLE);
139-
tsb.append(featureId);
140122
tsb.append(taxon);
141123
tsb.append(function);
142124
tsb.append(count);
@@ -156,7 +138,6 @@ public boolean equals(Object o) {
156138
BasicFeature bf = (BasicFeature) o;
157139
EqualsBuilder eb = new EqualsBuilder();
158140

159-
eb.append(getFeatureId(), bf.getFeatureId());
160141
eb.append(getTaxon(), bf.getTaxon());
161142
eb.append(getFunction(), bf.getFunction());
162143
eb.append(getCount(), bf.getCount());
@@ -167,7 +148,6 @@ public boolean equals(Object o) {
167148
@Override
168149
public int hashCode() {
169150
HashCodeBuilder hcb = new HashCodeBuilder(11, 31);
170-
hcb.append(featureId);
171151
hcb.append(taxon);
172152
hcb.append(function);
173153
hcb.append(count);

src/main/java/org/omixer/rpm/model/enums/ModuleInferenceOptimizers.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ public enum ModuleInferenceOptimizers {
44

55
ABUNDANCE_COVERAGE_REACTION_BASED,
66
ABUNDANCE_COVERAGE_ORTHOLOG_BASED,
7-
ABUNDANCE_COVERAGE_MEDIAN_BASED;
7+
ABUNDANCE_COVERAGE_MEDIAN_BASED,
8+
SUM,
9+
MIN;
810

911
public String displayName() {
1012

src/main/java/org/omixer/rpm/model/io/MatrixWriter.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,8 @@ public void writeCoverage(Map<String, Modules> moduleInference, File outfile) th
2828

2929
public abstract void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f)
3030
throws IOException;
31+
32+
public abstract void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCoverage)
33+
throws IOException;
34+
3135
}

src/main/java/org/omixer/rpm/model/io/ModuleMatrixWriter.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
5656
// each module
5757
for (String observedModule : observedModules) {
5858
String outputString = observedModule;
59-
for (Entry<String, Modules> entry : moduleInference.entrySet()) {
59+
for (String sample : samples) {
6060
Double count = Constants.ZERO;
61-
for (Module module : entry.getValue().getModules()) {
61+
for (Module module : moduleInference.get(sample).getModules()) {
6262
if (module.getModuleId().equals(observedModule)) {
6363
count = f.apply(module);
6464
break;
@@ -71,4 +71,17 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
7171
}
7272
}
7373
}
74+
75+
/*
76+
* Simply delegates to writeMatrix for now
77+
*
78+
* (non-Javadoc)
79+
* @see org.omixer.rpm.model.io.MatrixWriter#exportModules(java.util.Map, java.io.File, java.io.File)
80+
*/
81+
@Override
82+
public void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCouverage)
83+
throws IOException {
84+
writeCounts(moduleInference, outCounts);
85+
writeCoverage(moduleInference, outCouverage);
86+
}
7487
}

src/main/java/org/omixer/rpm/model/io/ModuleTaxonomyMatrixWriter.java

Lines changed: 107 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,25 @@
1818
import org.omixer.utils.Constants;
1919

2020
public class ModuleTaxonomyMatrixWriter extends MatrixWriter {
21-
22-
public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f) throws IOException {
21+
22+
/*
23+
* (non-Javadoc)
24+
* @see org.omixer.rpm.model.io.MatrixWriter#writeMatrix(java.util.Map, java.io.File, java.util.function.Function)
25+
*/
26+
public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f)
27+
throws IOException {
2328
// Map of all observed combinations of taxa and modules
2429
Map<String, Set<String>> taxaModules = new HashMap<>();
25-
26-
/**
27-
* As the module space is very small i.e max 120 modules. For a 1000
28-
* samples we have 120000 objects to store which is nothing. So add all
29-
* the Modules to a list => put species_ko into hash => iterate and save
30-
* sample/features
31-
*/
32-
// generate the Observed taxonModules
30+
// Map of modules by taxon and moduleId for a quick lookup
31+
Map<String, Map<String, Module>> sampleTaxonModules = new HashMap<>();
32+
// generate the Observed taxonModules, as row names for the matrix
3333
for (Entry<String, Modules> entry : moduleInference.entrySet()) {
34+
// map modules by taxon and moduleId
35+
Map<String, Module> taxonMods = new HashMap<>();
36+
sampleTaxonModules.put(entry.getKey(), taxonMods);
37+
// retain above cutoff modules
3438
List<Module> modules = entry.getValue().toAboveCutoffList();
35-
// all modules are above cutoff and there is no need to
36-
// filter them anymore
39+
// set the new modules
3740
entry.getValue().setModules(modules);
3841
for (Module module : modules) {
3942
String taxon = module.getTaxon();
@@ -47,47 +50,119 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
4750
taxaModules.put(taxon, taxonModules);
4851
}
4952
taxonModules.add(module.getModuleId());
53+
taxonMods.put(taxon + module.getModuleId() , module);
5054
}
5155
}
5256

5357
/*
54-
* The number of samples is known, so write header For each observed
55-
* entry < find observed value
58+
* The number of samples is known, so write header For each observed entry <
59+
* find observed value
5660
*/
5761
try (BufferedWriter out = new BufferedWriter(new FileWriter(outfile))) {
5862
List<String> samples = moduleInference.keySet().stream().collect(Collectors.toList());
5963
// output header
6064
String header = samples.stream().reduce("Taxon\tModule", (a, b) -> (a + Constants.TAB + b));
6165
out.write(header + Constants.NEW_LINE);
62-
/**
63-
* Think of another way to optimize. - Could also reduce the search
64-
* space after each iteration by removing matched object - Or sort
65-
* and compare based on sort to ensure next objext is the closets to
66-
* top object
67-
*/
68-
69-
// output for each features
7066
// each taxon
67+
// TODO remove each entry after iteration
7168
for (Entry<String, Set<String>> taxonModules : taxaModules.entrySet()) {
7269
// each module
7370
for (String observedModule : taxonModules.getValue()) {
74-
String outputString = taxonModules.getKey() + Constants.TAB + observedModule;
71+
String countOutputString = taxonModules.getKey() + Constants.TAB + observedModule;
7572
// each sample
76-
for (Entry<String, Modules> entry : moduleInference.entrySet()) {
73+
for (String sample : samples) {
74+
Module module = sampleTaxonModules.get(sample).remove(taxonModules.getKey() + observedModule);
7775
Double count = Constants.ZERO;
78-
for (Module module : entry.getValue().getModules()) {
79-
if (taxonModules.getKey().equals(module.getTaxon())
80-
&& module.getModuleId().equals(observedModule)) {
81-
count = f.apply(module);
82-
break;
83-
}
76+
77+
if (module != null) {
78+
count = f.apply(module);
8479
}
85-
outputString += Constants.TAB + count;
80+
countOutputString += Constants.TAB + count;
8681
}
87-
out.write(outputString);
82+
out.write(countOutputString);
8883
out.newLine();
8984
}
9085
}
9186
}
9287
}
88+
89+
/*
90+
* (non-Javadoc)
91+
*
92+
* @see org.omixer.rpm.model.io.MatrixWriter#exportModules(java.util.Map,
93+
* java.io.File, java.io.File)
94+
*/
95+
@Override
96+
public void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCoverage)
97+
throws IOException {
98+
// Map of all observed combinations of taxa and modules
99+
Map<String, Set<String>> taxaModules = new HashMap<>();
100+
// Map of modules by taxon and moduleId for a quick lookup
101+
Map<String, Map<String, Module>> sampleTaxonModules = new HashMap<>();
102+
// generate the Observed taxonModules, as row names for the matrix
103+
for (Entry<String, Modules> entry : moduleInference.entrySet()) {
104+
// map modules by taxon and moduleId
105+
Map<String, Module> taxonMods = new HashMap<>();
106+
sampleTaxonModules.put(entry.getKey(), taxonMods);
107+
// retain above cutoff modules
108+
List<Module> modules = entry.getValue().toAboveCutoffList();
109+
// set the new modules
110+
entry.getValue().setModules(modules);
111+
for (Module module : modules) {
112+
String taxon = module.getTaxon();
113+
// make sure it is not null
114+
if (taxon == null) {
115+
taxon = Constants.EMPTY_STRING;
116+
}
117+
Set<String> taxonModules = taxaModules.get(taxon);
118+
if (taxonModules == null) {
119+
taxonModules = new HashSet<>();
120+
taxaModules.put(taxon, taxonModules);
121+
}
122+
taxonModules.add(module.getModuleId());
123+
taxonMods.put(taxon + module.getModuleId() , module);
124+
}
125+
}
126+
127+
/*
128+
* The number of samples is known, so write header For each observed entry <
129+
* find observed value
130+
*/
131+
try (BufferedWriter countOut = new BufferedWriter(new FileWriter(outCounts));
132+
BufferedWriter coverageOut = new BufferedWriter(new FileWriter(outCoverage))) {
133+
List<String> samples = moduleInference.keySet().stream().collect(Collectors.toList());
134+
// output header
135+
String header = samples.stream().reduce("Taxon\tModule", (a, b) -> (a + Constants.TAB + b));
136+
countOut.write(header + Constants.NEW_LINE);
137+
coverageOut.write(header + Constants.NEW_LINE);
138+
// each taxon
139+
// TODO remove each entry after iteration
140+
for (Entry<String, Set<String>> taxonModules : taxaModules.entrySet()) {
141+
// each module
142+
for (String observedModule : taxonModules.getValue()) {
143+
String countOutputString = taxonModules.getKey() + Constants.TAB + observedModule;
144+
String coverageOutputString = countOutputString;
145+
// each sample
146+
for (String sample : samples) {
147+
Module module = sampleTaxonModules.get(sample).remove(taxonModules.getKey() + observedModule);
148+
Double count = Constants.ZERO;
149+
Double coverage = Constants.ZERO;
150+
151+
if (module != null) {
152+
count = module.getCount();
153+
coverage = module.getCoverage();
154+
}
155+
156+
countOutputString += Constants.TAB + count;
157+
coverageOutputString += Constants.TAB + coverage;
158+
}
159+
countOut.write(countOutputString);
160+
countOut.newLine();
161+
162+
coverageOut.write(coverageOutputString);
163+
coverageOut.newLine();
164+
}
165+
}
166+
}
167+
}
93168
}

0 commit comments

Comments
 (0)