raeslab
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎build.gradle‎
Lines changed: 1 addition & 1 deletion b/‎build.gradle‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/main/java/org/omixer/rpm/core/InferenceApp.java‎
Lines changed: 10 additions & 6 deletions b/‎src/main/java/org/omixer/rpm/core/InferenceApp.java‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎src/main/java/org/omixer/rpm/model/BasicFeature.java‎
Lines changed: 2 additions & 22 deletions b/‎src/main/java/org/omixer/rpm/model/BasicFeature.java‎
Lines changed: 2 additions & 22 deletions
diff --git a/‎src/main/java/org/omixer/rpm/model/enums/ModuleInferenceOptimizers.java‎
Lines changed: 3 additions & 1 deletion b/‎src/main/java/org/omixer/rpm/model/enums/ModuleInferenceOptimizers.java‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/main/java/org/omixer/rpm/model/io/MatrixWriter.java‎
Lines changed: 4 additions & 0 deletions b/‎src/main/java/org/omixer/rpm/model/io/MatrixWriter.java‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/main/java/org/omixer/rpm/model/io/ModuleMatrixWriter.java‎
Lines changed: 15 additions & 2 deletions b/‎src/main/java/org/omixer/rpm/model/io/ModuleMatrixWriter.java‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎src/main/java/org/omixer/rpm/model/io/ModuleTaxonomyMatrixWriter.java‎
Lines changed: 107 additions & 32 deletions b/‎src/main/java/org/omixer/rpm/model/io/ModuleTaxonomyMatrixWriter.java‎
Lines changed: 107 additions & 32 deletions
@@ -24,7 +24,7 @@ java -jar [omixer-rpm.jar](../../releases/latest)  [-a <ANNOTATION>] [-c <COVERA
                               the number of observed reactions
  -o,--output-dir <DIRECTORY>             Path to the output directory
  -s,--score-estimator <SCORE-ESTIMATOR>        The score estimatore.
-                              Accepted values are [median|average].
+                              Accepted values are [median|average|sum|min].
                               Defaults to median
  -t,--threads <THREADS>                Number of threads to use when mapping the modules.
                               Defaults to 1
 
@@ -9,7 +9,7 @@ plugins {
 }
 
 group = "org.omixer"
-version = '1.0'
+version = '1.1'
 
 repositories {
 	jcenter()
 
@@ -21,6 +21,7 @@
 import org.omixer.rpm.model.ModuleCoverageDistribution;
 import org.omixer.rpm.model.ModuleInferenceOptions;
 import org.omixer.rpm.model.Modules;
+import org.omixer.rpm.model.enums.ModuleInferenceOptimizers;
 import org.omixer.rpm.model.enums.ScalingMethod;
 import org.omixer.rpm.model.io.MatrixWriter;
 import org.omixer.rpm.model.io.ModuleMatrixWriter;
@@ -41,7 +42,7 @@ public class InferenceApp extends AbstractInferenceApp {
 	public static final String EXEC_COMMAND = "java -jar " + TOOL_NAME + " ";
 	public static final String HEADER = "\n\nDESCRIPTION\n"
 			+ " Omixer-RPM\n A Reference Pathways Mapper for turning metagenomic functional profiles into pathway/module profiles\n\n"
-			+ "VERSION: 1.0 (13 June 2018)\n" + "AUTHOR: Youssef Darzi <[email protected]>\n\n"
+			+ "VERSION: 1.1\n" + "AUTHOR: Youssef Darzi <[email protected]>\n\n"
 			+ "ARGUMENTS (Options starting with -X are non-standard and subject to change without notice.)\n\n";
 
 	public static final String FOOTER = "\nLicensed under an Academic Non-commercial Software License Agreement, https://github.com/raeslab/omixer-rpm/blob/master/LICENSE";
@@ -58,7 +59,7 @@ public static void main(String[] args) {
 				.hasArg().argName("COVERAGE").build());
 
 		options.addOption(Option.builder("s").longOpt("score-estimator")
-				.desc("The score estimatore.\nAccepted values are [median|average].\nDefaults to median").hasArg()
+				.desc("The score estimatore.\nAccepted values are [median|average|sum|min].\nDefaults to median").hasArg()
 				.argName("SCORE-ESTIMATOR").build());
 
 		options.addOption(Option.builder("n").longOpt("normalize-by-length")
@@ -149,7 +150,11 @@ public static void main(String[] args) {
 				String estimator = line.getOptionValue("score-estimator");
 
 				if ("average".equals(estimator)) {
-					algorithm = "ABUNDANCE_COVERAGE_REACTION_BASED";
+					algorithm = ModuleInferenceOptimizers.ABUNDANCE_COVERAGE_REACTION_BASED.displayName();
+				} else if ("sum".equals(estimator)) {
+					algorithm = ModuleInferenceOptimizers.SUM.displayName();
+				} else if ("min".equals(estimator)) {
+					algorithm = ModuleInferenceOptimizers.MIN.displayName();
 				} else if (!"median".equals(estimator)) {
 					throw new IllegalArgumentException(estimator
 							+ " is not a valid value for score calculation. Please chose between median or average");
@@ -277,9 +282,8 @@ public static void main(String[] args) {
 				File outCounts = new File(outputDir, "modules.tsv");
 				File outCoverage = new File(outputDir, "modules-coverage.tsv");
 				MatrixWriter matrixWriter = (annotation.equals("2") && isPerTaxon) ? new ModuleTaxonomyMatrixWriter() : new ModuleMatrixWriter();
-				matrixWriter.writeCounts(moduleInference, outCounts);
-				matrixWriter.writeCoverage(moduleInference, outCoverage);
-				
+				// exportModules in one go instead of iterating uselessly twice
+				matrixWriter.exportModules(moduleInference, outCounts, outCoverage);
 			}
 		} catch (IOException | IncorrectNumberOfEntriesException e) {
 			app.log.error("Exception while reading input data: " + e.getMessage());
 
@@ -7,7 +7,7 @@
 
 /**
  * A basic feature with name, taxon, score, and one other function so far
- * TODO add an annotation Map to allow for more that one annotation
+ * TODO add an annotation Map to allow for more than 1 annotation
  * 
  * @author <a href="mailto:[email protected]">Youssef Darzi</a>
  * 
@@ -22,7 +22,6 @@ public class BasicFeature {
 	 * 
 	 */
 	private Long id;
-	private String featureId;
 	private String function;
 	private Double count;
 	private String taxon;
@@ -35,9 +34,8 @@ public class BasicFeature {
 	 * @param count
 	 * @param taxon
 	 */
-	public BasicFeature(String featureId, String taxon, String function, Double count) {
+	public BasicFeature(String taxon, String function, Double count) {
 		super();
-		this.featureId = featureId;
 		this.function = function;
 		this.count = count;
 		this.taxon = taxon;
@@ -62,13 +60,6 @@ public String getFunction() {
 		return function;
 	}
 
-	/**
-	 * @return the id
-	 */
-	public String getFeatureId() {
-		return featureId;
-	}
-
 	/**
 	 * @return the taxon
 	 */
@@ -95,14 +86,6 @@ public void setFunction(String function) {
 		this.function = function;
 	}
 
-	/**
-	 * @param featureId
-	 *            the id to set
-	 */
-	public void setFeatureId(String featureId) {
-		this.featureId = featureId;
-	}
-
 	/**
 	 * @param count
 	 *            the score to set
@@ -136,7 +119,6 @@ public boolean haveValidFunction() {
 	public String toString() {
 		ToStringBuilder tsb = new ToStringBuilder(this,
 				ToStringStyle.SHORT_PREFIX_STYLE);
-		tsb.append(featureId);
 		tsb.append(taxon);
 		tsb.append(function);
 		tsb.append(count);
@@ -156,7 +138,6 @@ public boolean equals(Object o) {
 		BasicFeature bf = (BasicFeature) o;
 		EqualsBuilder eb = new EqualsBuilder();
 
-		eb.append(getFeatureId(), bf.getFeatureId());
 		eb.append(getTaxon(), bf.getTaxon());
 		eb.append(getFunction(), bf.getFunction());
 		eb.append(getCount(), bf.getCount());
@@ -167,7 +148,6 @@ public boolean equals(Object o) {
 	@Override
 	public int hashCode() {
 		HashCodeBuilder hcb = new HashCodeBuilder(11, 31);
-		hcb.append(featureId);
 		hcb.append(taxon);
 		hcb.append(function);
 		hcb.append(count);
 
@@ -4,7 +4,9 @@ public enum ModuleInferenceOptimizers {
 
 	ABUNDANCE_COVERAGE_REACTION_BASED,
 	ABUNDANCE_COVERAGE_ORTHOLOG_BASED,
-	ABUNDANCE_COVERAGE_MEDIAN_BASED;
+	ABUNDANCE_COVERAGE_MEDIAN_BASED,
+	SUM,
+	MIN;
 
 	public String displayName() {
 
 
@@ -28,4 +28,8 @@ public void writeCoverage(Map<String, Modules> moduleInference, File outfile) th
 
 	public abstract void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f)
 			throws IOException;
+	
+	/**
+	 * Exports both the module counts and the module coverages of the given
+	 * inference, one matrix per output file.
+	 * 
+	 * @param moduleInference
+	 *            the inferred modules, keyed by sample
+	 * @param outCounts
+	 *            the file receiving the counts matrix
+	 * @param outCoverage
+	 *            the file receiving the coverage matrix
+	 * @throws IOException
+	 *             declared for implementations that fail to write an output file
+	 */
+	public abstract void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCoverage)
+			throws IOException;
+	
 }
@@ -56,9 +56,9 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
 			// each module
 			for (String observedModule : observedModules) {
 				String outputString = observedModule;
-				for (Entry<String, Modules> entry : moduleInference.entrySet()) {
+				for (String sample : samples) {
 					Double count = Constants.ZERO;
-					for (Module module : entry.getValue().getModules()) {
+					for (Module module : moduleInference.get(sample).getModules()) {
 						if (module.getModuleId().equals(observedModule)) {
 							count = f.apply(module);
 							break;
@@ -71,4 +71,17 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
 			}
 		}
 	}
+
+	/*
+	 * Writes the counts matrix to outCounts and then the coverage matrix to
+	 * outCoverage by delegating to writeCounts and writeCoverage, i.e. the two
+	 * files are still produced one after the other for now.
+	 * 
+	 * (non-Javadoc)
+	 * @see org.omixer.rpm.model.io.MatrixWriter#exportModules(java.util.Map, java.io.File, java.io.File)
+	 */
+	@Override
+	public void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCoverage)
+			throws IOException {
+		writeCounts(moduleInference, outCounts);
+		writeCoverage(moduleInference, outCoverage);
+	}
 }
@@ -18,22 +18,25 @@
 import org.omixer.utils.Constants;
 
 public class ModuleTaxonomyMatrixWriter extends MatrixWriter {
-	
-	public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f) throws IOException {
+
+	/*
+	 * (non-Javadoc)
+	 * @see org.omixer.rpm.model.io.MatrixWriter#writeMatrix(java.util.Map, java.io.File, java.util.function.Function)
+	 */
+	public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f)
+			throws IOException {
 		// Map of all observed combinations of taxa and modules
 		Map<String, Set<String>> taxaModules = new HashMap<>();
-
-		/**
-		 * As the module space is very small i.e max 120 modules. For a 1000
-		 * samples we have 120000 objects to store which is nothing. So add all
-		 * the Modules to a list => put species_ko into hash => iterate and save
-		 * sample/features
-		 */
-		// generate the Observed taxonModules
+		// Map of modules by taxon and moduleId for a quick lookup
+		Map<String, Map<String, Module>> sampleTaxonModules = new HashMap<>();
+		// generate the Observed taxonModules, as row names for the matrix
 		for (Entry<String, Modules> entry : moduleInference.entrySet()) {
+			// map modules by taxon and moduleId
+			Map<String, Module> taxonMods = new HashMap<>();
+			sampleTaxonModules.put(entry.getKey(), taxonMods);
+			// retain above cutoff modules
 			List<Module> modules = entry.getValue().toAboveCutoffList();
-			// all modules are above cutoff and there is no need to
-			// filter them anymore
+			// set the new modules
 			entry.getValue().setModules(modules);
 			for (Module module : modules) {
 				String taxon = module.getTaxon();
@@ -47,47 +50,119 @@ public void writeMatrix(Map<String, Modules> moduleInference, File outfile, Func
 					taxaModules.put(taxon, taxonModules);
 				}
 				taxonModules.add(module.getModuleId());
+				taxonMods.put(taxon + module.getModuleId() , module);
 			}
 		}
 
 		/*
-		 * The number of samples is known, so write header For each observed
-		 * entry < find observed value
+		 * The number of samples is known, so write header For each observed entry <
+		 * find observed value
 		 */
 		try (BufferedWriter out = new BufferedWriter(new FileWriter(outfile))) {
 			List<String> samples = moduleInference.keySet().stream().collect(Collectors.toList());
 			// output header
 			String header = samples.stream().reduce("Taxon\tModule", (a, b) -> (a + Constants.TAB + b));
 			out.write(header + Constants.NEW_LINE);
-			/**
-			 * Think of another way to optimize. - Could also reduce the search
-			 * space after each iteration by removing matched object - Or sort
-			 * and compare based on sort to ensure next objext is the closets to
-			 * top object
-			 */
-
-			// output for each features
 			// each taxon
+			// TODO remove each entry after iteration
 			for (Entry<String, Set<String>> taxonModules : taxaModules.entrySet()) {
 				// each module
 				for (String observedModule : taxonModules.getValue()) {
-					String outputString = taxonModules.getKey() + Constants.TAB + observedModule;
+					String countOutputString = taxonModules.getKey() + Constants.TAB + observedModule;
 					// each sample
-					for (Entry<String, Modules> entry : moduleInference.entrySet()) {
+					for (String sample : samples) {
+						Module module = sampleTaxonModules.get(sample).remove(taxonModules.getKey() + observedModule);
 						Double count = Constants.ZERO;
-						for (Module module : entry.getValue().getModules()) {
-							if (taxonModules.getKey().equals(module.getTaxon())
-									&& module.getModuleId().equals(observedModule)) {
-								count = f.apply(module);
-								break;
-							}
+						
+						if (module != null) {
+							count = f.apply(module);
 						}
-						outputString += Constants.TAB + count;
+						countOutputString += Constants.TAB + count;
 					}
-					out.write(outputString);
+					out.write(countOutputString);
 					out.newLine();
 				}
 			}
 		}
 	}
+
+	/*
+	 * Writes the per taxon counts and coverage matrices in a single pass over
+	 * the observed taxon/module pairs. Both files share the same header
+	 * (Taxon, Module, then one column per sample) and the same row order.
+	 * A taxon/module pair that is absent from a sample is reported as
+	 * Constants.ZERO in both files.
+	 * 
+	 * Note that the module list of every Modules value in moduleInference is
+	 * replaced by its above cutoff list as a side effect.
+	 * 
+	 * (non-Javadoc)
+	 * 
+	 * @see org.omixer.rpm.model.io.MatrixWriter#exportModules(java.util.Map,
+	 * java.io.File, java.io.File)
+	 */
+	@Override
+	public void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCoverage)
+			throws IOException {
+		// Map of all observed combinations of taxa and modules
+		Map<String, Set<String>> taxaModules = new HashMap<>();
+		// Per sample map of modules keyed by taxon and moduleId for a quick lookup
+		Map<String, Map<String, Module>> sampleTaxonModules = new HashMap<>();
+		// generate the Observed taxonModules, as row names for the matrix
+		for (Entry<String, Modules> entry : moduleInference.entrySet()) {
+			// map modules by taxon and moduleId
+			Map<String, Module> taxonMods = new HashMap<>();
+			sampleTaxonModules.put(entry.getKey(), taxonMods);
+			// retain above cutoff modules
+			List<Module> modules = entry.getValue().toAboveCutoffList();
+			// set the new modules (mutates the caller's Modules object)
+			entry.getValue().setModules(modules);
+			for (Module module : modules) {
+				String taxon = module.getTaxon();
+				// make sure it is not null
+				if (taxon == null) {
+					taxon = Constants.EMPTY_STRING;
+				}
+				taxaModules.computeIfAbsent(taxon, t -> new HashSet<>()).add(module.getModuleId());
+				// The delimiter keeps distinct (taxon, moduleId) pairs from
+				// collapsing onto one key, e.g. ("AB", "C") vs ("A", "BC")
+				taxonMods.put(taxon + Constants.TAB + module.getModuleId(), module);
+			}
+		}
+
+		/*
+		 * The samples are known at this point, so write the header first and
+		 * then, for each observed taxon/module pair, look up the value of each
+		 * sample
+		 */
+		try (BufferedWriter countOut = new BufferedWriter(new FileWriter(outCounts));
+				BufferedWriter coverageOut = new BufferedWriter(new FileWriter(outCoverage))) {
+			// fix the sample order once so that header and rows stay aligned
+			List<String> samples = moduleInference.keySet().stream().collect(Collectors.toList());
+			// output header
+			String header = samples.stream().reduce("Taxon\tModule", (a, b) -> (a + Constants.TAB + b));
+			countOut.write(header + Constants.NEW_LINE);
+			coverageOut.write(header + Constants.NEW_LINE);
+			// each taxon
+			// TODO remove each entry after iteration
+			for (Entry<String, Set<String>> taxonModules : taxaModules.entrySet()) {
+				String taxon = taxonModules.getKey();
+				// each module
+				for (String observedModule : taxonModules.getValue()) {
+					// the row label is also the lookup key built above
+					String rowName = taxon + Constants.TAB + observedModule;
+					StringBuilder countRow = new StringBuilder(rowName);
+					StringBuilder coverageRow = new StringBuilder(rowName);
+					// each sample
+					for (String sample : samples) {
+						// remove instead of get: each pair is visited once, so
+						// the lookup maps shrink as rows are written
+						Module module = sampleTaxonModules.get(sample).remove(rowName);
+						Double count = Constants.ZERO;
+						Double coverage = Constants.ZERO;
+
+						if (module != null) {
+							count = module.getCount();
+							coverage = module.getCoverage();
+						}
+
+						countRow.append(Constants.TAB).append(count);
+						coverageRow.append(Constants.TAB).append(coverage);
+					}
+					countOut.write(countRow.toString());
+					countOut.newLine();
+
+					coverageOut.write(coverageRow.toString());
+					coverageOut.newLine();
+				}
+			}
+		}
+	}
 }
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@ plugins {`
`9`	`9`	`}`
`10`	`10`
`11`	`11`	`group = "org.omixer"`
`12`		`-version = '1.0'`
	`12`	`+version = '1.1'`
`13`	`13`
`14`	`14`	`repositories {`
`15`	`15`	`jcenter()`
Original file line number	Diff line number	Diff line change
`@@ -28,4 +28,8 @@ public void writeCoverage(Map<String, Modules> moduleInference, File outfile) th`
`28`	`28`
`29`	`29`	`public abstract void writeMatrix(Map<String, Modules> moduleInference, File outfile, Function<Module, Double> f)`
`30`	`30`	`throws IOException;`
	`31`	`+`
	`32`	`+ public abstract void exportModules(Map<String, Modules> moduleInference, File outCounts, File outCoverage)`
	`33`	`+ throws IOException;`
	`34`	`+`
`31`	`35`	`}`