diff --git a/orthoinference/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/orthoinference/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 89b5fbd7a..6a1099f9f 100644 --- a/orthoinference/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/orthoinference/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -49,10 +49,12 @@ public class EventsInferrer private static String releaseVersion; private static GKInstance instanceEditInst; private static GKInstance speciesInst; - private static Map manualEventToNonHumanSource = new HashMap<>(); - private static List manualHumanEvents = new ArrayList<>(); + private static Map eventsAlreadyInferredMap = new HashMap<>(); + private static List eventsAlreadyInferred = new ArrayList<>(); private static StableIdentifierGenerator stableIdentifierGenerator; private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator; + private static final String summationText = "This event has been computationally inferred from an event that has been demonstrated in another species.

The inference is based on the homology mapping from PANTHER. Briefly, reactions for which all involved PhysicalEntities (in input, output and catalyst) have a mapped orthologue/paralogue (for complexes at least 75% of components must have a mapping) are inferred to the other species. High level events are also inferred for these events to allow for easier navigation.

More details and caveats of the event inference in Reactome. For details on PANTHER see also: http://www.pantherdb.org/about.jsp"; + @SuppressWarnings("unchecked") public static void inferEvents(Properties props, String species) throws Exception @@ -103,7 +105,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio String inferredFilename = "inferred_" + species + "_75.txt"; createNewFile(eligibleFilename); createNewFile(inferredFilename); - ReactionInferrer.setEligibleFilename(eligibleFilename); + SkipInstanceChecker.setEligibleFilename(eligibleFilename); ReactionInferrer.setInferredFilename(inferredFilename); stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) speciesObject.get("abbreviation")); @@ -113,6 +115,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio Map homologueMappings = readHomologueMappingFile(species, "hsap", pathToOrthopairs); ProteinCountUtility.setHomologueMappingFile(homologueMappings); EWASInferrer.setHomologueMappingFile(homologueMappings); + SkipInstanceChecker.setHomologueMappingFile(homologueMappings); } catch (FileNotFoundException e) { logger.fatal("Unable to locate " + speciesName +" mapping file: hsap_" + species + "_mapping.txt. Orthology prediction not possible."); return; @@ -165,19 +168,19 @@ public static void inferEvents(Properties props, String species) throws Exceptio logger.info("Attempting RlE inference: " + reactionInst); // Check if the current Reaction already exists for this species, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute. // Adds to manualHumanEvents array if it passes conditions. This code block allows you to re-run the code without re-inferring instances. 
- List previouslyInferredInstances = new ArrayList(); + List previouslyInferredInstances = new ArrayList<>(); previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, orthologousEvent, previouslyInferredInstances); - previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, inferredFrom, previouslyInferredInstances); - if (previouslyInferredInstances.size() > 0) - { + previouslyInferredInstances.addAll(checkIfPreviouslyInferred(reactionInst, inferredFrom, previouslyInferredInstances)); + if (previouslyInferredInstances.size() > 0) { GKInstance prevInfInst = previouslyInferredInstances.get(0); - if (prevInfInst.getAttributeValue(disease) == null) - { + GKInstance prevInfSummationInst = (GKInstance) prevInfInst.getAttributeValue(summation); + String prevInfSummationText = prevInfSummationInst.getAttributeValue(text).toString(); + if (prevInfInst.getAttributeValue(disease) == null && prevInfSummationText.equals(summationText)) { logger.info("Inferred RlE already exists, skipping inference"); - manualEventToNonHumanSource.put(reactionInst, prevInfInst); - manualHumanEvents.add(reactionInst); + eventsAlreadyInferredMap.put(reactionInst, prevInfInst); + eventsAlreadyInferred.add(reactionInst); } else { - logger.info("Disease reaction, skipping inference"); + logger.info("Either a disease or manually inferred reaction, skipping inference"); } continue; } @@ -191,8 +194,11 @@ public static void inferEvents(Properties props, String species) throws Exceptio return; } } - PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); - PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); + // Retrieve events inferred from this run, and any that were already inferred. Combine them and then begin Pathway inference. + // The two methods below perform this for a map, containing the original RlE and the inferred RlE, and a List of just the inferred RlEs. 
+ // The latter will be iterated through when building Pathway hierarchies, the former when information from original RlE is needed during this build. + PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent(eventsAlreadyInferredMap)); + PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents(eventsAlreadyInferred)); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); outputReport(species); logger.info("Finished orthoinference of " + speciesName); @@ -218,7 +224,7 @@ private static void setReleaseDates(String dateOfRelease) } @SuppressWarnings("unchecked") - private static List checkIfPreviouslyInferred(GKInstance reactionInst, String attribute, List previouslyInferredInstances) throws InvalidAttributeException, Exception + private static List checkIfPreviouslyInferred(GKInstance reactionInst, String attribute, List previouslyInferredInstances) throws Exception { for (GKInstance attributeInst : (Collection) reactionInst.getAttributeValuesList(attribute)) { @@ -233,7 +239,7 @@ private static List checkIfPreviouslyInferred(GKInstance reactionIns private static void outputReport(String species) throws IOException { - int eligibleCount = ReactionInferrer.getEligibleCount(); + int eligibleCount = SkipInstanceChecker.getEligibleCount(); int inferredCount = ReactionInferrer.getInferredCount(); float percentInferred = (float) 100*inferredCount/eligibleCount; // Create file if it doesn't exist @@ -241,8 +247,10 @@ private static void outputReport(String species) throws IOException logger.info("Updating " + reportFilename); if (!Files.exists(Paths.get(reportFilename))) { createNewFile(reportFilename); + String reportHeader = "## Number of inferred reactions by species for Reactome Release " + releaseVersion; + Files.write(Paths.get(reportFilename), reportHeader.getBytes(), StandardOpenOption.APPEND); } - String results = "hsap to " + species + ":\t" + inferredCount + " out of " + eligibleCount + " eligible reactions (" + 
String.format("%.2f", percentInferred) + "%)\n"; + String results = "hsap to " + species + ":\tInferred " + inferredCount + " out of " + eligibleCount + " eligible reactions (" + String.format("%.2f", percentInferred) + "%)\n"; Files.write(Paths.get(reportFilename), results.getBytes(), StandardOpenOption.APPEND); } @@ -302,7 +310,6 @@ private static void setSummationInstance() throws Exception GKInstance summationInst = new GKInstance(dbAdaptor.getSchema().getClassByName(Summation)); summationInst.setDbAdaptor(dbAdaptor); summationInst.addAttributeValue(created, instanceEditInst); - String summationText = "This event has been computationally inferred from an event that has been demonstrated in another species.

The inference is based on the homology mapping from PANTHER. Briefly, reactions for which all involved PhysicalEntities (in input, output and catalyst) have a mapped orthologue/paralogue (for complexes at least 75% of components must have a mapping) are inferred to the other species. High level events are also inferred for these events to allow for easier navigation.

More details and caveats of the event inference in Reactome. For details on PANTHER see also: http://www.pantherdb.org/about.jsp"; summationInst.addAttributeValue(text, summationText); summationInst.addAttributeValue(_displayName, summationText); summationInst = InstanceUtilities.checkForIdenticalInstances(summationInst, null); diff --git a/orthoinference/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/orthoinference/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 925339954..0628ae523 100644 --- a/orthoinference/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/orthoinference/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -174,7 +174,7 @@ private static GKInstance createInfEWAS(GKInstance ewasInst, boolean override) t } // Infers Complex or Polymer instances. These instances are generally comprised of more than 1 PhysicalEntity, and calls 'createOrthoEntity' for each one. Complex/Polymer instances // are also subject to the 'countDistinctProteins' function. The result from this needs to have at least 75% of total proteins to be inferrable for inference to continue. 
- private static GKInstance createInfComplexPolymer(GKInstance complexInst, boolean override) throws InvalidAttributeException, InvalidAttributeValueException, Exception + private static GKInstance createInfComplexPolymer(GKInstance complexInst, boolean override) throws Exception { if (complexPolymerIdenticals.get(complexInst) == null) { diff --git a/orthoinference/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/orthoinference/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index db4ca9910..f5ff5f4bf 100644 --- a/orthoinference/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/orthoinference/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -47,8 +47,6 @@ public static void inferPathways(List inferrableHumanEvents) throws addInferredEventsToInferredPathways(); logger.info("Finished populating inferred Pathways with inferred Events"); - //TODO: LOG starting HERE - // Connect preceding events to RlEs, if they have any in the source species. 
logger.info("Adding preceding events to inferred Events"); inferPrecedingEvents(); @@ -168,40 +166,31 @@ private static List getInferredEventInstances(GKInstance humanPathwa private static void inferPrecedingEvents() throws Exception { Set seenPrecedingEvent = new HashSet<>(); - for (GKInstance inferrableEventInst : updatedInferrableHumanEvents) - { - if (!seenPrecedingEvent.contains(inferrableEventInst)) - { - if (inferrableEventInst.getAttributeValue(precedingEvent)!= null) - { + for (GKInstance inferrableEventInst : updatedInferrableHumanEvents) { + if (!seenPrecedingEvent.contains(inferrableEventInst)) { + if (inferrableEventInst.getAttributeValue(precedingEvent) != null) { logger.info("Adding preceding event to " + inferrableEventInst); List precedingEventInstances = new ArrayList<>(); // Find all preceding events for source instance that have an inferred counterpart - for (GKInstance precedingEventInst : (Collection) inferrableEventInst.getAttributeValuesList(precedingEvent)) - { - if (inferredEventIdenticals.get(precedingEventInst) != null) - { + for (GKInstance precedingEventInst : (Collection) inferrableEventInst.getAttributeValuesList(precedingEvent)) { + if (inferredEventIdenticals.get(precedingEventInst) != null) { precedingEventInstances.add(inferredEventIdenticals.get(precedingEventInst)); } } Set inferredPrecedingEvents = new HashSet<>(); // Find any inferred preceding events that already exist for the inferred instance (don't want to add any redundant preceding events) - for (GKInstance precedingEventInst : (Collection) inferredEventIdenticals.get(inferrableEventInst).getAttributeValuesList(precedingEvent)) - { + for (GKInstance precedingEventInst : (Collection) inferredEventIdenticals.get(inferrableEventInst).getAttributeValuesList(precedingEvent)) { inferredPrecedingEvents.add(precedingEventInst.getDBID().toString()); } List updatedPrecedingEventInstances = new ArrayList<>(); // Find existing preceding events that haven't already been attached to 
the inferred instance - for (GKInstance precedingEventInst : precedingEventInstances) - { - if (!inferredPrecedingEvents.contains(precedingEventInst.getDBID().toString())) - { + for (GKInstance precedingEventInst : precedingEventInstances) { + if (!inferredPrecedingEvents.contains(precedingEventInst.getDBID().toString())) { updatedPrecedingEventInstances.add(precedingEventInst); } } // Add preceding event to inferred instance - if (updatedPrecedingEventInstances != null && updatedPrecedingEventInstances.size() > 0) - { + if (updatedPrecedingEventInstances != null && updatedPrecedingEventInstances.size() > 0) { inferredEventIdenticals.get(inferrableEventInst).addAttributeValue(precedingEvent, updatedPrecedingEventInstances); dba.updateInstanceAttribute(inferredEventIdenticals.get(inferrableEventInst), precedingEvent); } diff --git a/orthoinference/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/orthoinference/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index b422e1e5c..43eae8c95 100644 --- a/orthoinference/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/orthoinference/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -21,13 +21,11 @@ public class ReactionInferrer { private static final Logger logger = LogManager.getLogger(); private static MySQLAdaptor dba; private static String dateOfRelease = ""; - private static String eligibleFilehandle; private static String inferredFilehandle; private static GKInstance summationInst; private static GKInstance evidenceTypeInst; private static Map inferredCatalyst = new HashMap<>(); private static Map inferredEvent = new HashMap<>(); - private static Integer eligibleCount = 0; private static Integer inferredCount = 0; private static List inferrableHumanEvents = new ArrayList<>(); @@ -44,6 +42,7 @@ public static void inferReaction(GKInstance reactionInst) throws Exception if (inferredEvent.get(reactionInst) == null) { ///// The beginning of an inference 
process: + // Creates inferred instance of reaction. GKInstance infReactionInst = InstanceUtilities.createNewInferredGKInstance(reactionInst); infReactionInst.addAttributeValue(name, reactionInst.getAttributeValuesList(name)); @@ -52,87 +51,72 @@ public static void inferReaction(GKInstance reactionInst) throws Exception infReactionInst.addAttributeValue(evidenceType, evidenceTypeInst); infReactionInst.addAttributeValue(_displayName, reactionInst.getAttributeValue(_displayName)); - // This function finds the total number of distinct proteins associated with an instance, as well as the number that can be inferred. - // Total proteins are stored in reactionProteinCounts[0], inferrable proteins in [1], and the maximum number of homologues for any entity involved in index [2]. - // Reactions with no proteins/EWAS (Total = 0) are not inferred. - List reactionProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionInst); - int reactionTotalProteinCounts = reactionProteinCounts.get(0); - if (reactionTotalProteinCounts > 0) + // Attempt to infer all PhysicalEntities associated with this reaction's Input, Output, CatalystActivity and RegulatedBy attributes. + // Failure to successfully infer any of these attributes will end inference for this reaction. + logger.info("Inferring inputs..."); + if (inferReactionInputsOrOutputs(reactionInst, infReactionInst, input)) { - logger.info("Total protein count for RlE: " + reactionTotalProteinCounts); - String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; - // Having passed all tests/filters until now, the reaction is recorded in the 'eligible reactions' file, meaning inference is continued. - eligibleCount++; - Files.write(Paths.get(eligibleFilehandle), eligibleEventName.getBytes(), StandardOpenOption.APPEND); - // Attempt to infer all PhysicalEntities associated with this reaction's Input, Output, CatalystActivity and RegulatedBy attributes. 
- // Failure to successfully infer any of these attributes will end inference for this reaction. - logger.info("Inferring inputs..."); - if (inferReactionInputsOrOutputs(reactionInst, infReactionInst, input)) + logger.info("Inferring outputs..."); + if (inferReactionInputsOrOutputs(reactionInst, infReactionInst, output)) { - logger.info("Inferring outputs..."); - if (inferReactionInputsOrOutputs(reactionInst, infReactionInst, output)) + logger.info("Inferring catalysts..."); + if (inferReactionCatalysts(reactionInst, infReactionInst)) { - logger.info("Inferring catalysts..."); - if (inferReactionCatalysts(reactionInst, infReactionInst)) + // Many reactions are not regulated at all, meaning inference is attempted but will not end the process if there is nothing to infer. + // The inference process will end though if inferRegulations returns an invalid value. + logger.info("Inferring regulations..."); + List inferredRegulations = inferReactionRegulations(reactionInst); + if (inferredRegulations.size() == 1 && inferredRegulations.get(0) == null) { - // Many reactions are not regulated at all, meaning inference is attempted but will not end the process if there is nothing to infer. - // The inference process will end though if inferRegulations returns an invalid value. - logger.info("Inferring regulations..."); - List inferredRegulations = inferReactionRegulations(reactionInst); - if (inferredRegulations.size() == 1 && inferredRegulations.get(0) == null) - { - return; - } - if (infReactionInst.getSchemClass().isValidAttribute(releaseDate)) - { - infReactionInst.addAttributeValue(releaseDate, dateOfRelease); - } - // FetchIdenticalInstances would just return the instance being inferred. Since this step is meant to always - // add a new inferred instance, the storeInstance method is just called here. 
- GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); - infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); - dba.storeInstance(infReactionInst); - logger.info("Inferred RlE instance: " + infReactionInst); + return; + } + if (infReactionInst.getSchemClass().isValidAttribute(releaseDate)) + { + infReactionInst.addAttributeValue(releaseDate, dateOfRelease); + } + // FetchIdenticalInstances would just return the instance being inferred. Since this step is meant to always + // add a new inferred instance, the storeInstance method is just called here. + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); + infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + dba.storeInstance(infReactionInst); + logger.info("Inferred RlE instance: " + infReactionInst); - if (infReactionInst.getSchemClass().isValidAttribute(inferredFrom)) - { - infReactionInst = InstanceUtilities.addAttributeValueIfNecessary(infReactionInst, reactionInst, inferredFrom); - dba.updateInstanceAttribute(infReactionInst, inferredFrom); - } - infReactionInst = InstanceUtilities.addAttributeValueIfNecessary(infReactionInst, reactionInst, orthologousEvent); - dba.updateInstanceAttribute(infReactionInst, orthologousEvent); - reactionInst.addAttributeValue(orthologousEvent, infReactionInst); - dba.updateInstanceAttribute(reactionInst, orthologousEvent); - - inferredEvent.put(reactionInst, infReactionInst); - - // Regulations instances require the DB to contain the inferred ReactionlikeEvent, so Regulations inference happens post-inference - if (inferredRegulations.size() > 0) + if (infReactionInst.getSchemClass().isValidAttribute(inferredFrom)) + { + infReactionInst = InstanceUtilities.addAttributeValueIfNecessary(infReactionInst, reactionInst, inferredFrom); + 
dba.updateInstanceAttribute(infReactionInst, inferredFrom); + } + infReactionInst = InstanceUtilities.addAttributeValueIfNecessary(infReactionInst, reactionInst, orthologousEvent); + dba.updateInstanceAttribute(infReactionInst, orthologousEvent); + reactionInst.addAttributeValue(orthologousEvent, infReactionInst); + dba.updateInstanceAttribute(reactionInst, orthologousEvent); + + inferredEvent.put(reactionInst, infReactionInst); + + // Regulations instances require the DB to contain the inferred ReactionlikeEvent, so Regulations inference happens post-inference + if (inferredRegulations.size() > 0) + { + logger.info("Number of regulator(s) inferred: " + inferredRegulations.size()); + for (GKInstance infRegulation : inferredRegulations) { - logger.info("Number of regulator(s) inferred: " + inferredRegulations.size()); - for (GKInstance infRegulation : inferredRegulations) - { - infRegulation = InstanceUtilities.checkForIdenticalInstances(infRegulation, null); - infReactionInst.addAttributeValue("regulatedBy", infRegulation); - dba.updateInstanceAttribute(infReactionInst, "regulatedBy"); - } + infRegulation = InstanceUtilities.checkForIdenticalInstances(infRegulation, null); + infReactionInst.addAttributeValue("regulatedBy", infRegulation); + dba.updateInstanceAttribute(infReactionInst, "regulatedBy"); } - // After successfully adding a new inferred instance to the DB, it is recorded in the 'inferred reactions' file - inferredCount++; - inferrableHumanEvents.add(reactionInst); - String inferredEvent = infReactionInst.getAttributeValue(DB_ID).toString() + "\t" + infReactionInst.getDisplayName() + "\n"; - Files.write(Paths.get(inferredFilehandle), inferredEvent.getBytes(), StandardOpenOption.APPEND); - } else { - logger.info("Catalyst inference unsuccessful -- terminating inference for " + reactionInst); } + // After successfully adding a new inferred instance to the DB, it is recorded in the 'inferred reactions' file + inferredCount++; + 
inferrableHumanEvents.add(reactionInst); + String inferredEvent = infReactionInst.getAttributeValue(DB_ID).toString() + "\t" + infReactionInst.getDisplayName() + "\n"; + Files.write(Paths.get(inferredFilehandle), inferredEvent.getBytes(), StandardOpenOption.APPEND); } else { - logger.info("Output inference unsuccessful -- terminating inference for " + reactionInst); + logger.info("Catalyst inference unsuccessful -- terminating inference for " + reactionInst); } } else { - logger.info("Input inference unsuccessful -- terminating inference for " + reactionInst); + logger.info("Output inference unsuccessful -- terminating inference for " + reactionInst); } } else { - logger.info("No distinct proteins found in instance -- terminating inference for " + reactionInst); + logger.info("Input inference unsuccessful -- terminating inference for " + reactionInst); } } } @@ -273,11 +257,6 @@ public static void setAdaptor(MySQLAdaptor dbAdaptor) dba = dbAdaptor; } - public static void setEligibleFilename(String eligibleFilename) - { - eligibleFilehandle = eligibleFilename; - } - public static void setInferredFilename(String inferredFilename) { inferredFilehandle = inferredFilename; @@ -293,20 +272,17 @@ public static void setSummationInstance(GKInstance summationInstCopy) summationInst = summationInstCopy; } - public static Map getInferredEvent() + public static Map getInferredEvent(Map eventsAlreadyInferredMap) { + inferredEvent.putAll(eventsAlreadyInferredMap); return inferredEvent; } - public static List getInferrableHumanEvents() + public static List getInferrableHumanEvents(List eventsAlreadyInferred) { + inferrableHumanEvents.addAll(eventsAlreadyInferred); return inferrableHumanEvents; } - - public static int getEligibleCount() - { - return eligibleCount; - } public static int getInferredCount() { diff --git a/orthoinference/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java b/orthoinference/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java 
index f117bc327..729376a18 100644 --- a/orthoinference/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java +++ b/orthoinference/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java @@ -2,6 +2,9 @@ import java.io.BufferedReader; import java.io.FileReader; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -23,6 +26,9 @@ public class SkipInstanceChecker { private static final Logger logger = LogManager.getLogger(); private static MySQLAdaptor dba; private static Set skipList = new HashSet<>(); + private static Map homologueMappings = new HashMap<>(); + private static String eligibleFilehandle; + private static Integer eligibleCount = 0; // Skiplist was traditionally provided in a file, but since it's currently just 3 instances, I've just hard-coded them here. public static void getSkipList(String skipListFilename) throws NumberFormatException, Exception @@ -101,9 +107,110 @@ public static boolean checkIfInstanceShouldBeSkipped(GKInstance reactionInst) th logger.info(reactionInst + " has multiple species -- skipping"); return true; } + + // This function finds the total number of distinct proteins associated with an instance, as well as the number that can be inferred. + // Total proteins are stored in reactionProteinCounts[0], inferrable proteins in [1], and the maximum number of homologues for any entity involved in index [2]. + // Reactions with no proteins/EWAS (Total = 0) are not inferred. + List reactionProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionInst); + int reactionTotalProteinCounts = reactionProteinCounts.get(0); + if (reactionTotalProteinCounts > 0) { + // Since we want to keep the eligibility counts the same for posterity, this is where Reaction eligibility will be determined, instead of in ReactionInferrer. 
(October 2019) + logger.info("Total protein count for RlE: " + reactionTotalProteinCounts); + eligibleCount++; + String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; + Files.write(Paths.get(eligibleFilehandle), eligibleEventName.getBytes(), StandardOpenOption.APPEND); + // Checks that ReactionlikeEvents will be fully inferrable before attempting inference + if (!reactionComponentsAreInferrable(reactionInst)) { + return true; + } + } else { + logger.info("No distinct proteins found in instance -- terminating inference for " + reactionInst); + return true; + } return false; } - + + /** + * Each input, output, and catalyst is screened to verify that the Reaction will be inferred. + * This prevents the majority of orphan PEs that were being created during orthoinference. + * Only some EntitySets are still orphaned due to the complexity behind attempting to screen them + * @param reactionInst -- GKInstance that will be screened + * @return -- Boolean is returned that indicates if Reaction is fully inferrable or not + * @throws Exception + */ + private static boolean reactionComponentsAreInferrable(GKInstance reactionInst) throws Exception { + // First gather all inputs, outputs and the PEs in catalyst activities + // Inputs/Outputs/CatalystPEs need to be stored in separate collections.
At time of writing, having it all stored in + // the same collection causes outputs to be inferred in both inputs and outputs during the actual inference -- not ideal + Collection reactionInputs = reactionInst.getAttributeValuesList(input); + Collection reactionOutputs = reactionInst.getAttributeValuesList(output); + Collection reactionCatalystPEs = new ArrayList<>(); + Collection reactionCatalysts = reactionInst.getAttributeValuesList(catalystActivity); + for (GKInstance reactionCatalyst : reactionCatalysts) { + GKInstance catalystPE = (GKInstance) reactionCatalyst.getAttributeValue(physicalEntity); + if (catalystPE != null) { + reactionCatalystPEs.add(catalystPE); + } + } + // Screen inputs + for (GKInstance reactionInput : reactionInputs) { + if (!componentIsInferrable(reactionInput)) { + return false; + } + } + // Screen outputs + for (GKInstance reactionOutput : reactionOutputs) { + if (!componentIsInferrable(reactionOutput)) { + return false; + } + } + // Screen catalyst PhysicalEntities + for (GKInstance reactionCatalystPE : reactionCatalystPEs) { + if (!componentIsInferrable(reactionCatalystPE)) { + return false; + } + } + return true; + } + + // This looks a lot like the code structure found in OrthologousEntityGenerator, just without the actual inference code or override functionality + // This will screen all instance types except for some EntitySets, which are complex to screen ahead of time. + private static boolean componentIsInferrable(GKInstance reactionComponent) throws Exception { + // This block doesn't do anything aside from prevent non-species-containing instances from going through other screening. + // During actual inference, a non-species-containing instance would be returned without any inference. 
+ if (!SpeciesCheckUtility.checkForSpeciesAttribute(reactionComponent)) { +// return true; + } else if (reactionComponent.getSchemClass().isa(GenomeEncodedEntity)) + { + if (reactionComponent.getSchemClass().toString().contains(EntityWithAccessionedSequence)) { + String referenceEntityId = ((GKInstance) reactionComponent.getAttributeValue(referenceEntity)).getAttributeValue(identifier).toString(); + if (homologueMappings.get(referenceEntityId) == null) { + return false; + } + } else { + return false; + } + } else if (reactionComponent.getSchemClass().isa(Complex) || reactionComponent.getSchemClass().isa(Polymer) || reactionComponent.getSchemClass().isa(EntitySet)) { + List complexProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionComponent); + int totalProteinCounts = complexProteinCounts.get(0); + int inferrableProteinCounts = complexProteinCounts.get(1); + if (reactionComponent.getSchemClass().isa(Complex) || reactionComponent.getSchemClass().isa(Polymer)) { + int percent = 0; + if (totalProteinCounts > 0) { + percent = (inferrableProteinCounts * 100) / totalProteinCounts; + } + if (percent < 75) { + logger.info("Complex/Polymer protein count is below 75% threshold (" + percent + "%) -- terminating inference"); + return false; + } + } else if (totalProteinCounts > 0 && inferrableProteinCounts == 0) { + logger.info("No distinct proteins found in EntitySet -- terminating inference"); + return false; + } + } + return true; + } + // Goes through all input/output/catalystActivity/regulatedBy attribute instances, and captures all species associates with them. Returns a collection of species instances. 
@SuppressWarnings("unchecked") private static Collection checkIfEntitiesContainMultipleSpecies(GKInstance reactionInst) throws Exception @@ -219,4 +326,16 @@ public static void setAdaptor(MySQLAdaptor dbAdaptor) { dba = dbAdaptor; } + + public static void setHomologueMappingFile(Map homologueMappingsCopy) { homologueMappings = homologueMappingsCopy; } + + public static void setEligibleFilename(String eligibleFilename) + { + eligibleFilehandle = eligibleFilename; + } + + public static int getEligibleCount() + { + return eligibleCount; + } }