-
Notifications
You must be signed in to change notification settings - Fork 1
Feature/orthoinference 71 updates #104
base: develop
Are you sure you want to change the base?
Changes from 7 commits
057b22c
063db2f
a4bfeba
426c654
77c4726
83299cd
83dfd45
93b0e15
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,10 +49,12 @@ public class EventsInferrer | |
private static String releaseVersion; | ||
private static GKInstance instanceEditInst; | ||
private static GKInstance speciesInst; | ||
private static Map<GKInstance,GKInstance> manualEventToNonHumanSource = new HashMap<>(); | ||
private static List<GKInstance> manualHumanEvents = new ArrayList<>(); | ||
private static Map<GKInstance, GKInstance> eventsAlreadyInferredMap = new HashMap<>(); | ||
private static List<GKInstance> eventsAlreadyInferred = new ArrayList<>(); | ||
private static StableIdentifierGenerator stableIdentifierGenerator; | ||
private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator; | ||
private static final String summationText = "This event has been computationally inferred from an event that has been demonstrated in another species.<p>The inference is based on the homology mapping from PANTHER. Briefly, reactions for which all involved PhysicalEntities (in input, output and catalyst) have a mapped orthologue/paralogue (for complexes at least 75% of components must have a mapping) are inferred to the other species. High level events are also inferred for these events to allow for easier navigation.<p><a href='/electronic_inference_compara.html' target = 'NEW'>More details and caveats of the event inference in Reactome.</a> For details on PANTHER see also: <a href='http://www.pantherdb.org/about.jsp' target='NEW'>http://www.pantherdb.org/about.jsp</a>"; | ||
|
||
|
||
@SuppressWarnings("unchecked") | ||
public static void inferEvents(Properties props, String species) throws Exception | ||
|
@@ -103,7 +105,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio | |
String inferredFilename = "inferred_" + species + "_75.txt"; | ||
createNewFile(eligibleFilename); | ||
createNewFile(inferredFilename); | ||
ReactionInferrer.setEligibleFilename(eligibleFilename); | ||
SkipInstanceChecker.setEligibleFilename(eligibleFilename); | ||
ReactionInferrer.setInferredFilename(inferredFilename); | ||
|
||
stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) speciesObject.get("abbreviation")); | ||
|
@@ -113,6 +115,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio | |
Map<String,String[]> homologueMappings = readHomologueMappingFile(species, "hsap", pathToOrthopairs); | ||
ProteinCountUtility.setHomologueMappingFile(homologueMappings); | ||
EWASInferrer.setHomologueMappingFile(homologueMappings); | ||
SkipInstanceChecker.setHomologueMappingFile(homologueMappings); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name |
||
} catch (FileNotFoundException e) { | ||
logger.fatal("Unable to locate " + speciesName +" mapping file: hsap_" + species + "_mapping.txt. Orthology prediction not possible."); | ||
return; | ||
|
@@ -165,19 +168,19 @@ public static void inferEvents(Properties props, String species) throws Exceptio | |
logger.info("Attempting RlE inference: " + reactionInst); | ||
// Check if the current Reaction already exists for this species, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute. | ||
// Adds to manualHumanEvents array if it passes conditions. This code block allows you to re-run the code without re-inferring instances. | ||
List<GKInstance> previouslyInferredInstances = new ArrayList<GKInstance>(); | ||
List<GKInstance> previouslyInferredInstances = new ArrayList<>(); | ||
previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, orthologousEvent, previouslyInferredInstances); | ||
previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, inferredFrom, previouslyInferredInstances); | ||
if (previouslyInferredInstances.size() > 0) | ||
{ | ||
previouslyInferredInstances.addAll(checkIfPreviouslyInferred(reactionInst, inferredFrom, previouslyInferredInstances)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since You could do this part of the code as follows:
Then the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, it may be better to rename |
||
if (previouslyInferredInstances.size() > 0) { | ||
GKInstance prevInfInst = previouslyInferredInstances.get(0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is a check needed to see if the list contains more than one element? If the list has more than one previously inferred instance, would that be an error? |
||
if (prevInfInst.getAttributeValue(disease) == null) | ||
{ | ||
GKInstance prevInfSummationInst = (GKInstance) prevInfInst.getAttributeValue(summation); | ||
String prevInfSummationText = prevInfSummationInst.getAttributeValue(text).toString(); | ||
if (prevInfInst.getAttributeValue(disease) == null && prevInfSummationText.equals(summationText)) { | ||
logger.info("Inferred RlE already exists, skipping inference"); | ||
manualEventToNonHumanSource.put(reactionInst, prevInfInst); | ||
manualHumanEvents.add(reactionInst); | ||
eventsAlreadyInferredMap.put(reactionInst, prevInfInst); | ||
eventsAlreadyInferred.add(reactionInst); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are the |
||
} else { | ||
logger.info("Disease reaction, skipping inference"); | ||
logger.info("Either a disease or manually inferred reaction, skipping inference"); | ||
} | ||
continue; | ||
} | ||
|
@@ -191,8 +194,11 @@ public static void inferEvents(Properties props, String species) throws Exceptio | |
return; | ||
} | ||
} | ||
PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); | ||
PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); | ||
// Retrieve events inferred from this run, and any that were already inferred. Combine them and then begin Pathway inference. | ||
// The two methods below perform this for a map, containing the original RlE and the inferred RlE, and a List of just the inferred RlEs. | ||
// The latter will be iterated through when building Pathway hierarchies, the former when information from original RlE is needed during this build. | ||
PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent(eventsAlreadyInferredMap)); | ||
PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents(eventsAlreadyInferred)); | ||
orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); | ||
outputReport(species); | ||
logger.info("Finished orthoinference of " + speciesName); | ||
|
@@ -218,7 +224,7 @@ private static void setReleaseDates(String dateOfRelease) | |
} | ||
|
||
@SuppressWarnings("unchecked") | ||
private static List<GKInstance> checkIfPreviouslyInferred(GKInstance reactionInst, String attribute, List<GKInstance> previouslyInferredInstances) throws InvalidAttributeException, Exception | ||
private static List<GKInstance> checkIfPreviouslyInferred(GKInstance reactionInst, String attribute, List<GKInstance> previouslyInferredInstances) throws Exception | ||
{ | ||
for (GKInstance attributeInst : (Collection<GKInstance>) reactionInst.getAttributeValuesList(attribute)) | ||
{ | ||
|
@@ -233,16 +239,18 @@ private static List<GKInstance> checkIfPreviouslyInferred(GKInstance reactionIns | |
|
||
private static void outputReport(String species) throws IOException | ||
{ | ||
int eligibleCount = ReactionInferrer.getEligibleCount(); | ||
int eligibleCount = SkipInstanceChecker.getEligibleCount(); | ||
int inferredCount = ReactionInferrer.getInferredCount(); | ||
float percentInferred = (float) 100*inferredCount/eligibleCount; | ||
// Create file if it doesn't exist | ||
String reportFilename = "report_ortho_inference_test_reactome_" + releaseVersion + ".txt"; | ||
logger.info("Updating " + reportFilename); | ||
if (!Files.exists(Paths.get(reportFilename))) { | ||
createNewFile(reportFilename); | ||
String reportHeader = "## Number of inferred reactions by species for Reactome Release " + releaseVersion; | ||
Files.write(Paths.get(reportFilename), reportHeader.getBytes(), StandardOpenOption.APPEND); | ||
} | ||
String results = "hsap to " + species + ":\t" + inferredCount + " out of " + eligibleCount + " eligible reactions (" + String.format("%.2f", percentInferred) + "%)\n"; | ||
String results = "hsap to " + species + ":\tInferred " + inferredCount + " out of " + eligibleCount + " eligible reactions (" + String.format("%.2f", percentInferred) + "%)\n"; | ||
Files.write(Paths.get(reportFilename), results.getBytes(), StandardOpenOption.APPEND); | ||
} | ||
|
||
|
@@ -302,7 +310,6 @@ private static void setSummationInstance() throws Exception | |
GKInstance summationInst = new GKInstance(dbAdaptor.getSchema().getClassByName(Summation)); | ||
summationInst.setDbAdaptor(dbAdaptor); | ||
summationInst.addAttributeValue(created, instanceEditInst); | ||
String summationText = "This event has been computationally inferred from an event that has been demonstrated in another species.<p>The inference is based on the homology mapping from PANTHER. Briefly, reactions for which all involved PhysicalEntities (in input, output and catalyst) have a mapped orthologue/paralogue (for complexes at least 75% of components must have a mapping) are inferred to the other species. High level events are also inferred for these events to allow for easier navigation.<p><a href='/electronic_inference_compara.html' target = 'NEW'>More details and caveats of the event inference in Reactome.</a> For details on PANTHER see also: <a href='http://www.pantherdb.org/about.jsp' target='NEW'>http://www.pantherdb.org/about.jsp</a>"; | ||
summationInst.addAttributeValue(text, summationText); | ||
summationInst.addAttributeValue(_displayName, summationText); | ||
summationInst = InstanceUtilities.checkForIdenticalInstances(summationInst, null); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,8 +47,6 @@ public static void inferPathways(List<GKInstance> inferrableHumanEvents) throws | |
addInferredEventsToInferredPathways(); | ||
logger.info("Finished populating inferred Pathways with inferred Events"); | ||
|
||
//TODO: LOG starting HERE | ||
|
||
// Connect preceding events to RlEs, if they have any in the source species. | ||
logger.info("Adding preceding events to inferred Events"); | ||
inferPrecedingEvents(); | ||
|
@@ -168,40 +166,31 @@ private static List<GKInstance> getInferredEventInstances(GKInstance humanPathwa | |
private static void inferPrecedingEvents() throws Exception | ||
{ | ||
Set<GKInstance> seenPrecedingEvent = new HashSet<>(); | ||
for (GKInstance inferrableEventInst : updatedInferrableHumanEvents) | ||
{ | ||
if (!seenPrecedingEvent.contains(inferrableEventInst)) | ||
{ | ||
if (inferrableEventInst.getAttributeValue(precedingEvent)!= null) | ||
{ | ||
for (GKInstance inferrableEventInst : updatedInferrableHumanEvents) { | ||
if (!seenPrecedingEvent.contains(inferrableEventInst)) { | ||
if (inferrableEventInst.getAttributeValue(precedingEvent) != null) { | ||
Comment on lines
+170
to
+171
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could these conditions be merged into one if-statement as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These two conditions could be combined with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You may also want to consider refactoring the body of the if statement to its own method so that you have the operation occurring for each |
||
logger.info("Adding preceding event to " + inferrableEventInst); | ||
List<GKInstance> precedingEventInstances = new ArrayList<>(); | ||
// Find all preceding events for source instance that have an inferred counterpart | ||
for (GKInstance precedingEventInst : (Collection<GKInstance>) inferrableEventInst.getAttributeValuesList(precedingEvent)) | ||
{ | ||
if (inferredEventIdenticals.get(precedingEventInst) != null) | ||
{ | ||
for (GKInstance precedingEventInst : (Collection<GKInstance>) inferrableEventInst.getAttributeValuesList(precedingEvent)) { | ||
if (inferredEventIdenticals.get(precedingEventInst) != null) { | ||
precedingEventInstances.add(inferredEventIdenticals.get(precedingEventInst)); | ||
} | ||
} | ||
Set<String> inferredPrecedingEvents = new HashSet<>(); | ||
// Find any inferred preceding events that already exist for the inferred instance (don't want to add any redundant preceding events) | ||
for (GKInstance precedingEventInst : (Collection<GKInstance>) inferredEventIdenticals.get(inferrableEventInst).getAttributeValuesList(precedingEvent)) | ||
{ | ||
for (GKInstance precedingEventInst : (Collection<GKInstance>) inferredEventIdenticals.get(inferrableEventInst).getAttributeValuesList(precedingEvent)) { | ||
inferredPrecedingEvents.add(precedingEventInst.getDBID().toString()); | ||
} | ||
List<GKInstance> updatedPrecedingEventInstances = new ArrayList<>(); | ||
// Find existing preceding events that haven't already been attached to the inferred instance | ||
for (GKInstance precedingEventInst : precedingEventInstances) | ||
{ | ||
if (!inferredPrecedingEvents.contains(precedingEventInst.getDBID().toString())) | ||
{ | ||
for (GKInstance precedingEventInst : precedingEventInstances) { | ||
if (!inferredPrecedingEvents.contains(precedingEventInst.getDBID().toString())) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could |
||
updatedPrecedingEventInstances.add(precedingEventInst); | ||
} | ||
} | ||
// Add preceding event to inferred instance | ||
if (updatedPrecedingEventInstances != null && updatedPrecedingEventInstances.size() > 0) | ||
{ | ||
if (updatedPrecedingEventInstances != null && updatedPrecedingEventInstances.size() > 0) { | ||
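There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The null check shouldn't be necessary since updatedPrecedingEventInstances is always assigned a new ArrayList a few lines above, so it can never be null here. |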
||
inferredEventIdenticals.get(inferrableEventInst).addAttributeValue(precedingEvent, updatedPrecedingEventInstances); | ||
dba.updateInstanceAttribute(inferredEventIdenticals.get(inferrableEventInst), precedingEvent); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure the "target = 'NEW'" attribute, which is used twice, is allowed in modern HTML. To open a link in a new window or tab, target='_blank' is the attribute value to use:
https://www.w3schools.com/tags/att_a_target.asp
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this string be split onto several lines? This is a bit wide, even for me.