Skip to content

Commit fbb6c23

Browse files
committed
Add facets.tsv to release products
1 parent ee0a475 commit fbb6c23

File tree

2 files changed

+139
-1
lines changed

2 files changed

+139
-1
lines changed

Makefile

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ $(REFRESH_IMPS):
516516
##########################################
517517

518518
.PHONY: products
519-
products: primary human merged base subsets release_reports
519+
products: primary human merged base subsets release_reports src/facets.tsv
520520

521521
# release vars
522522
TS = $(shell date +'%d:%m:%Y %H:%M')
@@ -794,6 +794,7 @@ $(VERSION_IMPS): version_%: src/ontology/imports/%_import.owl | check_robot
794794
--output $<
795795
@echo "Updated versionIRI of $<"
796796

797+
797798
# ----------------------------------------
798799
# RELEASE COPY
799800
# ----------------------------------------
@@ -811,6 +812,15 @@ publish: products
811812
@echo ""
812813

813814

815+
# ----------------------------------------
816+
# FACET SEARCH FILE
817+
# ----------------------------------------
818+
819+
src/facets.tsv: $(DM).owl src/sparql/build/facets.rq | check_robot
820+
@$(ROBOT) query --input $< --query $(word 2,$^) $@
821+
@echo "Created $@"
822+
823+
814824
##########################################
815825
## VERIFY build products
816826
##########################################

src/sparql/build/facets.rq

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# PRODUCES DATA FOR THE DO-KB FACETED SEARCH
2+
#   - Includes annotated evidence classes and their superclasses
3+
# - Includes directly asserted, logically-related
4+
# imported classes derived from direct assertions of disease classes,
5+
# superclasses, and 'disease has feature' classes, along with their
6+
# superclasses
7+
# - Excludes distant/weakly related classes resulting from multi-step import
8+
# traversal
9+
# - Similar to 'OWL flattener' (https://github.com/DiseaseOntology/OWLFlattener)
10+
# described in 2024 publication (https://pmc.ncbi.nlm.nih.gov/articles/PMC10767934/)
11+
#       with fixes to evidence code and EQ axiom handling, and greater flexibility
12+
# for facet generation
13+
# - NOTE: Some facet names are adjusted for consistency with existing DO-KB
14+
# facets; consider removal in future (identified by `# REMOVE?` in query)
15+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
16+
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
17+
PREFIX owl: <http://www.w3.org/2002/07/owl#>
18+
PREFIX obo: <http://purl.obolibrary.org/obo/>
19+
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
20+
PREFIX dc: <http://purl.org/dc/elements/1.1/>
21+
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
22+
23+
SELECT DISTINCT ?id ?name ?definition ?facet ?import_keyword ?import_id
24+
FROM <http://purl.obolibrary.org/obo/doid/doid-merged.owl>
25+
WHERE {
26+
# get logically-related import classes (with limits)
27+
{
28+
{
29+
# get disease class, superclass, and 'disease has feature' asserted +
30+
# logically-related classes
31+
SELECT DISTINCT ?iri ?asserted
32+
WHERE {
33+
# for class asserted axioms, include classes in unions
34+
{
35+
?iri a owl:Class ;
36+
(owl:equivalentClass|rdfs:subClassOf) ?anon .
37+
FILTER(CONTAINS(str(?iri), "DOID"))
38+
FILTER NOT EXISTS { ?iri owl:deprecated true }
39+
?anon (owl:unionOf|owl:intersectionOf|owl:Restriction|rdf:first|rdf:rest|owl:someValuesFrom|owl:onClass)* ?asserted .
40+
}
41+
UNION
42+
# for superclass or 'disease has feature' diseases, exclude classes in
43+
# unions (frequent source of error, e.g.
44+
# DOID:0040085 'bacterial sepsis' --> NCBITaxon:10239 'Viruses')
45+
{
46+
?iri a owl:Class ;
47+
(owl:equivalentClass|rdfs:subClassOf|owl:intersectionOf|owl:Restriction|rdf:first|rdf:rest|owl:someValuesFrom|owl:onClass)+ ?sup_feat .
48+
?sup_feat (owl:equivalentClass|rdfs:subClassOf)* ?anon .
49+
FILTER(CONTAINS(str(?iri), "DOID") && CONTAINS(str(?sup_feat), "DOID"))
50+
FILTER NOT EXISTS { ?iri owl:deprecated true }
51+
52+
?anon (owl:intersectionOf|owl:Restriction|rdf:first|rdf:rest|owl:someValuesFrom|owl:onClass)* ?asserted .
53+
}
54+
}
55+
}
56+
FILTER(
57+
!isBlank(?asserted) &&
58+
!CONTAINS(str(?asserted), "DOID") &&
59+
!(?asserted IN (owl:Thing, rdf:nil))
60+
)
61+
62+
# get disease info
63+
?iri rdfs:label ?nm_lang .
64+
OPTIONAL { ?iri obo:IAO_0000115 ?def_lang }
65+
66+
# expand imported, related classes to include superclasses, excluding
67+
# placeholder doid: classes
68+
?asserted rdfs:subClassOf* ?import_iri .
69+
?import_iri rdfs:label ?import_lang ;
70+
rdfs:subClassOf* ?facet_iri .
71+
FILTER(!isBlank(?import_iri) && !CONTAINS(str(?import_iri), "doid"))
72+
73+
# get facet label
74+
?facet_iri rdfs:label ?facet_lang .
75+
# ensure facet name is root (including doid: placeholder labels)
76+
FILTER(!isBlank(?facet_iri))
77+
FILTER(
78+
EXISTS { ?facet_iri rdfs:subClassOf owl:Thing } ||
79+
(
80+
NOT EXISTS { ?facet_iri rdfs:subClassOf ?any } &&
81+
NOT EXISTS { ?facet_iri owl:deprecated ?obsolete }
82+
)
83+
)
84+
85+
# format output
86+
BIND(CONCAT("DOID:", STRAFTER(str(?iri), "DOID_")) AS ?id)
87+
BIND(REPLACE(str(?import_iri), "^.*/([^/]+)[#_]", "$1:") AS ?import_id)
88+
BIND(str(?nm_lang) AS ?name)
89+
BIND(str(?def_lang) AS ?definition)
90+
BIND(str(?import_lang) AS ?import_keyword)
91+
# REMOVE? (adjustment for facet names)
92+
BIND(
93+
IF(
94+
str(?facet_lang) = "cell",
95+
"cell_type",
96+
REPLACE(str(?facet_lang), " +", "_")
97+
) AS ?facet)
98+
}
99+
UNION
100+
# get evidence codes annotated on definitions (not logically-related)
101+
{
102+
VALUES ?facet_iri { obo:ECO_0000000 }
103+
?facet_iri rdfs:label ?facet_lang .
104+
105+
?iri rdfs:label ?nm_lang ;
106+
obo:IAO_0000115 ?def_lang .
107+
FILTER(CONTAINS(str(?iri), "DOID"))
108+
FILTER NOT EXISTS { ?iri owl:deprecated true }
109+
110+
?axiom a owl:Axiom ;
111+
owl:annotatedSource ?iri ;
112+
owl:annotatedProperty obo:IAO_0000115 ;
113+
owl:annotatedTarget ?def_lang ;
114+
(dc:type|rdfs:subClassOf)* ?import_iri .
115+
116+
?import_iri rdfs:label ?import_lang ;
117+
rdfs:subClassOf* ?facet_iri .
118+
119+
# format for output
120+
BIND(CONCAT("DOID:", STRAFTER(str(?iri), "DOID_")) AS ?id)
121+
BIND(CONCAT("ECO:", STRAFTER(str(?import_iri), "ECO_")) AS ?import_id)
122+
BIND(str(?nm_lang) AS ?name)
123+
BIND(str(?def_lang) AS ?definition)
124+
BIND(str(?import_lang) AS ?import_keyword)
125+
BIND(str(?facet_lang) AS ?facet)
126+
}
127+
}
128+
ORDER BY ?id ?facet ?import_id

0 commit comments

Comments
 (0)