# PRODUCES DATA FOR THE DO-KB FACETED SEARCH
#   - Includes annotated evidence classes and their superclasses
#   - Includes directly asserted, logically-related
#       imported classes derived from direct assertions of disease classes,
#       superclasses, and 'disease has feature' classes, along with their
#       superclasses
#   - Excludes distant/weakly related classes resulting from multi-step import
#       traversal
#   - Similar to 'OWL flattener' (https://github.com/DiseaseOntology/OWLFlattener)
#       described in 2024 publication (https://pmc.ncbi.nlm.nih.gov/articles/PMC10767934/)
#       with fixes to evidence code and EQ axiom handling, and greater flexibility
#       for facet generation
#   - NOTE: Some facet names are adjusted for consistency with existing DO-KB
#       facets; consider removal in future (identified by `# REMOVE?` in query)
#
# OUTPUT COLUMNS (one row per disease / facet / related class combination)
#   ?id             disease CURIE, "DOID:" + the part of the IRI after "DOID_"
#   ?name           disease rdfs:label as a plain string
#   ?definition     disease IAO:0000115 definition as a plain string (unbound
#                   in the first branch when a disease has no definition)
#   ?facet          label of the root class above the related class
#   ?import_keyword rdfs:label of the related (import or evidence) class
#   ?import_id      CURIE of the related class
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT DISTINCT ?id ?name ?definition ?facet ?import_keyword ?import_id
FROM <http://purl.obolibrary.org/obo/doid/doid-merged.owl>
WHERE {
    # get logically-related import classes (with limits)
    {
        {
            # get disease class, superclass, and 'disease has feature' asserted +
            # logically-related classes
            SELECT DISTINCT ?iri ?asserted
            WHERE {
                # for class asserted axioms, include classes in unions
                {
                    ?iri a owl:Class ;
                        (owl:equivalentClass|rdfs:subClassOf) ?anon .
                    FILTER (CONTAINS(str(?iri), "DOID"))
                    FILTER NOT EXISTS { ?iri owl:deprecated true }

                    # zero-or-more path: ?asserted may be ?anon itself or any
                    # node reachable through the class-expression structure
                    ?anon (owl:unionOf|owl:intersectionOf|owl:Restriction|rdf:first|rdf:rest|owl:someValuesFrom|owl:onClass)* ?asserted .
                }
                UNION
                # for superclass or 'disease has feature' diseases, exclude classes in
                # unions (frequent source of error, e.g.
                # DOID:0040085 'bacterial sepsis' --> NCBITaxon:10239 'Viruses')
                {
                    ?iri a owl:Class ;
                        (owl:equivalentClass|rdfs:subClassOf|owl:intersectionOf|owl:Restriction|rdf:first|rdf:rest|owl:someValuesFrom|owl:onClass)+ ?sup_feat .
                    ?sup_feat (owl:equivalentClass|rdfs:subClassOf)* ?anon .
                    # both the starting class and the class reached through the
                    # axioms must be DOID classes
                    FILTER (CONTAINS(str(?iri), "DOID") && CONTAINS(str(?sup_feat), "DOID"))
                    FILTER NOT EXISTS { ?iri owl:deprecated true }

                    # same expansion as above, without owl:unionOf
                    ?anon (owl:intersectionOf|owl:Restriction|rdf:first|rdf:rest|owl:someValuesFrom|owl:onClass)* ?asserted .
                }
            }
        }
        # keep only named, non-DOID classes; drop owl:Thing and the RDF list
        # terminator picked up by the rdf:rest traversal
        FILTER (
            !isBlank(?asserted) &&
            !CONTAINS(str(?asserted), "DOID") &&
            !(?asserted IN (owl:Thing, rdf:nil))
        )

        # get disease info
        ?iri rdfs:label ?nm_lang .
        OPTIONAL { ?iri obo:IAO_0000115 ?def_lang }

        # expand imported, related classes to include superclasses, excluding
        # placeholder doid: classes
        ?asserted rdfs:subClassOf* ?import_iri .
        ?import_iri rdfs:label ?import_lang ;
            rdfs:subClassOf* ?facet_iri .
        FILTER (!isBlank(?import_iri) && !CONTAINS(str(?import_iri), "doid"))

        # get facet label
        ?facet_iri rdfs:label ?facet_lang .
        # ensure facet name is root (including doid: placeholder labels):
        # either a direct subclass of owl:Thing, or a class with no superclass
        # and no owl:deprecated annotation
        FILTER (!isBlank(?facet_iri))
        FILTER (
            EXISTS { ?facet_iri rdfs:subClassOf owl:Thing } ||
            (
                NOT EXISTS { ?facet_iri rdfs:subClassOf ?any } &&
                NOT EXISTS { ?facet_iri owl:deprecated ?obsolete }
            )
        )

        # format output
        BIND(CONCAT("DOID:", STRAFTER(str(?iri), "DOID_")) AS ?id)
        # IRI --> CURIE: strip everything through the last "/" and turn the
        # "#" or "_" separator after the namespace token into ":"
        BIND(REPLACE(str(?import_iri), "^.*/([^/]+)[#_]", "$1:") AS ?import_id)
        BIND(str(?nm_lang) AS ?name)
        # str() of an unbound ?def_lang is an error, leaving ?definition unbound
        BIND(str(?def_lang) AS ?definition)
        BIND(str(?import_lang) AS ?import_keyword)
        # REMOVE? (adjustment for facet names)
        BIND(
            IF(
                str(?facet_lang) = "cell",
                "cell_type",
                REPLACE(str(?facet_lang), " +", "_")
            ) AS ?facet)
    }
    UNION
    # get evidence codes annotated on definitions (not logically-related)
    {
        # the facet is fixed to the single class ECO:0000000
        VALUES ?facet_iri { obo:ECO_0000000 }
        ?facet_iri rdfs:label ?facet_lang .

        ?iri rdfs:label ?nm_lang ;
            obo:IAO_0000115 ?def_lang .
        FILTER (CONTAINS(str(?iri), "DOID"))
        FILTER NOT EXISTS { ?iri owl:deprecated true }

        # axiom annotation on the definition; follow dc:type to the annotated
        # class and rdfs:subClassOf upward to include its superclasses
        ?axiom a owl:Axiom ;
            owl:annotatedSource ?iri ;
            owl:annotatedProperty obo:IAO_0000115 ;
            owl:annotatedTarget ?def_lang ;
            (dc:type|rdfs:subClassOf)* ?import_iri .

        # restrict to labelled classes at or below the fixed facet class
        ?import_iri rdfs:label ?import_lang ;
            rdfs:subClassOf* ?facet_iri .

        # format for output
        BIND(CONCAT("DOID:", STRAFTER(str(?iri), "DOID_")) AS ?id)
        BIND(CONCAT("ECO:", STRAFTER(str(?import_iri), "ECO_")) AS ?import_id)
        BIND(str(?nm_lang) AS ?name)
        BIND(str(?def_lang) AS ?definition)
        BIND(str(?import_lang) AS ?import_keyword)
        BIND(str(?facet_lang) AS ?facet)
    }
}
ORDER BY ?id ?facet ?import_id