Skip to content

Expand sparql query for extracting spatial coverage info from schema.org documents #100

Open
@iannesbitt

Description

@iannesbitt

The existing method of extracting geographical information from schema.org documents does not meet the standards of the SOSO guidelines on describing Spatial Coverage. The beans that do this work are highlighted below. At the moment, it seems that we only support spatialCoverage sections similar to the following:

    "spatialCoverage": {
        "@type": "Place",
        "geo": {
            "@type": "GeoShape",
            "box": "-5.5000 -92.6000 1.8000 -74.5000"
        }
    },

<!--
  Index field 'southBoundCoord': the first token of a schema.org bounding box.

  The query matches an SO:Dataset whose SO:spatialCoverage is an SO:Place with
  an SO:geo node of type SO:GeoShape, and reads that node's SO:box string,
  e.g. "-5.5000 -92.6000 1.8000 -74.5000". Tokens are taken in the order
  south, west, north, east. Only the first solution is returned (limit 1).
-->
<bean id="schema_org_geoShape_box_south" class="org.dataone.cn.indexer.annotation.SparqlField">
  <constructor-arg name="name" value="southBoundCoord" />
  <constructor-arg name="query">
    <value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX SO: <http://schema.org/>
SELECT ?southBoundCoord
WHERE {
  ?datasetId rdf:type SO:Dataset .
  ?datasetId SO:spatialCoverage ?spatial .
  ?spatial rdf:type SO:Place .
  ?spatial SO:geo ?geo .
  ?geo rdf:type SO:GeoShape .
  ?geo SO:box ?box .
  # Collapse every comma and every whitespace run (including a lone tab or
  # newline) to a single space, then strip a leading/trailing space so that
  # padded values such as " -5.5 -92.6 1.8 -74.5 " still split correctly.
  bind(replace(replace(str(?box), "\\s*,\\s*|\\s+", " "), "^ | $", "") as ?coords)
  # South is the first token.
  bind(strbefore(?coords, " ") as ?southBoundCoord)
}
limit 1
]]>
    </value>
  </constructor-arg>
  <!-- The extracted string is passed through the bean referenced as solrLatitudeConverter. -->
  <property name="converter" ref="solrLatitudeConverter" />
</bean>
<!--
  Index field 'westBoundCoord': the second token of a schema.org bounding box.

  The query matches an SO:Dataset whose SO:spatialCoverage is an SO:Place with
  an SO:geo node of type SO:GeoShape, and reads that node's SO:box string,
  e.g. "-5.5000 -92.6000 1.8000 -74.5000". Tokens are taken in the order
  south, west, north, east. Only the first solution is returned (limit 1).
-->
<bean id="schema_org_geoShape_box_west" class="org.dataone.cn.indexer.annotation.SparqlField">
  <constructor-arg name="name" value="westBoundCoord" />
  <constructor-arg name="query">
    <value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX SO: <http://schema.org/>
SELECT ?westBoundCoord
WHERE {
  ?datasetId rdf:type SO:Dataset .
  ?datasetId SO:spatialCoverage ?spatial .
  ?spatial rdf:type SO:Place .
  ?spatial SO:geo ?geo .
  ?geo rdf:type SO:GeoShape .
  ?geo SO:box ?box .
  # Collapse every comma and every whitespace run (including a lone tab or
  # newline) to a single space, then strip a leading/trailing space so that
  # padded values still split correctly. Normalization is done once and reused.
  bind(replace(replace(str(?box), "\\s*,\\s*|\\s+", " "), "^ | $", "") as ?coords)
  # Drop the first token (south); west is the first token of what remains.
  bind(strafter(?coords, " ") as ?rest)
  bind(strbefore(?rest, " ") as ?westBoundCoord)
}
limit 1
]]>
    </value>
  </constructor-arg>
  <!-- The extracted string is passed through the bean referenced as solrLongitudeConverter. -->
  <property name="converter" ref="solrLongitudeConverter" />
</bean>
<!--
  Index field 'northBoundCoord': the third token of a schema.org bounding box.

  The query matches an SO:Dataset whose SO:spatialCoverage is an SO:Place with
  an SO:geo node of type SO:GeoShape, and reads that node's SO:box string,
  e.g. "-5.5000 -92.6000 1.8000 -74.5000". Tokens are taken in the order
  south, west, north, east. Only the first solution is returned (limit 1).
-->
<bean id="schema_org_geoShape_box_north" class="org.dataone.cn.indexer.annotation.SparqlField">
  <constructor-arg name="name" value="northBoundCoord" />
  <constructor-arg name="query">
    <value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX SO: <http://schema.org/>
SELECT ?northBoundCoord
WHERE {
  ?datasetId rdf:type SO:Dataset .
  ?datasetId SO:spatialCoverage ?spatial .
  ?spatial rdf:type SO:Place .
  ?spatial SO:geo ?geo .
  ?geo rdf:type SO:GeoShape .
  ?geo SO:box ?box .
  # Collapse every comma and every whitespace run (including a lone tab or
  # newline) to a single space, then strip a leading/trailing space so that
  # padded values still split correctly. Normalization is done once and reused.
  bind(replace(replace(str(?box), "\\s*,\\s*|\\s+", " "), "^ | $", "") as ?coords)
  # Drop the first token (south), then the second (west);
  # north is the first token of what remains.
  bind(strafter(?coords, " ") as ?rest)
  bind(strafter(?rest, " ") as ?rest2)
  bind(strbefore(?rest2, " ") as ?northBoundCoord)
}
limit 1
]]>
    </value>
  </constructor-arg>
  <!-- The extracted string is passed through the bean referenced as solrLatitudeConverter. -->
  <property name="converter" ref="solrLatitudeConverter" />
</bean>
<!--
  Index field 'eastBoundCoord': the fourth token of a schema.org bounding box.

  The query matches an SO:Dataset whose SO:spatialCoverage is an SO:Place with
  an SO:geo node of type SO:GeoShape, and reads that node's SO:box string,
  e.g. "-5.5000 -92.6000 1.8000 -74.5000". Tokens are taken in the order
  south, west, north, east. Only the first solution is returned (limit 1).
-->
<bean id="schema_org_geoShape_box_east" class="org.dataone.cn.indexer.annotation.SparqlField">
  <constructor-arg name="name" value="eastBoundCoord" />
  <constructor-arg name="query">
    <value>
<![CDATA[
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX SO: <http://schema.org/>
SELECT ?eastBoundCoord
WHERE {
  ?datasetId rdf:type SO:Dataset .
  ?datasetId SO:spatialCoverage ?spatial .
  ?spatial rdf:type SO:Place .
  ?spatial SO:geo ?geo .
  ?geo rdf:type SO:GeoShape .
  ?geo SO:box ?box .
  # Collapse every comma and every whitespace run (including a lone tab or
  # newline) to a single space, then strip a leading/trailing space. The
  # trailing strip matters here: east is "everything after the third
  # separator", so unstripped trailing whitespace would end up in the value.
  bind(replace(replace(str(?box), "\\s*,\\s*|\\s+", " "), "^ | $", "") as ?coords)
  # Drop south, west and north in turn; east is whatever remains.
  bind(strafter(?coords, " ") as ?rest)
  bind(strafter(?rest, " ") as ?rest2)
  bind(strafter(?rest2, " ") as ?eastBoundCoord)
}
limit 1
]]>
    </value>
  </constructor-arg>
  <!-- The extracted string is passed through the bean referenced as solrLongitudeConverter. -->
  <property name="converter" ref="solrLongitudeConverter" />
</bean>

Since we claim to support all of the representations of schema.org outlined in the SOSO guidelines, we need to be able to support a broader range of spatialCoverage information as outlined in that document. For example, the following is a valid representation from a dataset with a point location, which I think should also be fairly straightforward to support:

  "spatialCoverage": {
    "@type": "Place",
    "name": "BioBasis Nuuk - Monitoring lakes",
    "geo": {
      "@type": "GeoCoordinates",
      "latitude": "64.13",
      "longitude": "-51.38",
      "name": "BioBasis Nuuk - Monitoring lakes"
    },
    "additionalProperty": {
      "@type": [ "PropertyValue", "http://www.wikidata.org/entity/Q4018860" ],
      "name": "WKT",
      "value": "POINT (-51.38 64.13)",
      "valueReference": [
        {
          "@type": [ "PropertyValue", "http://www.wikidata.org/entity/Q31385480" ],
          "name": "datatype",
          "value": "http://www.opengis.net/ont/geosparql#wktLiteral"
        },
        {
          "@type": [ "PropertyValue", "http://www.wikidata.org/entity/Q161779" ],
          "name": "SRS",
          "alternateName": "Spatial Reference System",
          "value": "http://www.opengis.net/def/crs/EPSG/0/4326"
        }
      ]
    }
  },

I have no experience with SPARQL apart from looking through the resource document referenced above, so I would be coming at this from square zero. If anyone has more experience with it, I would appreciate some guidance :)

Metadata

Metadata

Assignees

Labels

bug (Something isn't working), help wanted (Extra attention is needed)

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions