21 commits
8bc4a32
Fresh start for LDP Container
benosteen Aug 5, 2025
e7a7942
Initial plumbing in of the LDPContainer
benosteen Aug 7, 2025
5b4c285
Some fixes
benosteen Aug 7, 2025
1e5e33f
handling record rollback in graphstore
benosteen Aug 7, 2025
39fa61e
Refactoring fix to allow for easy container handling
benosteen Aug 7, 2025
8f916d4
Adding in the LDP Container autogeneration
benosteen Aug 7, 2025
547b609
Ensure the root container exists
benosteen Aug 8, 2025
afd0cd1
Fixing the testing pattern which creates app THEN db. It affected the…
benosteen Aug 8, 2025
9e75074
Aligning the postgres/migration
benosteen Aug 8, 2025
1de8dfc
Not sure why pcre is in here
benosteen Aug 14, 2025
8fcdd77
Adding the beginnings of the LDP docs
benosteen Aug 14, 2025
84d258c
Finished adding all the LDP BACKEND hooks and utils for the API
benosteen Aug 14, 2025
c2665e7
Fix segmentation + refresh capability to ingest + root LDP view
benosteen Aug 15, 2025
49d7740
Added LDP_API paginated GET Response for containers
benosteen Aug 15, 2025
6622e84
Making the container page etag depend on the page number too
benosteen Aug 20, 2025
0124762
Making the page and container etags different
benosteen Aug 20, 2025
5f97a04
Cleaning up the LDP representation
benosteen Aug 27, 2025
f3e2e31
Adding in the base representation handling for POST/PUT resources
benosteen Aug 27, 2025
ab8adaf
If the JSON-LD gets prefixed to FQDN, remove the @context.@base if pr…
benosteen Aug 27, 2025
d60a8ed
Adding RDF parsing to framed JSON-LD capability for LDP
benosteen Aug 28, 2025
b2bcce6
Merge branch 'main' into feature-ldp
benosteen Aug 29, 2025
3 changes: 3 additions & 0 deletions .env.example
@@ -114,4 +114,7 @@ RDF_CONTEXT_CACHE_EXPIRES=30
# a .env file DOES. Very annoying.
# When defining environment variables in a file to be used with docker run --env-file, avoid quoting values unless you explicitly intend for the quotes to be part of the variable's content. If a value contains spaces or special characters, simply define it without quotes, as Docker will treat the entire string after the = as the value.

LDP_BACKEND=True
LDP_AUTOCREATE_CONTAINERS=True

CONTENT_PROFILE_DATA={"name": "Simplified Dublin Core", "description": "No description given.", "url": "https://raw.githubusercontent.com/thegetty/getty-jsonld-sparql-patterns/refs/heads/main/src/gettysparqlpatterns/data/crmtodublincore.json", "patterns": [{"name": "InformationObject as DC", "description": "Views a CRM InformationObject in terms of Simplified Dublin Core", "sparql_pattern": "# MOCKED DCMI RESPONSE\nPREFIX aat: <http://vocab.getty.edu/aat/>\nPREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\nPREFIX dc: <http://purl.org/dc/elements/1.1/>\nPREFIX dcterm: <http://purl.org/dc/terms/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\n\nCONSTRUCT {\n <$URI> dc:identifier ?identifier ;\n dc:title ?title ;\n dc:description ?note ;\n dc:creator ?agent_name ;\n dc:date ?date_range ;\n dc:date ?date_expression ;\n dc:language ?lang ;\n dc:language ?lang_as_text ;\n dc:subject ?subject .\n ?subject rdfs:label ?subject_text .\n \n}\nWHERE {\nSELECT ?identifier ?title ?note ?subject ?subject_text ?date_range ?date_expression ?lang_as_text ?lang ?agent_name\n WHERE {\n GRAPH <$URI> {\n <$URI> crm:P1_is_identified_by ?id_block .\n ?id_block crm:P2_has_type ?id_type ;\n crm:P190_has_symbolic_content ?identifier_text \n BIND(CONCAT(?identifier_text, \" (\", STR(?id_type), \")\") as ?identifier) .\n <$URI> rdfs:label ?title .\n OPTIONAL {\n <$URI> crm:P72_has_language ?lang .\n ?lang rdfs:label ?lang_as_text\n }\n OPTIONAL {\n <$URI> crm:P129_is_about ?subject .\n ?subject a crm:E33_Linguistic_Object ;\n rdfs:label ?subject_text\n }\n OPTIONAL {\n <$URI> crm:P67i_is_referred_to_by ?notes .\n ?notes crm:P2_has_type ?note_type ;\n crm:P190_has_symbolic_content ?note_text \n BIND(CONCAT(\"(\", STR(?note_type), \") \", ?note_text) as ?note) .\n }\n OPTIONAL {\n ?prod_act a crm:E12_Production ;\n OPTIONAL {\n \t?prod_act crm:P9_consists_of ?activities .\n ?activities crm:P14_carried_out_by ?agent .\n ?agent rdfs:label ?agent_name .\n }\n OPTIONAL {\n ?prod_act crm:P4_has_time-span ?timespan .\n ?timespan crm:P82a_begin_of_the_begin ?begin ;\n \t\t\t crm:P82b_end_of_the_end ?end \n OPTIONAL {\n BIND(CONCAT(STR(?begin), \" - \", STR(?end)) as ?date_range)\n ?timespan crm:P1_is_identified_by ?expression .\n ?expression a crm:E33_E41_Linguistic_Appellation ;\n \t\t\t\t crm:P190_has_symbolic_content ?date_expression ;\n }\n }\n }\n }\n}\n}", "stype": "construct", "keyword_parameters": ["URI"], "default_values": {}, "applies_to": ["InformationObject"], "ask_filter": null, "framing": null, "profile_uri": "urn:getty:dublincore"}, {"name": "LinguisticObject to dublincore", "description": "No description given", "sparql_pattern": "PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\nPREFIX dc: <http://purl.org/dc/elements/1.1/>\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\nCONSTRUCT {\n <$URI> dc:title ?title ;\n dc:description ?content ;\n dc:type ?type ;\n dc:type ?type_name .\n} WHERE {\n SELECT ?title ?content ?type ?type_name WHERE {\n <$URI> a crm:E33_Linguistic_Object ;\n rdfs:label ?title ;\n crm:P2_has_type ?type ;\n crm:P190_has_symbolic_content ?content .\n ?type rdfs:label ?type_name .\n }\n}", "stype": "construct", "keyword_parameters": ["URI"], "default_values": {}, "applies_to": ["LinguisticObject"], "ask_filter": null, "framing": null, "profile_uri": "urn:getty:dublincore"}]}
2 changes: 1 addition & 1 deletion documentation/content_negotiation.md
@@ -1,5 +1,5 @@
# Content Negotiation

([back to ToC](/README.md))
## Overview

The LOD Gateway, when configured to support RDF Processing, provides both standard HTTP Content Negotiation of mimetype and data-specific support for [Content Negotiation by Profile from the W3C](https://www.w3.org/TR/dx-prof-conneg/) (aka CNBP).
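For illustration, a minimal sketch of how a client might negotiate both mimetype and profile; the resource URL is hypothetical, the profile URI is taken from the example CONTENT_PROFILE_DATA configuration, and the exact negotiation mechanism (an Accept-Profile header versus a profile parameter on Accept) depends on how the gateway is configured:

```python
import requests

# Hypothetical resource URL; "urn:getty:dublincore" matches the example CONTENT_PROFILE_DATA config.
resp = requests.get(
    "https://lod.example.org/object/123",
    headers={
        "Accept": "application/ld+json",
        "Accept-Profile": "urn:getty:dublincore",  # assumption: CNBP via an Accept-Profile header
    },
)
print(resp.headers.get("Content-Profile"))  # profile actually applied, if the server reports one
print(resp.json())
```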
30 changes: 30 additions & 0 deletions documentation/ldp.md
@@ -0,0 +1,30 @@
# Linked Data Platform Backend and API
([back to ToC](/README.md))
## Overview
The [Linked Data Platform (LDP)](https://www.w3.org/TR/ldp/#ldpc) is a W3C standard that defines a set of rules for interacting with web resources using HTTP, enabling a read-write Linked Data architecture on the web. It provides a way to access, create, update, and delete RDF resources over HTTP in a standardized manner, facilitating data integration and interoperability. LDP builds upon the principles of Linked Data: using URIs as names for things, and providing useful information, including links to other URIs, when a URI is looked up.

One of the core concepts is the idea of a 'container' which contains zero or more Linked Data resources as a set. A container may have resources added or removed from it, and when resolving its URI, it should be possible to find out more about the container as well as list every resource it contains. A container may contain almost anything that can be referenced with a URI.

When the LDP features are enabled, the LOD Gateway is extended with container structures, and the service itself is viewed as having a single 'root' container that everything else (resources and other containers) is part of. The API is also extended to allow for a subset of the LDP API functionality defined in the specification.
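As a rough sketch of what that looks like from a client's point of view (the base URL is hypothetical; the paging behaviour is described in the limitations below):

```python
import requests

ROOT = "https://lod.example.org/"  # hypothetical LOD Gateway root container

resp = requests.get(ROOT, headers={"Accept": "application/ld+json"})
print(resp.headers.get("Link"))  # LDP type / paging links advertised by the service
print(resp.json())               # JSON-LD description of the root container and a page of its members
```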

### Limited Support for the Full LDP Specification
The LOD Gateway and the LDP specification are based around different ideas on how to handle and store Linked Data:
- The LOD Gateway is built around a CRUD API for JSON documents and extends that by adding optional Named Graph JSON-LD RDF capabilities, Content Negotiation, SPARQL, ActivityStreams, and fast bulk ingest and updating.
- LDP is a much broader specification which can involve both RDF and non-RDF resources such as images, and specifies the concepts and API for containers. Its focus is on the REST API required to manage these resources and containers.

The LOD Gateway implementation supports a narrower portion of the full LDP specification:

- The LOD Gateway's LDP support depends on RDF support also being enabled (PROCESS_RDF=True).
- The LOD Gateway root path is the root in the container hierarchy.
- The container hierarchy mirrors the URL path, e.g. /annotations/test/12345 will have the following container hierarchy:
  - root '/' <-member- '/annotations/' <-member- '/annotations/test/' <-member- '/annotations/test/12345'
- A container will contain either other containers or named graphs (the JSON-LD documents).
- Adding binary or non-RDF content is not supported.
- The LOD Gateway will use ldp:BasicContainer and only supports the dc:title and dc:description fields for user-editable container metadata.
- Pagination using the [LDP-PAGING specification](https://www.w3.org/TR/ldp-paging/) will be the default, and members will be listed containers first, followed by the member JSON-LD resources. This will be an ordered but dynamic response, so page responses may vary after resources are deleted.
  - This is usefully different from the existing static ActivityStreams responses, as the container listing will only contain members that are part of that container at the time of querying, and will not list deleted members.
- Containers and Named Graphs must be added to an existing container resource, or the request will fail (see the sketch below):
  - A POST request to a container, using the Slug header to specify an optional relative identifier.
  - A PUT request to a specific path. The slug will be inferred from the path.
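A minimal sketch of those two request styles, assuming a hypothetical gateway base URL, an existing /annotations/test/ container, and a simple JSON-LD payload; authentication and exact content types depend on the deployment:

```python
import json
import requests

BASE = "https://lod.example.org/annotations/test/"  # hypothetical, pre-existing container

doc = {
    "@context": {"dc": "http://purl.org/dc/elements/1.1/"},
    "dc:title": "An example resource",
}

# POST to the container, suggesting a relative identifier via the Slug header
resp = requests.post(
    BASE,
    headers={"Content-Type": "application/ld+json", "Slug": "12345"},
    data=json.dumps(doc),
)

# PUT to a specific path; the slug is inferred from the path itself
resp = requests.put(
    BASE + "12345",
    headers={"Content-Type": "application/ld+json"},
    data=json.dumps(doc),
)
```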

NB There is an optional feature (LDP_AUTOCREATE_CONTAINERS) that can be enabled to make the LOD Gateway automatically create the necessary container structure if it does not already exist.
54 changes: 52 additions & 2 deletions source/web-service/flaskapp/__init__.py
@@ -26,6 +26,7 @@
from flaskapp.models import db
from flaskapp.models.activity import Activity
from flaskapp.models.record import Record
from flaskapp.models.container import LDPContainer, LDPContainerContents
from flaskapp import local_thesaurus
from flaskapp.base_graph_utils import base_graph_filter, document_loader

@@ -131,6 +132,10 @@ def create_app():
app.config["RDF_FILTER_SET"] = None
app.config["CONTENT_PROFILE_DATA_URL"] = None
app.config["CONTENT_PROFILE_PATTERNS"] = {}
app.config["LDP_BACKEND"] = False
app.config["LDP_API"] = False
app.config["LDP_AUTOCREATE_CONTAINERS"] = False
app.config["LDP_VALIDATE_SLUGS"] = False

# Set up RDF/Content Profile defaults:
app.config["USE_PYLD_REFORMAT"] = True
@@ -143,6 +148,42 @@
app.config["SPARQL_QUERY_ENDPOINT"] = environ["SPARQL_QUERY_ENDPOINT"]
app.config["SPARQL_UPDATE_ENDPOINT"] = environ["SPARQL_UPDATE_ENDPOINT"]

# LDP Support (NB all dependent on PROCESS_RDF being true)
# LDP_BACKEND -> This flag controls the backend bookkeeping where the container lists are kept
# up-to-date based on record changes (add or delete).
# LDP_API -> This flag switches on the LDP support in the API (Link headers, API endpoint behaviors and responses)
# Also dependent on the LDP_BACKEND flag
if ldp_backend := (environ.get("LDP_BACKEND", "False").lower() == "true"):
# LDP_API is dependent on the backend being switched on as well
app.config["LDP_BACKEND"] = ldp_backend

# Switch on the LDP API? This turns on both the advertising using Link headers, and also the routing support
# for containers, paging through their contents, and adding/updating resources and containers
app.config["LDP_API"] = environ.get("LDP_API", "False").lower() == "true"

# Should the backend automatically instantiate containers when a record is put through the /ingest route?
# eg if True, Adding '/objects/magazines/123456' will create an LDP container for '/objects' and for
# '/objects/magazines' with the parent-child relationship you'd expect, and add the resource
# '/objects/magazines/123456' to the container '/objects/magazines'
# If False, adding a resource through /ingest without the expected containers in place will fail, which will mimic
# the behavior of the LDP API if you attempt to add a resource to a container that does not exist
app.config["LDP_AUTOCREATE_CONTAINERS"] = (
environ.get("LDP_AUTOCREATE_CONTAINERS", "False").lower() == "true"
)

# Perform an extra step to validate that the generated slug for a resource doesn't exist in the container yet?
app.config["LDP_VALIDATE_SLUGS"] = (
environ.get("LDP_VALIDATE_SLUGS", "False").lower() == "true"
)

app.logger.info(
f"LDP Support: Backend active? {ldp_backend}, LDP API active? {app.config['LDP_API']}, Autocreate Containers on /ingest? {app.config['LDP_AUTOCREATE_CONTAINERS']}"
)
elif environ.get("LDP_API", "False").lower() == "true":
app.logger.error(
"LDP_API was set to True BUT LDP_BACKEND was not. LDP_API will NOT BE ACTIVE!"
)

# Content Profiles - by URL resource
app.config["CONTENT_PROFILE_DATA_URL"] = environ.get("CONTENT_PROFILE_DATA_URL")
# or by JSON-encoded env variable:
@@ -304,6 +345,13 @@ def create_app():
app.config["RDF_BASE_GRAPH"], app.config["FULL_BASE_GRAPH"]
)

# Are we able to use postgresql optimizations?
engine = db.engine
if engine.dialect.name == "postgresql":
app.config["DB_DIALECT"] = "postgresql"
else:
app.config["DB_DIALECT"] = "base"

app.config["SERVER_CAPABILITIES"] = (
", ".join(
[
@@ -314,6 +362,8 @@
("SUBADDRESSING", "Subaddressing"),
("KEEP_LAST_VERSION", "Versioning"),
("CONTENT_PROFILE_PATTERNS_AVAILABLE", "Content Profiles"),
("LDP_BACKEND", "LDP Container Backend"),
("LDP_API", "LDP API Support"),
]
if app.config.get(k)
]
@@ -326,9 +376,9 @@
if link_bank_str:
try:
app.config["LINK_BANK"] = json.loads(link_bank_str)
except json.decoder.JSONDecodeError as e:
except json.decoder.JSONDecodeError:
app.logger.error(
f"The data in ENV: 'LINK_BANK' is not valid JSON! Will not load Link Bank values"
"The data in ENV: 'LINK_BANK' is not valid JSON! Will not load Link Bank values"
)

app.register_blueprint(home_page, url_prefix=f"/{ns}")
4 changes: 2 additions & 2 deletions source/web-service/flaskapp/base_graph_utils.py
@@ -93,9 +93,9 @@ def base_graph_filter(basegraphobj, fqdn_id):
try:
record = get_record(basegraphobj)

if record and record.data:
if record and "record" in record and record["record"].data:
# only change the named graph to be a FQDN
data = dict(record.data)
data = dict(record["record"].data)
else:
current_app.logger.warning(
f"No base graph was present at {basegraphobj} - adding an empty base graph."
83 changes: 82 additions & 1 deletion source/web-service/flaskapp/conneg.py
@@ -1,6 +1,9 @@
import re
import json
import requests

from pyld import jsonld

from werkzeug.http import parse_accept_header

# type hint imports
@@ -9,7 +12,13 @@

from flaskapp.errors import status_graphstore_error, status_nt

from .graph_prefix_bindings import FORMATS
from .graph_prefix_bindings import (
FORMATS,
get_bound_graph,
get_frame,
BASE_FRAME_CONTEXT,
)
from .utilities import triples_to_quads

# Trying to use a regex to parse out a profile="" statement from the Accept header
# Not in use yet, but is close to workable so keeping this here.
@@ -150,3 +159,75 @@ def get_data_using_profile_query(
except Exception as e:
print(f"Hit unexpected Exception {str(e)}")
raise e


def reformat_rdf(data, shortformat="turtle", use_pyld=True, rdf_docloader=None):
if shortformat == "json-ld":
# Assume data is *already* JSON-LD
return data
if use_pyld is True:
# Use the PyLD library to parse into nquads, and rdflib to convert
# rdflib's json-ld import has not been tested on our data, so not relying on it
proc = jsonld.JsonLdProcessor()
serialized_rdf = proc.to_rdf(
data,
{
"format": "application/n-quads",
"documentLoader": rdf_docloader,
},
)

ident = data.get("id") or data.get("@id")

# rdflib to load and format the nquads
# forcing it, because of pyld's awful nquad export
g = get_bound_graph(identifier=ident)

# May not be nquads, even though we requested it:
serialized_rdf = triples_to_quads(serialized_rdf, ident)

g.parse(data=serialized_rdf, format="nquads")
data = g.serialize(format=shortformat)
return data
else:
ident = data.get("id") or data.get("@id")

# using rdflib to both parse and re-serialize the RDF:
g = get_bound_graph(identifier=ident)

g.parse(data=json.dumps(data), format="json-ld")
data = g.serialize(format=shortformat)
# blank out the etag for now
return data


def reformat_to_jsonld(data, incoming_format, target_base=None, top_level_id=None):
if incoming_format == "json-ld":
return data

# Rebase to target
if incoming_format in ["turtle", "n3"] and target_base is not None:
# rewrite the incoming base to the new one
# the json-ld property setter will validate and finish the rebase properly
data = "\n".join(
[f"@base <{target_base}> ."]
+ [x for x in data.split("\n") if not x.startswith("@base")]
)

# using rdflib to both parse and re-serialize the RDF:
g = get_bound_graph(identifier=top_level_id)
g.parse(data=data, format=incoming_format)

# data = g.serialize(format="json-ld", base=target_base, compact=True)
data = g.serialize(format="json-ld", compact=True)

# load it as JSON
return json.loads(data)


def frame_jsonld(data, target_uri):
# This will be used on data pulled in via turtle or similar, not JSON-LD
frame = get_frame(target_uri)
expanded = jsonld.expand(data)
framed = jsonld.frame(expanded, frame)
return jsonld.compact(framed, BASE_FRAME_CONTEXT)
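For context, a rough sketch of how these helpers might be chained to turn an incoming Turtle payload into framed, compacted JSON-LD; the sample data, base URL and resource URI are made up:

```python
from flaskapp.conneg import reformat_to_jsonld, frame_jsonld

turtle_doc = """@base <http://example.org/> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
<resource/1> dc:title "A sample title" .
"""

# Parse the Turtle, rebasing it onto the gateway's FQDN, and return plain JSON-LD
doc = reformat_to_jsonld(
    turtle_doc,
    incoming_format="turtle",
    target_base="https://lod.example.org/",
    top_level_id="https://lod.example.org/resource/1",
)

# Frame and compact it around the single top-level resource
framed = frame_jsonld(doc, "https://lod.example.org/resource/1")
```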
12 changes: 12 additions & 0 deletions source/web-service/flaskapp/errors.py
@@ -22,6 +22,9 @@
status_record_not_found = status_nt(
404, "Record Not Found", "Unable to obtain matching record from database"
)
status_container_not_found = status_nt(
404, "Container Not Found", "Unable to find container in database"
)

status_page_not_found = status_nt(404, "Page Not Found", "Page number out of bounds")

@@ -50,6 +53,15 @@
)


class RDFDataError(ValueError):
"""Error representing a general problem treating some data as RDF"""


class ResourceValidationError(RDFDataError):
"""An error occurred trying to validate the given resource. This can happen if say,
the JSONLD doc has a @base but its top level 'id' has a scheme like https/urn/etc"""


# Construct 'error response' object
def construct_error_response(status, source: int = None, detail: str = None):

21 changes: 21 additions & 0 deletions source/web-service/flaskapp/graph_prefix_bindings.py
@@ -1,5 +1,7 @@
from rdflib import ConjunctiveGraph, Namespace

from rdflib.namespace import DC, DCTERMS


BINDING = {
"crm": Namespace("http://www.cidoc-crm.org/cidoc-crm/"),
@@ -17,6 +19,14 @@
"prov": Namespace("http://www.w3.org/ns/prov#"),
}

# For items uploaded outside of JSON-LD
BASE_FRAME_CONTEXT = {
"id": "@id",
"type": "@type",
}
for k, v in BINDING.items():
BASE_FRAME_CONTEXT[k] = str(v)

FORMATS = {
# RDF triple formats
"application/n-triples; charset=UTF-8": "nt11",
@@ -33,8 +43,19 @@
}


# Basic framing, anticipating a single top-level URI
def get_frame(identifier):
return {
"@context": BASE_FRAME_CONTEXT,
"@id": identifier,
"@embed": "@always",
}


def get_bound_graph(identifier):
g = ConjunctiveGraph(identifier=identifier)
g.bind("dc", DC)
g.bind("dcterm", DCTERMS)
for k, v in BINDING.items():
g.bind(k, v)
return g
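A quick illustration of the two helpers above; the identifier is a placeholder:

```python
from flaskapp.graph_prefix_bindings import get_bound_graph, get_frame

ident = "https://lod.example.org/objects/1"  # placeholder identifier

g = get_bound_graph(identifier=ident)  # ConjunctiveGraph with dc, dcterm, crm, etc. prefixes bound
frame = get_frame(ident)               # minimal frame targeting that single top-level URI, embedding all nodes
```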