Skip to content

Commit 84b5391

Browse files
committed
Add VoID SPARQL endpoint link filter extractor
1 parent 64632e2 commit 84b5391

8 files changed

Lines changed: 263 additions & 0 deletions

File tree

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"@context": [
3+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/runner/^4.0.0/components/context.jsonld",
4+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-link-filter-void/^0.0.0/components/context.jsonld"
5+
],
6+
"@id": "urn:comunica:default:Runner",
7+
"@type": "Runner",
8+
"actors": [
9+
{
10+
"@id": "urn:comunica:default:rdf-metadata-extract/actors#link-filter-void",
11+
"@type": "ActorRdfMetadataExtractLinkFilterVoid"
12+
}
13+
]
14+
}

engines/query-sparql-link-traversal-solid/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@
264264
"@comunica/actor-rdf-metadata-extract-hydra-controls": "^4.1.0",
265265
"@comunica/actor-rdf-metadata-extract-hydra-count": "^4.1.0",
266266
"@comunica/actor-rdf-metadata-extract-hydra-pagesize": "^4.1.0",
267+
"@comunica/actor-rdf-metadata-extract-link-filter-void": "^0.6.0",
267268
"@comunica/actor-rdf-metadata-extract-patch-sparql-update": "^4.1.0",
268269
"@comunica/actor-rdf-metadata-extract-put-accepted": "^4.1.0",
269270
"@comunica/actor-rdf-metadata-extract-request-time": "^4.1.0",

engines/query-sparql-link-traversal/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@
260260
"@comunica/actor-rdf-metadata-extract-hydra-controls": "^4.1.0",
261261
"@comunica/actor-rdf-metadata-extract-hydra-count": "^4.1.0",
262262
"@comunica/actor-rdf-metadata-extract-hydra-pagesize": "^4.1.0",
263+
"@comunica/actor-rdf-metadata-extract-link-filter-void": "^0.6.0",
263264
"@comunica/actor-rdf-metadata-extract-patch-sparql-update": "^4.1.0",
264265
"@comunica/actor-rdf-metadata-extract-put-accepted": "^4.1.0",
265266
"@comunica/actor-rdf-metadata-extract-request-time": "^4.1.0",
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Comunica VoID Link Filter RDF Metadata Extract Actor
2+
3+
[![npm version](https://badge.fury.io/js/%40comunica%2Factor-rdf-metadata-extract-link-filter-void.svg)](https://www.npmjs.com/package/@comunica/actor-rdf-metadata-extract-link-filter-void)
4+
5+
An [RDF Metadata Extract](https://github.com/comunica/comunica/tree/master/packages/bus-rdf-metadata-extract) actor that
6+
creates link filters based on [VoID descriptions](https://www.w3.org/TR/void/) to filter out redundant document links for datasets that have a SPARQL endpoint.
7+
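The filters are appended to the link filter list stored in the query context, so the actor only runs when that list is already present in the context.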
8+
9+
This module is part of the [Comunica framework](https://github.com/comunica/comunica),
10+
and should only be used by [developers that want to build their own query engine](https://comunica.dev/docs/modify/).
11+
12+
[Click here if you just want to query with Comunica](https://comunica.dev/docs/query/).
13+
14+
## Install
15+
16+
```bash
17+
$ yarn add @comunica/actor-rdf-metadata-extract-link-filter-void
18+
```
19+
20+
## Configure
21+
22+
After installing, this package can be added to your engine's configuration as follows:
23+
```json
24+
{
25+
"@context": [
26+
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-rdf-metadata-extract-link-filter-void/^0.0.0/components/context.jsonld"
27+
],
28+
"actors": [
29+
{
30+
"@id": "urn:comunica:default:rdf-metadata-extract/actors#link-filter-void",
31+
"@type": "ActorRdfMetadataExtractLinkFilterVoid"
32+
}
33+
]
34+
}
35+
```
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import type { MediatorExtractLinks } from '@comunica/bus-extract-links';
2+
import type {
3+
IActionRdfMetadataExtract,
4+
IActorRdfMetadataExtractOutput,
5+
IActorRdfMetadataExtractArgs,
6+
} from '@comunica/bus-rdf-metadata-extract';
7+
import { ActorRdfMetadataExtract } from '@comunica/bus-rdf-metadata-extract';
8+
import { KeysRdfResolveHypermediaLinks } from '@comunica/context-entries-link-traversal';
9+
import type { IActorTest, TestResult } from '@comunica/core';
10+
import { passTestVoid, failTest } from '@comunica/core';
11+
import type { ILink } from '@comunica/types';
12+
import type { LinkFilterType } from '@comunica/types-link-traversal';
13+
import type * as RDF from '@rdfjs/types';
14+
15+
/**
 * Comunica RDF metadata extract actor to collect link filters from VoID descriptions.
 *
 * For every dataset in the metadata stream that declares a `void:sparqlEndpoint`
 * together with a `void:uriSpace` or `void:uriRegexPattern`, a link filter is appended
 * to the filter list stored in the context under `KeysRdfResolveHypermediaLinks.linkFilters`.
 * Each filter returns `false` for links whose URL falls inside the dataset's URI space
 * (or matches its pattern), and `true` for all other links.
 */
export class ActorRdfMetadataExtractLinkFilterVoid extends ActorRdfMetadataExtract {
  // NOTE(review): this field is neither assigned nor read anywhere in this class — confirm whether it is still needed.
  private readonly mediatorExtractLinks: MediatorExtractLinks;

  public constructor(args: IActorRdfMetadataExtractArgs) {
    super(args);
  }

  /**
   * Only pass when the context already contains the link filter list,
   * because run() appends to that list rather than creating it.
   * @param action The metadata extraction action.
   */
  public async test(action: IActionRdfMetadataExtract): Promise<TestResult<IActorTest>> {
    if (!action.context.has(KeysRdfResolveHypermediaLinks.linkFilters)) {
      return failTest('unable to extract link filters without context storage target present');
    }
    return passTestVoid();
  }

  /**
   * Consume the metadata stream, collect the VoID endpoint and URI pattern declarations,
   * and register one link filter per dataset that has both.
   * The promise rejects when the metadata stream emits an error.
   * @param action The metadata extraction action.
   * @returns An output with empty metadata; the filters are stored in the context list instead.
   */
  public async run(action: IActionRdfMetadataExtract): Promise<IActorRdfMetadataExtractOutput> {
    return new Promise<IActorRdfMetadataExtractOutput>((resolve, reject) => {
      const datasetsWithEndpoint = new Set<string>();
      // Maps instead of plain objects, so that subject labels such as 'constructor'
      // can never collide with inherited object properties.
      const uriRegexPatterns = new Map<string, RegExp>();
      const uriSpaces = new Map<string, string>();

      action.metadata
        .on('error', reject)
        .on('data', (quad: RDF.Quad) => {
          switch (quad.predicate.value) {
            case 'http://rdfs.org/ns/void#sparqlEndpoint':
              datasetsWithEndpoint.add(quad.subject.value);
              break;
            case 'http://rdfs.org/ns/void#uriSpace':
              uriSpaces.set(quad.subject.value, quad.object.value);
              break;
            case 'http://rdfs.org/ns/void#uriRegexPattern':
              // The pattern comes from a remote document and may be malformed.
              // An exception thrown from within this listener would neither resolve nor reject
              // the promise, so invalid patterns are logged and skipped instead.
              try {
                uriRegexPatterns.set(quad.subject.value, new RegExp(quad.object.value, 'u'));
              } catch (error: unknown) {
                this.logWarn(action.context, 'Ignored invalid void:uriRegexPattern', () => ({
                  dataset: quad.subject.value,
                  uriRegexPattern: quad.object.value,
                  error: error instanceof Error ? error.message : String(error),
                }));
              }
              break;
          }
        })
        .on('end', () => {
          const linkFilters = action.context.getSafe(KeysRdfResolveHypermediaLinks.linkFilters);

          // Find out which datasets have both endpoint and URI filter available,
          // and create the corresponding link filters in the action context storage
          for (const datasetUri of datasetsWithEndpoint) {
            const uriSpace = uriSpaces.get(datasetUri);
            const uriRegexPattern = uriRegexPatterns.get(datasetUri);
            let linkFilter: LinkFilterType | undefined;
            // The truthiness check deliberately skips an empty URI space,
            // which would otherwise reject every link.
            if (uriSpace) {
              linkFilter = (link: ILink) => !link.url.startsWith(uriSpace);
            } else if (uriRegexPattern) {
              linkFilter = (link: ILink) => !uriRegexPattern.test(link.url);
            }
            if (linkFilter) {
              linkFilters.push(linkFilter);
              this.logDebug(action.context, 'Extracted link filter from VoID', () => ({
                dataset: datasetUri,
                uriSpace,
                uriRegexPattern,
              }));
            }
          }

          // Return something that meets the output criteria
          resolve({ metadata: {}});
        });
    });
  }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export * from './ActorRdfMetadataExtractLinkFilterVoid';
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"name": "@comunica/actor-rdf-metadata-extract-link-filter-void",
3+
"version": "0.6.0",
4+
"description": "An actor to extract link filters based on VoID descriptions",
5+
"lsd:module": true,
6+
"license": "MIT",
7+
"funding": {
8+
"type": "opencollective",
9+
"url": "https://opencollective.com/comunica-association"
10+
},
11+
"homepage": "https://comunica.dev/",
12+
"repository": {
13+
"type": "git",
14+
"url": "https://github.com/comunica/comunica-feature-link-traversal.git",
15+
"directory": "packages/actor-rdf-metadata-extract-link-filter-void"
16+
},
17+
"bugs": {
18+
"url": "https://github.com/comunica/comunica-feature-link-traversal/issues"
19+
},
20+
"keywords": [
21+
"comunica",
22+
"actor",
23+
"rdf-metadata-link-filter-void"
24+
],
25+
"main": "lib/index.js",
26+
"typings": "lib/index",
27+
"publishConfig": {
28+
"access": "public"
29+
},
30+
"files": [
31+
"components",
32+
"lib/**/*.d.ts",
33+
"lib/**/*.js",
34+
"lib/**/*.js.map"
35+
],
36+
"scripts": {
37+
"build": "yarn run build:ts && yarn run build:components",
38+
"build:ts": "node \"../../node_modules/typescript/bin/tsc\"",
39+
"build:components": "componentsjs-generator"
40+
},
41+
"dependencies": {
42+
"@comunica/bus-extract-links": "^0.6.0",
43+
"@comunica/bus-rdf-metadata-extract": "^4.1.0",
44+
"@comunica/context-entries-link-traversal": "^0.6.0",
45+
"@comunica/core": "^4.1.0",
46+
"@comunica/types": "^4.1.0",
47+
"@comunica/types-link-traversal": "^0.6.0",
48+
"@rdfjs/types": "*"
49+
}
50+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import { KeysRdfResolveHypermediaLinks } from '@comunica/context-entries-link-traversal';
2+
import { Bus, ActionContext } from '@comunica/core';
3+
import type { LinkFilterType } from '@comunica/types-link-traversal';
4+
import { DataFactory } from 'rdf-data-factory';
5+
import '@comunica/utils-jest';
6+
7+
import { ActorRdfMetadataExtractLinkFilterVoid } from '../lib/ActorRdfMetadataExtractLinkFilterVoid';
8+
9+
const streamifyArray = require('streamify-array');
10+
11+
// Namespace shared by all VoID predicates used in these tests.
const VOID_NS = 'http://rdfs.org/ns/void#';

const DF = new DataFactory();

// Predicate terms used to build the metadata fixtures below.
const voidUriSpace = DF.namedNode(`${VOID_NS}uriSpace`);
const voidUriRegexPattern = DF.namedNode(`${VOID_NS}uriRegexPattern`);
const voidSparqlEndpoint = DF.namedNode(`${VOID_NS}sparqlEndpoint`);
16+
17+
describe('ActorRdfMetadataExtractLinkFilterVoid', () => {
  let bus: any;
  let actor: ActorRdfMetadataExtractLinkFilterVoid;
  let context: ActionContext;
  let linkFilters: LinkFilterType[];

  beforeEach(() => {
    jest.resetAllMocks();
    bus = new Bus({ name: 'bus' });
    actor = new ActorRdfMetadataExtractLinkFilterVoid({ bus, name: 'actor' });
    // Invoke the lazy log data callback, so that it is executed even without a logger
    (<any>actor).logDebug = (_context: any, _message: string, extraDataFn: () => any) => extraDataFn();
    // The actor appends to this array through the context, so the tests can inspect it directly
    linkFilters = [];
    context = new ActionContext({ [KeysRdfResolveHypermediaLinks.linkFilters.name]: linkFilters });
  });

  describe('test', () => {
    it('should pass with filter storage in context', async() => {
      await expect(actor.test({
        context,
        metadata: <any>{},
        requestTime: 0,
        url: 'url',
      })).resolves.toPassTestVoid();
    });

    it('should fail without filter storage in context', async() => {
      await expect(actor.test({
        context: new ActionContext(),
        metadata: <any>{},
        requestTime: 0,
        url: 'url',
      })).resolves.toFailTest('unable to extract link filters without context storage target present');
    });
  });

  describe('run', () => {
    it('should parse filters from void:uriSpace', async() => {
      const subjectWithEndpoint = DF.blankNode();
      const subjectWithoutEndpoint = DF.blankNode();
      const metadata = streamifyArray([
        DF.quad(subjectWithEndpoint, voidSparqlEndpoint, DF.literal('http://localhost/endpoint')),
        DF.quad(subjectWithEndpoint, voidUriSpace, DF.literal('http://localhost/')),
        DF.quad(subjectWithoutEndpoint, voidUriSpace, DF.literal('http://otherhost/')),
      ]);
      await expect(actor.run(<any>{ metadata, context })).resolves.toEqual({ metadata: {}});
      expect(linkFilters).toHaveLength(1);
      expect(linkFilters[0]({ url: 'http://localhost/some/uri' })).toBeFalsy();
      // Links into the dataset without an endpoint must still be followed
      expect(linkFilters[0]({ url: 'http://otherhost/some/uri' })).toBeTruthy();
    });

    it('should parse filters from void:uriRegexPattern', async() => {
      const subjectWithEndpoint = DF.blankNode();
      const subjectWithoutEndpoint = DF.blankNode();
      const metadata = streamifyArray([
        DF.quad(subjectWithEndpoint, voidSparqlEndpoint, DF.literal('http://localhost/endpoint')),
        DF.quad(subjectWithEndpoint, voidUriRegexPattern, DF.literal('^http://localhost/')),
        DF.quad(subjectWithoutEndpoint, voidUriRegexPattern, DF.literal('^http://otherhost/')),
      ]);
      await expect(actor.run(<any>{ metadata, context })).resolves.toEqual({ metadata: {}});
      expect(linkFilters).toHaveLength(1);
      expect(linkFilters[0]({ url: 'http://localhost/some/uri' })).toBeFalsy();
      // Links into the dataset without an endpoint must still be followed
      expect(linkFilters[0]({ url: 'http://otherhost/some/uri' })).toBeTruthy();
    });

    it('should not create filters for datasets without URI patterns', async() => {
      const metadata = streamifyArray([
        DF.quad(DF.blankNode(), voidSparqlEndpoint, DF.literal('http://localhost/endpoint')),
      ]);
      await expect(actor.run(<any>{ metadata, context })).resolves.toEqual({ metadata: {}});
      expect(linkFilters).toHaveLength(0);
    });
  });
});

0 commit comments

Comments
 (0)