Skip to content

Commit 7972b61

Browse files
authored
Introducing a new parameter to get additional metadata (#1926)
* extract SotD and abstract if additionalMetadata=true is specified * describe additional metadata * fix parameter name * skip section title for abstract
1 parent 9959469 commit 7972b61

File tree

8 files changed

+96
-21
lines changed

8 files changed

+96
-21
lines changed

README.md

+21-16
Original file line numberDiff line numberDiff line change
@@ -178,26 +178,28 @@ Once the [event `end-all`](#validation-events) is emitted, the metadata should b
178178
The `options` accepted are equal to those in `validate()`, except that a `profile` is not necessary and will be ignored (finding out the profile is one of the
179179
goals of this method).
180180

181-
`this.meta` will be an `Object` and may include up to 16 properties described below:
181+
`this.meta` will be an `Object` and may include up to 20 properties described below:
182182

183183
- `profile`
184-
- `title`: The (possible) title of the document.
185-
- `docDate`: The date associated to the document.
186-
- `thisVersion`: URL of this version of the document.
187-
- `latestVersion`: URL of the latest version of the document.
188-
- `previousVersion`: URL of the previous version of the document (the last one, if multiple are shown).
189-
- `editorsDraft`: URL of the latest editor's draft.
190-
- `delivererIDs`: ID(s) of the deliverer(s); an `Array` of `Number`s.
191-
- `editorIDs`: ID(s) of the editor(s) responsible for the document; an `Array` of `Number`s.
192-
- `informative`: Whether the document in informative or not.
193-
- `process`: The process rules link.
194-
- `sameWorkAs`: The previous shortlink if any.
195-
- `implementationFeedbackDue`: The implementation review date for CRs.
196-
- `prReviewsDue`: The review date for PRs.
197-
- `implementationReport`: Implementation report link for CRs, PRs and RECs.
198-
- `errata`: The errata link of the document.
184+
- `title`: The (possible) title of the document
185+
- `docDate`: The date associated with the document
186+
- `thisVersion`: URL of this version of the document
187+
- `latestVersion`: URL of the latest version of the document
188+
- `previousVersion`: URL of the previous version of the document (the last one, if multiple are shown)
189+
- `editorsDraft`: URL of the latest editor's draft
190+
- `delivererIDs`: ID(s) of the deliverer(s); an `Array` of `Number`s
191+
- `editorIDs`: ID(s) of the editor(s) responsible for the document; an `Array` of `Number`s
192+
- `informative`: Whether the document is informative or not
193+
- `process`: The process rules link
194+
- `sameWorkAs`: The previous shortlink if any
195+
- `implementationFeedbackDue`: The implementation review date for CRs
196+
- `prReviewsDue`: The review date for PRs
197+
- `implementationReport`: Implementation report link for CRs, PRs and RECs
198+
- `errata`: The errata link of the document
199199
- `substantiveChanges`: Whether the document is a REC and has proposed amendments
200200
- `newFeatures`: Whether the document is a REC and has proposed additions
201+
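- `sotd`: The HTML content of the section "Status of this Document", or the string `Not found` if the section cannot be located (only extracted when `additionalMetadata` is enabled)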
202+
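- `abstract`: The HTML content of the abstract of the document, without its heading, or the string `Not found` if there is no "Abstract" heading (only extracted when `additionalMetadata` is enabled)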
201203

202204
If some of these pieces of metadata cannot be deduced, that key will not exist, or its value will not be defined.
203205

@@ -259,6 +261,9 @@ curl "https://www.w3.org/pubrules/api/metadata?url=https://example.com/doc.html"
259261

260262
# POST
261263
curl "https://www.w3.org/pubrules/api/metadata" -F "file=@/tmp/foo.html"
264+
265+
# GET with additional metadata
266+
curl "https://www.w3.org/pubrules/api/metadata?url=https://example.com/doc.html&additionalMetadata=true"
262267
```
263268

264269
Metadata is a set of data extracted from the document. It includes the type (profile) of the document, its publication date, the editors' names, the version of the Patent Policy the document is under, etc.

lib/api.js

+1
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ const processRequest = async (req, res, params) => {
200200
);
201201
});
202202
options.events = handler;
203+
options.additionalMetadata = req.query.additionalMetadata === 'true';
203204
if (validate) v.validate(options);
204205
else v.extractMetadata(options);
205206
}

lib/l10n-en_GB.js

+2
Original file line numberDiff line numberDiff line change
@@ -380,4 +380,6 @@ export const messages = {
380380
'metadata.errata': false,
381381
'metadata.patent-policy': false,
382382
'metadata.charters': false,
383+
'metadata.sotd': false,
384+
'metadata.abstract': false,
383385
};

lib/profiles/additionalMetadata.js

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
2+
* Pseudo-profile for additional metadata extraction.
3+
*/
4+
5+
import { rules as baseRules } from './metadata.js';
6+
import { insertAfter } from './profileUtil.js';
7+
8+
import * as abstract from '../rules/metadata/abstract.js';
9+
import * as sotd from '../rules/metadata/sotd.js';
10+
11+
export const name = 'AdditionalMetadata';
12+
13+
export const rules = insertAfter(baseRules, 'metadata.errata', [
14+
abstract,
15+
sotd,
16+
]);

lib/rules/metadata/abstract.js

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/**
2+
* Pseudo-rule for metadata extraction: abstract.
3+
*/
4+
5+
export const name = 'metadata.abstract';
6+
7+
/**
8+
* @param sr
9+
* @param done
10+
*/
11+
export function check(sr, done) {
12+
let abstractTitle;
13+
Array.prototype.some.call(sr.jsDocument.querySelectorAll('h2'), h2 => {
14+
if (sr.norm(h2.textContent).toLowerCase() === 'abstract') {
15+
abstractTitle = h2;
16+
return true;
17+
}
18+
});
19+
20+
if (abstractTitle) {
21+
const div = sr.jsDocument.createElement('div');
22+
[...abstractTitle.parentElement.children].forEach(child => {
23+
{
24+
if (child !== abstractTitle) {
25+
div.appendChild(child.cloneNode(true));
26+
}
27+
}
28+
});
29+
return done({ abstract: sr.norm(div.innerHTML) });
30+
} else {
31+
return done({ abstract: 'Not found' });
32+
}
33+
}

lib/rules/metadata/sotd.js

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/**
2+
* Pseudo-rule for metadata extraction: sotd.
3+
*/
4+
5+
export const name = 'metadata.sotd';
6+
7+
/**
8+
* @param sr
9+
* @param done
10+
*/
11+
export function check(sr, done) {
12+
const sotd = sr.getSotDSection();
13+
return done({ sotd: sotd ? sr.norm(sotd.innerHTML) : 'Not found' });
14+
}

lib/util.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ const buildProcessParamsFunction = function () {
121121
p === 'informativeOnly' ||
122122
p === 'echidnaReady' ||
123123
p === 'events' ||
124-
p === 'editorial'
124+
p === 'editorial' ||
125+
p === 'additionalMetadata'
125126
) {
126127
// Other params:
127128
if (Object.prototype.hasOwnProperty.call(result, p))

lib/validator.js

+7-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import w3cApi from 'node-w3capi';
1111
import { Exceptions } from './exceptions.js';
1212
import { assembleData, setLanguage } from './l10n.js';
1313
import * as profileMetadata from './profiles/metadata.js';
14+
import * as profileAdditionalMetadata from './profiles/additionalMetadata.js';
1415
import { get } from './throttled-ua.js';
1516
import {
1617
AB,
@@ -22,7 +23,6 @@ import {
2223
TAG,
2324
} from './util.js';
2425

25-
const { rules } = profileMetadata;
2626
const { version } = importJSON('../package.json', import.meta.url);
2727

2828
setLanguage('en_GB');
@@ -80,10 +80,13 @@ Specberus.prototype.extractMetadata = function (options) {
8080
const doMetadataExtraction = function (err, jsDocument) {
8181
if (err) return self.throw(err);
8282
self.jsDocument = jsDocument;
83-
self.sink.emit('start-all', profileMetadata);
84-
const total = (rules || []).length;
83+
const profile = options.additionalMetadata
84+
? profileAdditionalMetadata
85+
: profileMetadata;
86+
self.sink.emit('start-all', profile);
87+
const total = (profile.rules || []).length;
8588
let done = 0;
86-
rules.forEach(rule => {
89+
profile.rules.forEach(rule => {
8790
try {
8891
rule.check(
8992
self,

0 commit comments

Comments
 (0)