Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions envs/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,3 @@ KEYCLOAK_ISSUER_BASE_URL=keycloak-base-url
KEYCLOAK_SERVICE_CLIENT_ID=client-id-goes-here
KEYCLOAK_SERVICE_CLIENT_SECRET=client-secret-goes-here

# Transformer specific
FILTERS='["filterByFileType", "filterByIsbnIdentifier", "filterByIssuedYear", "filterByMaterialType"]'
#FILTER_ISBN_REVERSE: 'false'
FILTER_YEAR_NOT_BEFORE='2000'
#FILTER_YEAR_NOT_AFTER: '2016'

3 changes: 0 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
"lint": "eslint ./src",
"lint:dev": "eslint ./src --fix",
"test": "npm run lint && npm run test:base",
"test:base": "cross-env NODE_ENV=test node --test --test-force-exit --experimental-test-coverage --test-reporter=spec './src/*.test.js' './src/**/*.test.js'",
"test:only": "cross-env NODE_ENV=test node --test --test-only --test-force-exit --experimental-test-coverage --test-reporter=spec './src/**/*.test.js'",
"test:base": "cross-env NODE_ENV=test node --test --test-force-exit --experimental-test-coverage --test-reporter=spec './src/**/*.test.js'",
"test:kb": "NODE_ENV=test node --test --test-force-exit",
"watch:test": "cross-env DEBUG=@natlibfi/* NODE_ENV=test node --watch --test --experimental-test-coverage --test-reporter=spec './src/*.test.js' './src/**/*.test.js'",
"watch:test": "cross-env DEBUG=@natlibfi/* NODE_ENV=test node --watch --test --experimental-test-coverage --test-reporter=spec './src/**/*.test.js'",
"dev": "npm run watch:test",
"dev:kb": "NODE_ENV=test node --test --watch",
"dev:debug": "cross-env LOG_LEVEL=debug DEBUG=@natlibfi/* NODE_ENV=test npm run watch:test"
Expand Down Expand Up @@ -63,4 +64,4 @@
"nanoid": "^3.3.8",
"isbn3": "2.0.0"
}
}
}
11 changes: 0 additions & 11 deletions src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,3 @@ export const abortOnInvalidRecords = readEnvironmentVariable('ABORT_ON_INVALID_R
export const readFrom = readEnvironmentVariable('READ_FROM', {defaultValue: 'blobContent'});
export const nextQueueStatus = readEnvironmentVariable('NEXT_QUEUE_STATUS', {defaultValue: 'TRANSFORMED'});

// Filter configuration: what filters to apply and with what type of configuration
export const applyFilters = readEnvironmentVariable('FILTERS', {defaultValue: [], format: v => JSON.parse(v)});
export const filterConfig = {
filterByIsbnIdentifier: {
reverse: readEnvironmentVariable('FILTER_ISBN_REVERSE', {defaultValue: false, format: v => parseBoolean(v)})
},
filterByIssuedYear: {
filterYearNotBefore: readEnvironmentVariable('FILTER_YEAR_NOT_BEFORE', {defaultValue: 0, format: v => Number(v)}), // NB: 0 -> not applied
filterYearNotAfter: readEnvironmentVariable('FILTER_YEAR_NOT_AFTER', {defaultValue: 0, format: v => Number(v)}) // NB: 0 -> not applied
}
};
95 changes: 93 additions & 2 deletions src/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,117 @@

// Settings for the production harvest sources, keyed by source host name.
// Each entry holds:
//  - filters: one configuration object per filter type ('isbn', 'issuedYear',
//    'fileType', 'materialType'); 'active' switches that filter on or off.
//    The isbn entry carries a 'reverse' flag and the issuedYear entry carries
//    'notBeforeYear' / 'notAfterYear' limits (false = no limit given).
//  - fSID / f884: source-specific values; presumably used when building the
//    record fields SID and 884 -- TODO confirm against the conversion code.
const productionSources = {
'julkaisut.valtioneuvosto.fi': {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': false},
{'type': 'issuedYear', 'active': true, 'notBeforeYear': '2016', 'notAfterYear': false},
{'type': 'fileType', 'active': true},
{'type': 'materialType', 'active': true}
],
'fSID': {'10024': 'valto', '11111': 'valt2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:VALTO'
},
'www.julkari.fi': {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': false},
{'type': 'issuedYear', 'active': true, 'notBeforeYear': '2016', 'notAfterYear': false},
{'type': 'fileType', 'active': true},
{'type': 'materialType', 'active': true}
],
'fSID': {'10024': 'julkr', '11111': 'julk2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:JULKARI'
},
'lutpub.lut.fi': {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': false},
// NOTE(review): issuedYear is active here but both year limits are false,
// unlike the other production sources -- confirm this is intended.
{'type': 'issuedYear', 'active': true, 'notBeforeYear': false, 'notAfterYear': false},
{'type': 'fileType', 'active': true},
{'type': 'materialType', 'active': true}
],
'fSID': {'10024': 'lutpb', '11111': 'lutp2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:LUTPUB'
},
'jukuri.luke.fi': {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': false},
{'type': 'issuedYear', 'active': true, 'notBeforeYear': '2016', 'notAfterYear': false},
{'type': 'fileType', 'active': true},
{'type': 'materialType', 'active': true}
],
'fSID': {'10024': 'jukur', '11111': 'juku2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:JUKURI'
}
};

const testSources = {
'foobar.example.com': process.env.NODE_ENV === 'test' ? {'fSID': {'10024': 'fooba', '11111': 'foob2'}, 'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'} : null,
'foobar.example2.dev': process.env.NODE_ENV === 'test' ? {'fSID': {'10024': 'fooba', '11111': 'foob2'}, 'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'} : null,
'foobar.isbn.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': false}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.isbn2.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': true}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.fileType.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'fileType', 'active': true}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.issuedYear.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'issuedYear', 'active': true, 'notBeforeYear': '2024', 'notAfterYear': false}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.issuedYear2.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'issuedYear', 'active': true, 'notBeforeYear': false, 'notAfterYear': '2020'}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.issuedYear3.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'issuedYear', 'active': true, 'notBeforeYear': '2010', 'notAfterYear': '2020'}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.materialType.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'materialType', 'active': true}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'isbn', 'active': true, 'reverse': false},
{'type': 'issuedYear', 'active': true, 'notBeforeYear': '2024', 'notAfterYear': false},
{'type': 'fileType', 'active': true},
{'type': 'materialType', 'active': true}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null,
'foobar.example2.dev.example.com': process.env.NODE_ENV === 'test' ? {
'filters': [
{'type': 'isbn', 'active': false, 'reverse': false},
{'type': 'issuedYear', 'active': true, 'notBeforeYear': false, 'notAfterYear': false},
{'type': 'fileType', 'active': false},
{'type': 'materialType', 'active': false}
],
'fSID': {'10024': 'fooba', '11111': 'foob2'},
'f884': 'MELINDA_RECORD_IMPORT_REPO:FOOBAR'
} : null
};

export const sourceConfig = {
Expand Down
15 changes: 10 additions & 5 deletions src/transform/filter/filterByFileType.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ import ConversionError from '../convert/conversionError.js';

/**
* Filter that filters out items that do not have file type information included in their metadata.
* @param {{ active?: boolean; }} param0
* @param {boolean} [param0.active=true] Is filter active
* @returns Object containing filter and its name
*/
export function filterByFileType() {
return {
filter,
name: 'filterByFileType'
};
export function filterByFileType({active = true}) {
if (active) {
return {
filter,
name: 'filterByFileType'
};
}
return false;

function filter(record, debugInfo = {}) {
const filetypeInformation = getAllValuesInContext(record, 'kk:file');
Expand Down
29 changes: 20 additions & 9 deletions src/transform/filter/filterByIsbnIdentifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,36 @@ import ConversionError from '../convert/conversionError.js';

/**
* Filter filtering items without isbn identifiers.
* @param {{ active?: boolean; reverse?: boolean;}} param0
* @param {boolean} [param0.active=true] Is filter active
* @param {boolean} [param0.reverse=false] Reverse operation of filter (pass only records without ISBN)
* @returns Object containing filter and its name
*/
export function filterByIsbnIdentifier(filterConfig) {
const reversed = filterConfig?.filterByIsbnIdentifier?.reverse;

return {
filter,
name: 'filterByIsbnIdentifier'
};
export function filterByIsbnIdentifier({active = true, reverse = false}) {
Comment thread
natlibfi-jonollil marked this conversation as resolved.
if (active) {
return {
filter,
name: 'filterByIsbnIdentifier'
};
}
return false;

function filter({getFieldValues}, debugInfo = {}) {
// <kk:field schema="dc" element="identifier" qualifier="isbn" language="none" value="978-xxx-xxx-xxx-8" />
const isbnIdentifier = getFieldValues('dc.identifier.isbn') || [];
// <kk:field schema="dc" element="identifier" qualifier="urn" language="en" value="URN:978-xxx-xxx-xxx-8" />
//const isbnUrnIdentifier = getFieldValues('dc.identifier.urn').filter(field => field.match(/URN:978-/ui) || field.match(/URN:ISBN:978-/ui)) || [];
//const hasIdentifier = isbnIdentifier.length !== 0 || isbnUrnIdentifier.length !== 0;
const hasIdentifier = isbnIdentifier.length !== 0;


const {identifiers, title} = debugInfo;

if (reversed && isbnIdentifier.length !== 0) {
if (reverse && hasIdentifier) {
throw new ConversionError({identifiers, title}, 'Filter: Could find ISBN identifier which not allowed by the applied filter');
}

if (!reversed && isbnIdentifier.length === 0) {
if (!reverse && !hasIdentifier) {
throw new ConversionError({identifiers, title}, 'Filter: Cannot find ISBN identifier which is a required field by the applied filter');
}
}
Expand Down
31 changes: 17 additions & 14 deletions src/transform/filter/filterByIssuedYear.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,24 @@ import ConversionError from '../convert/conversionError.js';

/**
* Filter filtering items based on issued year.
* @param filterConfig Filter configuration
* @param {{ active?: boolean; notBeforeYear?: boolean|string; notAfterYear?: boolean|string;}} param0
* @param {boolean} [param0.active=false] Is filter active
* @param {boolean|string} [param0.notBeforeYear=false] Earliest accepted issued year; records issued before it are rejected (false = no lower limit)
* @param {boolean|string} [param0.notAfterYear=false] Latest accepted issued year; records issued after it are rejected (false = no upper limit)
* @returns Object containing filter and its name
*/
export function filterByIssuedYear(filterConfig) {
const filterYearNotAfter = filterConfig?.filterByIssuedYear?.filterYearNotAfter;
const filterYearNotBefore = filterConfig?.filterByIssuedYear?.filterYearNotBefore;

return {
filter,
name: 'filterByIssuedYear'
};
export function filterByIssuedYear({active = false, notBeforeYear = false, notAfterYear = false}) {
if (active) {
return {
filter,
name: 'filterByIssuedYear'
};
}
return false;

function filter({getFieldValues}, debugInfo = {}) {
const filterBeforeMissing = !filterYearNotBefore || isNaN(filterYearNotBefore);
const filterAfterMissing = !filterYearNotAfter || isNaN(filterYearNotAfter);
const filterBeforeMissing = !notBeforeYear || isNaN(notBeforeYear);
const filterAfterMissing = !notAfterYear || isNaN(notAfterYear);
const filterConfigMissing = filterBeforeMissing && filterAfterMissing;

// NB: default value of zero is falsy
Expand All @@ -36,11 +39,11 @@ export function filterByIssuedYear(filterConfig) {
const useFilterBefore = !filterBeforeMissing;
const useFilterAfter = !filterAfterMissing;

const tooEarly = useFilterBefore && issuedYears.some(v => Number(v) < filterYearNotBefore);
const tooLate = useFilterAfter && issuedYears.some(v => Number(v) > filterYearNotAfter);
const tooEarly = useFilterBefore && issuedYears.some(v => Number(v) < notBeforeYear);
const tooLate = useFilterAfter && issuedYears.some(v => Number(v) > notAfterYear);

if (tooEarly || tooLate) {
throw new ConversionError({identifiers, title}, `Filter: Date issued information (${JSON.stringify(issuedYears)}) matches filter configuration (${filterYearNotBefore} < X < ${filterYearNotAfter})`);
throw new ConversionError({identifiers, title}, `Filter: Date issued information (${JSON.stringify(issuedYears)}) matches filter configuration (${notBeforeYear} < X < ${notAfterYear})`);
}

return;
Expand Down
15 changes: 10 additions & 5 deletions src/transform/filter/filterByMaterialType.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@ import ConversionError from '../convert/conversionError.js';

/**
* Filter filtering items based on material type (dc.type.okm).
* @param {{ active?: boolean; }} param0
* @param {boolean} [param0.active=true] Is filter active
* @returns Object containing filter and its name
*/
export function filterByMaterialType() {
return {
filter,
name: 'filterByMaterialType'
};
export function filterByMaterialType({active = true}) {
if (active) {
return {
filter,
name: 'filterByMaterialType'
};
}
return false;

function filter({getFieldValues}, debugInfo = {}) {
const materialTypes = getFieldValues('dc.type.okm');
Expand Down
16 changes: 11 additions & 5 deletions src/transform/filter/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,16 @@ import {filterByMaterialType} from './filterByMaterialType.js';
* @param {object[]} filterConfigs list of filter specific configuration objects, each identified by its type
* @return object containing fieldValueInterface (for interacting with record) and commonErrorPayload to use for producing debugging information
*/
export default (harvestSource, record, applyFilters = [], filterConfig = {}) => {
export default (harvestSource, record, filterConfigs = []) => {
const debug = createDebugLogger('@natlibfi/melinda-record-import/transformer-dc:filter');
debug('Staring to define and apply filter configuration');

const isbnFilterConf = filterConfigs.find(filter => filter.type === 'isbn') ?? {active: false};
const issuedYearConf = filterConfigs.find(filter => filter.type === 'issuedYear') ?? {active: false};
const fileTypeConf = filterConfigs.find(filter => filter.type === 'fileType') ?? {active: false};
const materialTypeConf = filterConfigs.find(filter => filter.type === 'materialType') ?? {active: false};
debug(`Configured filters: isbn ${isbnFilterConf.active}, issued year ${issuedYearConf.active}, file type ${fileTypeConf.active}, material type ${materialTypeConf.active}`);

const inputFields = getInputFields(record);
const fieldValueInterface = createValueInterface(inputFields);
const {getFieldValues} = fieldValueInterface;
Expand All @@ -42,14 +48,14 @@ export default (harvestSource, record, applyFilters = [], filterConfig = {}) =>

// Each filter receives its own configuration during initialization
const availableFilters = {
raw: [filterByFileType()],
interface: [filterByMaterialType(), filterByIsbnIdentifier(filterConfig), filterByIssuedYear(filterConfig)]
raw: [filterByFileType(fileTypeConf)],
interface: [filterByMaterialType(materialTypeConf), filterByIsbnIdentifier(isbnFilterConf), filterByIssuedYear(issuedYearConf)]
};

// Use only active filters (an inactive filter initializes to false)
const selectedFilters = {
raw: availableFilters.raw.filter(f => applyFilters.includes(f.name)),
interface: availableFilters.interface.filter(f => applyFilters.includes(f.name))
raw: availableFilters.raw.filter(f => f),
interface: availableFilters.interface.filter(f => f)
};

const selectedFiltersNames = Object.keys(selectedFilters).map(filterType => {
Expand Down
4 changes: 2 additions & 2 deletions src/transform/filter/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ generateTests({
}
});

function callback({getFixture, filter, filterConfig = {}}) {
function callback({getFixture}) {
const momentMock = () => moment('2020-01-01T00:00:00');
const results = [];

const inputData = getFixture({components: ['input.xml'], reader: READERS.STREAM});
const expectedResult = getFixture({components: ['output.json'], reader: READERS.JSON});

const transform = createTransformer({applyFilters: [filter], filterConfig, moment: momentMock});
const transform = createTransformer({moment: momentMock});

return new Promise((resolve, reject) => {
transform(inputData)
Expand Down
Loading