From 6ec67c6e3b0208698e3b04f0664086423d59ee81 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Wed, 8 May 2019 16:39:11 +0300 Subject: [PATCH 01/10] feat(etls): add interreg 2014UK16RFOP001 - EUBFR-258 --- .../etl/2014uk16rfop001/xls/babel.config.js | 29 ++ .../etl/2014uk16rfop001/xls/package.json | 32 +++ .../etl/2014uk16rfop001/xls/serverless.yml | 123 ++++++++ .../xls/src/events/onParseXLS.js | 152 ++++++++++ .../xls/src/lib/getFundingType.js | 37 +++ .../2014uk16rfop001/xls/src/lib/getRecords.js | 43 +++ .../xls/src/lib/improveObjectKeys.js | 16 ++ .../xls/src/lib/transform/ESF/README.md | 20 ++ .../xls/src/lib/transform/ESF/transform.js | 268 ++++++++++++++++++ .../xls/src/lib/transform/ESIF/README.md | 9 + .../xls/src/lib/transform/ESIF/transform.js | 206 ++++++++++++++ .../xls/src/lib/transform/getTransform.js | 23 ++ .../xls/test/stubs/ESF/record.json | 14 + .../xls/test/stubs/ESIF/record.json | 1 + .../xls/test/unit/events/onParseXLS.spec.js | 20 ++ .../lib/__snapshots__/transform.spec.js.snap | 89 ++++++ .../xls/test/unit/lib/transform.spec.js | 28 ++ .../etl/2014uk16rfop001/xls/webpack.config.js | 32 +++ 18 files changed, 1142 insertions(+) create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/babel.config.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/package.json create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/serverless.yml create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/events/onParseXLS.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/getFundingType.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/improveObjectKeys.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/README.md create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js create mode 100644 
services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/getTransform.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/test/unit/events/onParseXLS.spec.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js create mode 100644 services/ingestion/etl/2014uk16rfop001/xls/webpack.config.js diff --git a/services/ingestion/etl/2014uk16rfop001/xls/babel.config.js b/services/ingestion/etl/2014uk16rfop001/xls/babel.config.js new file mode 100644 index 000000000..0397ff2b1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/babel.config.js @@ -0,0 +1,29 @@ +module.exports = { + presets: [ + '@babel/preset-flow', + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + modules: false, + loose: true, + }, + ], + ], + env: { + test: { + presets: [ + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + }, + ], + ], + }, + }, +}; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/package.json b/services/ingestion/etl/2014uk16rfop001/xls/package.json new file mode 100644 index 000000000..8eb542637 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/package.json @@ -0,0 +1,32 @@ +{ + "private": true, + "name": "@eubfr/ingestion-etl-2014uk16rfop001-xls", + "version": "0.6.0", + "scripts": { + "deploy": "sls deploy -v", + "test:unit": "jest --testPathPattern=unit" + }, + "dependencies": { + "@eubfr/lib": "^0.6.0", + "@eubfr/logger-messenger": "^0.6.0", + "xlsx": "0.14.2" + }, + "devDependencies": { 
+ "@babel/core": "7.4.3", + "@babel/preset-env": "7.4.3", + "@babel/preset-flow": "7.0.0", + "@eubfr/types": "^0.6.0", + "aws-sdk": "2.434.0", + "babel-jest": "24.7.0", + "babel-loader": "8.0.5", + "jest": "24.7.0", + "serverless": "1.40.0", + "serverless-webpack": "5.2.0", + "webpack": "4.29.6" + }, + "jest": { + "transform": { + "^.+\\.js$": "babel-jest" + } + } +} diff --git a/services/ingestion/etl/2014uk16rfop001/xls/serverless.yml b/services/ingestion/etl/2014uk16rfop001/xls/serverless.yml new file mode 100644 index 000000000..cf8a981e1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/serverless.yml @@ -0,0 +1,123 @@ +service: ingestion-etl-2014uk16rfop001-xls + +plugins: + - serverless-webpack + +custom: + webpack: + webpackConfig: ./webpack.config.js + includeModules: + forceExclude: + - aws-sdk + packager: yarn + eubfrEnvironment: ${opt:eubfr_env, file(../../../../../config.json):eubfr_env, env:EUBFR_ENV, 'dev'} + bucketName: ${file(../../../../../resources/harmonized-storage/serverless.yml):custom.bucketName} + +package: + individually: true + +provider: + name: aws + runtime: nodejs8.10 + timeout: 60 + stage: ${opt:stage, file(../../../../../config.json):stage, env:EUBFR_STAGE, 'dev'} + region: ${opt:region, file(../../../../../config.json):region, env:EUBFR_AWS_REGION, 'eu-central-1'} + deploymentBucket: + name: eubfr-${self:custom.eubfrEnvironment}-deploy + stackTags: + ENV: ${self:custom.eubfrEnvironment} + iamRoleStatements: + - Effect: 'Allow' + Action: + - 's3:PutObject' + Resource: + Fn::Join: + - '' + - - 'arn:aws:s3:::' + - ${self:custom.bucketName} + - '/*' + # Allow queueing messages to the DLQ https://docs.aws.amazon.com/lambda/latest/dg/dlq.html + - Effect: 'Allow' + Action: + - sqs:SendMessage + Resource: '*' + +functions: + parseXls: + handler: src/events/onParseXLS.handler + name: ${self:provider.stage}-${self:service}-parseXls + memorySize: 1024 + environment: + BUCKET: ${self:custom.bucketName} + REGION: 
${self:provider.region} + STAGE: ${self:provider.stage} + events: + - sns: + arn: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-xls' + topicName: ${self:provider.stage}-etl-2014uk16rfop001-xls + - sns: + arn: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-xlsx' + topicName: ${self:provider.stage}-etl-2014uk16rfop001-xlsx + +resources: + Resources: + ParseXlsLambdaFunction: + Type: 'AWS::Lambda::Function' + Properties: + DeadLetterConfig: + TargetArn: + Fn::ImportValue: ${self:provider.stage}:ingestion-dead-letter-queue:LambdaFailureQueue + SNSTopic2014uk16rfop001XLS: + Type: AWS::SNS::Topic + Properties: + TopicName: ${self:provider.stage}-etl-2014uk16rfop001-xls + DisplayName: 2014uk16rfop001 XLS ETL + SNSTopic2014uk16rfop001XLSX: + Type: AWS::SNS::Topic + Properties: + TopicName: ${self:provider.stage}-etl-2014uk16rfop001-xlsx + DisplayName: 2014uk16rfop001 XLSX ETL + SNSTopic2014uk16rfop001XLSPolicy: + Type: AWS::SNS::TopicPolicy + Properties: + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: Allow-IngestionManager-Publish + Action: + - sns:Publish + Effect: Allow + Resource: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-*' + Principal: + AWS: + Fn::Join: + - '' + - - 'arn:aws:sts::' + - Ref: 'AWS::AccountId' + - ':assumed-role/ingestion-manager-${self:provider.stage}-' + - Ref: 'AWS::Region' + - '-lambdaRole/${self:provider.stage}-ingestion-manager-onObjectCreated' + Topics: + - Ref: SNSTopic2014uk16rfop001XLS + - Ref: SNSTopic2014uk16rfop001XLSX diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/events/onParseXLS.js b/services/ingestion/etl/2014uk16rfop001/xls/src/events/onParseXLS.js new file mode 100644 index 000000000..64b5bd6c3 --- 
/dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/events/onParseXLS.js @@ -0,0 +1,152 @@ +import AWS from 'aws-sdk'; // eslint-disable-line import/no-extraneous-dependencies +import XLSX from 'xlsx'; + +// ETL utilities. +import ensureExtensions from '@eubfr/lib/etl/ensureExtensions'; +import extractMessage from '@eubfr/lib/etl/extractMessage'; +import handleError from '@eubfr/lib/etl/handleError'; + +import MessengerFactory from '@eubfr/logger-messenger/src/lib/MessengerFactory'; +import { STATUS } from '@eubfr/logger-messenger/src/lib/status'; + +import getFundingType from '../lib/getFundingType'; +import getRecords from '../lib/getRecords'; +import getTransform from '../lib/transform/getTransform'; + +export const handler = async (event, context) => { + const { BUCKET, REGION, STAGE } = process.env; + + if (!BUCKET || !REGION || !STAGE) { + throw new Error( + 'BUCKET, REGION and STAGE environment variables are required!' + ); + } + + try { + const snsMessage = extractMessage(event); + const { key } = snsMessage.object; + + if (!ensureExtensions({ file: key, extensions: ['.xls', '.xlsx'] })) { + throw new Error('XLS or XLSX file expected for this ETL.'); + } + + const messenger = MessengerFactory.Create({ context }); + const s3 = new AWS.S3(); + + await messenger.send({ + message: { + computed_key: key, + status_message: 'Start parsing XLS...', + status_code: STATUS.PARSING, + }, + to: ['logs'], + }); + + // Get file + const readStream = s3 + .getObject({ Bucket: snsMessage.bucket.name, Key: key }) + .createReadStream(); + + return new Promise((resolve, reject) => { + // Put data in buffer + const buffers = []; + + readStream.on('data', data => { + buffers.push(data); + }); + + readStream.on('error', async e => + handleError( + { messenger, key, statusCode: STATUS.ERROR }, + { error: e, callback: reject } + ) + ); + + // Manage data + readStream.on('end', async () => { + let dataString = ''; + + // Parse file + const buffer = Buffer.concat(buffers); 
+ const workbook = XLSX.read(buffer, { + cellText: false, + cellDates: true, + }); + const sheetNameList = workbook.SheetNames; + // Take into account only first sheet. + const sheet = workbook.Sheets[sheetNameList[0]]; + const rows = XLSX.utils.sheet_to_json(sheet); + + // The incoming XLS file could contain different types of information depending on funding type. + const type = getFundingType(rows); + + if (!type) { + const error = + 'Provided file does not contain a valid structure for giving information about ESF or ESIF types of funding!'; + + await handleError( + { messenger, key, statusCode: STATUS.ERROR }, + { + error, + callback: reject, + } + ); + + throw error; + } + + // Try to get the right transform corresponding function for this funding type. + const transform = getTransform(type); + + if (!transform) { + const error = `Couldn't find a transform function corresponding to ${type}`; + + await handleError( + { messenger, key, statusCode: STATUS.ERROR }, + { + error, + callback: reject, + } + ); + + throw error; + } + + // At this point, we have ensured that we can handle the incoming XLS file. + // So it's worth preparing the data for the transform function. + const records = getRecords({ rows, type }); + + records.forEach(record => { + const data = transform(record); + dataString += `${JSON.stringify(data)}\n`; + }); + + // Load data + const params = { + Bucket: BUCKET, + Key: `${key}.ndjson`, + Body: dataString, + ContentType: 'application/x-ndjson', + }; + + await s3.upload(params).promise(); + + await messenger.send({ + message: { + computed_key: key, + status_message: + 'XLS parsed successfully. 
Results will be uploaded to ElasticSearch soon...', + status_code: STATUS.PARSED, + }, + to: ['logs'], + }); + + return resolve('XLS parsed successfully'); + }); + }); + } catch (e) { + throw e; + } +}; + +export default handler; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getFundingType.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getFundingType.js new file mode 100644 index 000000000..6a27551d7 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getFundingType.js @@ -0,0 +1,37 @@ +import improveObjectKeys from './improveObjectKeys'; + +/** + * Because the contents of the incoming XLS file can vary, this utility will help figuring out which transform function to use. + * Only the first 2 items of the `sheet` array are used, as always they contain background information or leading header row which suffice to know the contents of the whole XLS file. + * + * @param {Array} sheet Contains result of XLSX.utils.sheet_to_json(sheet) + * @returns {String} The type of incoming XLS file. 
Could be an empty, 'ESF' or 'ESIF' + */ +const getFundingType = sheet => { + let type = ''; + const first = improveObjectKeys(sheet[0]); + const second = improveObjectKeys(sheet[1]); + + if ( + first['LIST OF OPERATIONSWYKAZ OPERACJI'] && + first['LIST OF OPERATIONSWYKAZ OPERACJI'] === 'Beneficiary Name' && + second['LIST OF OPERATIONSWYKAZ OPERACJI'] && + second['LIST OF OPERATIONSWYKAZ OPERACJI'] === 'Nazwa Odbiorcy' + ) { + type = 'ESF'; + } + + if ( + first.__EMPTY_1 && + first.__EMPTY_1 === + 'EUROPEAN STRUCTURAL AND INVESTMENT FUNDS LIST OF OPERATIONS 2014 TO 2020' && + second.__EMPTY && + second.__EMPTY === 'Last updated January 2019' + ) { + type = 'ESIF'; + } + + return type; +}; + +export default getFundingType; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js new file mode 100644 index 000000000..51c626c52 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js @@ -0,0 +1,43 @@ +import improveObjectKeys from './improveObjectKeys'; + +/** + * Takes raw parsed rows from XLSX.utils.sheet_to_json(sheet) and returns well-formatted records for transform functions. + * + * @param {Object} Should contain `rows` ({Array}) and `type` ({String}) + * @returns {Array} Improved list of records. + */ +const getRecords = ({ rows, type }) => { + const records = []; + + switch (type) { + case 'ESF': { + // First row is the header. + const headerRow = rows.shift(); + // Remove row with information in Polish. 
+ rows.shift(); + + // Normalize the list by replacing properties + rows + .map(record => { + const mapped = {}; + Object.keys(record).forEach(prop => { + mapped[headerRow[prop]] = record[prop]; + }); + return mapped; + }) + .map(improveObjectKeys) + .forEach(record => records.push(record)); + + break; + } + case 'ESIF': + break; + + default: + break; + } + + return records; +}; + +export default getRecords; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/improveObjectKeys.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/improveObjectKeys.js new file mode 100644 index 000000000..5f4d4c25c --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/improveObjectKeys.js @@ -0,0 +1,16 @@ +const improveObjectKeys = o => { + const newObject = {}; + + Object.keys(o).forEach(key => { + const newKey = key + .trim() + .replace(/(\r\n|\n|\r)/gm, '') + .replace(/ {1,}/g, ' '); + + newObject[newKey] = o[key]; + }); + + return newObject; +}; + +module.exports = improveObjectKeys; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/README.md b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/README.md new file mode 100644 index 000000000..d9620f084 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/README.md @@ -0,0 +1,20 @@ +# 2014uk16rfop001 XLS ETL mapping rules + +Funding type: ESF (European Social Fund) + +Model to compare with is available at: https://ec-europa.github.io/eubfr-data-lake/ + +| Field | Target | +| -------------------------------------------------------------- | ----------------- | +| Beneficiary Name | third_parties | +| Operation Name | title | +| Operation Summary | description | +| Operation Start Date | timeframe.from | +| Operation End Date | timeframe.to | +| Total Eligible Expenditure Allocated to the Operation;Original | | +| Total Eligible Expenditure Allocated to the Operation;Current | budget.total_cost | +| Union co‑financing rate, as per 
priority axis; | budget.eu_contrib | +| Operation postcode; or other appropriate location indicator; | project_locations | +| Country | project_locations | +| Category of Intervention | themes | +| Last updated | | diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js new file mode 100644 index 000000000..18829ddba --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js @@ -0,0 +1,268 @@ +// @flow + +import crypto from 'crypto'; +import type { Project } from '@eubfr/types'; +import getCountryCode from '@eubfr/lib/getCountryCode'; +import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; + +/** + * Preprocess `budget`. + * + * Input fields taken from the `record` are: + * - `Total Eligible Expenditure Allocated to the Operation;Current` + * - `Union co‑financing rate, as per priority axis;` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Budget} + */ + +const getBudget = record => ({ + total_cost: sanitizeBudgetItem({ + value: + record['Total Eligible Expenditure Allocated to the Operation;Current'], + currency: 'GBP', + raw: + record['Total Eligible Expenditure Allocated to the Operation;Current'], + }), + eu_contrib: sanitizeBudgetItem({ + value: + record['Total Eligible Expenditure Allocated to the Operation;Current'] * + record['Union co‑financing rate, as per priority axis;'], + currency: 'GBP', + raw: record['Union co‑financing rate, as per priority axis;'], + }), + private_fund: sanitizeBudgetItem(), + public_fund: sanitizeBudgetItem(), + other_contrib: sanitizeBudgetItem(), + funding_area: [], + mmf_heading: '', +}); + +/** + * Preprocess `description`. 
+ * + * Input fields taken from the `record` are: + * - `Operation Summary` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getDescription = record => record['Operation Summary'] || ''; + +/** + * Preprocess `project_id`. + * + * There are rows with overlapping information about beneficiaries and operations. + * In order to keep them separate, as they are in the ingested file, we take into account the budgetary information as well. + * + * Input fields taken from the `record` are: + * - `Operation Name` + * - `Total Eligible Expenditure Allocated to the Operation;Current` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getProjectId = record => { + const infoCombined = + record['Operation Name'] + + String( + record['Total Eligible Expenditure Allocated to the Operation;Current'] + ); + + return crypto + .createHash('md5') + .update(infoCombined) + .digest('hex'); +}; + +/** + * Preprocess `project_locations`. + * + * Input fields taken from the `record` are: + * - `Operation postcode; or other appropriate location indicator;` + * - `Country` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getLocations = record => { + let region = ''; + const locations = []; + const country = getCountryCode(record.Country); + + const location = record[ + 'Operation postcode; or other appropriate location indicator;' + ] + ? record[ + 'Operation postcode; or other appropriate location indicator;' + ].split('LEP')[0] + : ''; + + if (location) { + const places = location.trim().split(/\s*(?:,|&)\s*/); + + // Check if several places are included, take only the first one. 
+ if (places.length && places.length > 1) { + region = places[0].trim(); + } + // Otherwise take whatever is before the 'LEP' clarification. + else { + region = location.trim(); + } + } + + if (country) { + locations.push({ + address: '', + centroid: null, + country_code: country, + location: null, + nuts: [], + postal_code: '', + region, + town: '', + }); + } + + return locations; +}; + +/** + * Preprocess `themes`. + * + * Input fields taken from the `record` are: + * - `Category of Intervention` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getThemes = record => + record['Category of Intervention'] + ? [ + record['Category of Intervention'] + .trim() + .replace(/(\r\n|\n|\r)/gm, '') + .replace(/ {1,}/g, ' '), + ] + : []; + +/** + * Preprocess `third_parties`. + * + * Input fields taken from the `record` are: + * - `Beneficiary Name` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getThirdParties = record => + record['Beneficiary Name'] + ? [ + { + address: '', + country: 'GB', + email: '', + name: record['Beneficiary Name'] + ? record['Beneficiary Name'].trim() + : '', + phone: '', + region: '', + role: 'Beneficiary', + type: '', + website: '', + }, + ] + : []; + +/** + * Preprocess `timeframe`. + * + * Input fields taken from the `record` are: + * - `Operation Start Date` + * - `Operation End Date` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Timeframe} + */ + +const getTimeframe = record => { + const from = record['Operation Start Date'] || null; + const to = record['Operation End Date'] || null; + + return { + from, + from_precision: 'day', + to, + to_precision: 'day', + }; +}; + +/** + * Preprocess `title`. 
+ * + * Input fields taken from the `record` are: + * - `Operation Name` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getTitle = record => + record['Operation Name'] ? record['Operation Name'].trim() : ''; + +/** + * Map fields for 2014uk16rfop001 producer, XLS file types + * + * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json|stub} + * + * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js|implementation details} + * + * @name 2014uk16rfop001XlsTransform + * @param {Object} record Piece of data to transform before going to harmonized storage. + * @returns {Project} JSON matching the type fields. + */ +export default (record: Object): Project | null => { + if (!record) return null; + + // Map the fields + return { + action: '', + budget: getBudget(record), + call_year: '', + description: getDescription(record), + ec_priorities: [], + media: [], + programme_name: '', + project_id: getProjectId(record), + project_locations: getLocations(record), + project_website: '', + complete: false, + related_links: [], + reporting_organisation: 'Member states', + results: { + available: '', + result: '', + }, + status: '', + sub_programme_name: '', + success_story: '', + themes: getThemes(record), + third_parties: getThirdParties(record), + timeframe: getTimeframe(record), + title: getTitle(record), + type: [], + }; +}; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md new file mode 100644 index 000000000..558171dbf --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md @@ -0,0 +1,9 @@ +# 2014uk16rfop001 XLS ETL 
mapping rules + +Funding type: ESIF (EUROPEAN STRUCTURAL AND INVESTMENT FUNDS) + +Model to compare with is available at: https://ec-europa.github.io/eubfr-data-lake/ + +| Field | Target | +| ----- | ------ | +| | | diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js new file mode 100644 index 000000000..331042150 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js @@ -0,0 +1,206 @@ +// @flow + +import type { Project } from '@eubfr/types'; +import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; + +/** + * Preprocess `budget`. + * + * Input fields taken from the `record` are: + * - `Total Paid Amount` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Budget} + */ + +const getBudget = record => ({ + total_cost: sanitizeBudgetItem({ + value: record['Total Paid Amount'], + currency: 'GBP', + raw: record['Total Paid Amount'], + }), + eu_contrib: sanitizeBudgetItem(), + private_fund: sanitizeBudgetItem(), + public_fund: sanitizeBudgetItem(), + other_contrib: sanitizeBudgetItem(), + funding_area: [], + mmf_heading: '', +}); + +/** + * Preprocess `project_id`. + * + * Input fields taken from the `record` are: + * - `Ref. No` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getProjectId = record => record['Ref. No'] || ''; + +/** + * Preprocess `project_locations`. + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getLocations = record => [ + { + address: '', + centroid: null, + country_code: 'GB', // Takes into account lib/getCountryCode.js rules directly. + location: null, + nuts: [], + postal_code: record['Project Postcode'] + ? 
record['Project Postcode'].trim() + : '', + region: '', + town: '', + }, +]; + +/** + * Preprocess `third_parties`. + * + * Input fields taken from the `record` are: + * - `Organisation Name` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getThirdParties = record => + record['Organisation Name'] + ? [ + { + address: '', + country: 'GB', + email: '', + name: record['Organisation Name'] + ? record['Organisation Name'].trim() + : '', + phone: '', + region: '', + role: 'Lead organisation', + type: '', + website: '', + }, + ] + : []; + +/** + * Preprocess/format date. + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Date} date Date + * + * Supported formats: + * + * - `DD/MM/YYYY` + * + * @returns {Date} The date formatted into an ISO 8601 date format + * + */ +const formatDate = date => { + if (!date || typeof date !== 'string') return null; + + const d = date.split(/\//); + if (d.length !== 3) return null; + + const [day, month, year] = d; + + if (!day || !month || !year) return null; + + try { + return new Date(Date.UTC(year, month - 1, day)).toISOString(); + } catch (e) { + return null; + } +}; + +/** + * Preprocess `timeframe`. + * + * Input fields taken from the `record` are: + * - `Operation Start Date` + * - `Operation End Date` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Timeframe} + */ + +const getTimeframe = record => { + const from = formatDate(record['Operation Start Date']); + const to = formatDate(record['Operation End Date']); + + return { + from, + from_precision: 'day', + to, + to_precision: 'day', + }; +}; + +/** + * Preprocess `title`. 
+ * + * Input fields taken from the `record` are: + * - `Project Title` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getTitle = record => + record['Project Title'] ? record['Project Title'].trim() : ''; + +/** + * Map fields for 2014uk16rfop001 producer, XLS file types + * + * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/record.json|stub} + * + * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform.js|implementation details} + * @name 2014uk16rfop001XlsTransform + * @param {Object} record Piece of data to transform before going to harmonized storage. + * @returns {Project} JSON matching the type fields. + */ +export default (record: Object): Project | null => { + if (!record) return null; + + // Map the fields + return { + action: '', + budget: getBudget(record), + call_year: '', + description: '', + ec_priorities: [], + media: [], + programme_name: '', + project_id: getProjectId(record), + project_locations: getLocations(record), + project_website: '', + complete: false, + related_links: [], + reporting_organisation: 'Member states', + results: { + available: '', + result: '', + }, + status: '', + sub_programme_name: '', + success_story: '', + themes: [], + third_parties: getThirdParties(record), + timeframe: getTimeframe(record), + title: getTitle(record), + type: [], + }; +}; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/getTransform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/getTransform.js new file mode 100644 index 000000000..401618584 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/getTransform.js @@ -0,0 +1,23 @@ +import esf from './ESF/transform'; +import esif from './ESIF/transform'; + +const 
getTransform = type => { + let transform = null; + + switch (type) { + case 'ESF': + transform = esf; + break; + + case 'ESIF': + transform = esif; + break; + + default: + break; + } + + return transform; +}; + +export default getTransform; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json b/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json new file mode 100644 index 000000000..79a4660de --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json @@ -0,0 +1,14 @@ +{ + "Beneficiary Name": "15BILLION", + "Operation Name": "CAREERS CLUSTERS NORTH AND EAST LONDON", + "Operation Summary": "To establish and manage a geographic or employer sector based Career Cluster that aims to improve the labour market relevance of education", + "Operation Start Date": "2016-06-30T22:00:00.000Z", + "Operation End Date": "2019-03-30T21:00:00.000Z", + "Total Eligible Expenditure Allocated to the Operation;Original": 666666, + "Total Eligible Expenditure Allocated to the Operation;Current": 866664, + "Union co‑financing rate, as per priority axis;": 0.5, + "Operation postcode; or other appropriate location indicator;": "LONDON LEP More Developed", + "Country": "UK", + "Category of Intervention": "02     Sustainable integration of young people not in employment, education or training in the labour market", + "Last updated": "2018-12-13T21:00:00.000Z" +} diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json b/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json @@ -0,0 +1 @@ +{} diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/events/onParseXLS.spec.js b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/events/onParseXLS.spec.js new file mode 100644 index 000000000..2dad09746 --- /dev/null +++ 
b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/events/onParseXLS.spec.js @@ -0,0 +1,20 @@ +/** + * @jest-environment node + */ + +import onParseXLS from '../../../src/events/onParseXLS'; + +describe(`Function onParseXLS in "@eubfr/ingestion-etl-2014uk16rfop001-xls"`, () => { + test('The function requires BUCKET, REGION and STAGE environment variables', async () => { + const event = {}; + const context = {}; + + try { + await onParseXLS(event, context); + } catch (error) { + expect(error.message).toEqual( + 'BUCKET, REGION and STAGE environment variables are required!' + ); + } + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap new file mode 100644 index 000000000..19be68d09 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -0,0 +1,89 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`XLS transformers for 2014uk16rfop001 Type ESF: produces correct JSON output structure 1`] = ` +Object { + "action": "", + "budget": Object { + "eu_contrib": Object { + "currency": "GBP", + "raw": 0.5, + "value": 433332, + }, + "funding_area": Array [], + "mmf_heading": "", + "other_contrib": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "private_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "public_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "total_cost": Object { + "currency": "GBP", + "raw": 866664, + "value": 866664, + }, + }, + "call_year": "", + "complete": false, + "description": "To establish and manage a geographic or employer sector based Career Cluster that aims to improve the labour market relevance of education", + "ec_priorities": Array [], + "media": Array [], + "programme_name": "", + "project_id": "42ce684eaf8ac11f096d80fe5d84cde5", + "project_locations": Array [ + 
Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "LONDON", + "town": "", + }, + ], + "project_website": "", + "related_links": Array [], + "reporting_organisation": "Member states", + "results": Object { + "available": "", + "result": "", + }, + "status": "", + "sub_programme_name": "", + "success_story": "", + "themes": Array [ + "02     Sustainable integration of young people not in employment, education or training in the labour market", + ], + "third_parties": Array [ + Object { + "address": "", + "country": "GB", + "email": "", + "name": "15BILLION", + "phone": "", + "region": "", + "role": "Beneficiary", + "type": "", + "website": "", + }, + ], + "timeframe": Object { + "from": "2016-06-30T22:00:00.000Z", + "from_precision": "day", + "to": "2019-03-30T21:00:00.000Z", + "to_precision": "day", + }, + "title": "CAREERS CLUSTERS NORTH AND EAST LONDON", + "type": Array [], +} +`; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js new file mode 100644 index 000000000..8e9b27f82 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js @@ -0,0 +1,28 @@ +/** + * @jest-environment node + */ + +import mapperESF from '../../../src/lib/transform/ESF/transform'; +import mapperESIF from '../../../src/lib/transform/ESIF/transform'; + +import testRecordESF from '../../stubs/ESF/record'; +import testRecordESIF from '../../stubs/ESIF/record'; + +describe('XLS transformers for 2014uk16rfop001', () => { + let esf = {}; + let esif = {}; + + beforeAll(() => { + esf = mapperESF(testRecordESF); + esif = mapperESIF(testRecordESIF); + }); + + test('Both types return null when record is not provided', () => { + expect(mapperESF()).toBe(null); + expect(mapperESIF()).toBe(null); + }); + + test('Type ESF: produces correct JSON output structure', () 
=> { + expect(esf).toMatchSnapshot(); + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop001/xls/webpack.config.js b/services/ingestion/etl/2014uk16rfop001/xls/webpack.config.js new file mode 100644 index 000000000..30fd8ced7 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/xls/webpack.config.js @@ -0,0 +1,32 @@ +const slsw = require('serverless-webpack'); +const path = require('path'); + +module.exports = { + entry: slsw.lib.entries, + target: 'node', + mode: slsw.lib.webpack.isLocal ? 'development' : 'production', + optimization: { + minimize: process.env.EUBFR_ENV && process.env.EUBFR_ENV === 'prod', + }, + devtool: 'nosources-source-map', + externals: [{ 'aws-sdk': true }], + module: { + rules: [ + { + test: /\.js$/, + use: [ + { + loader: 'babel-loader', + }, + ], + include: __dirname, + exclude: /node_modules/, + }, + ], + }, + output: { + libraryTarget: 'commonjs2', + path: path.join(__dirname, '.webpack'), + filename: '[name].js', + }, +}; From 95d128072a3be6bc00e46cc25d26a1ea32c19966 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Wed, 8 May 2019 18:10:19 +0300 Subject: [PATCH 02/10] Add ESIF type --- config.example.json | 1 + scripts/documentation/docs-md.js | 1 + .../etl/2014uk16rfop001/xls/package.json | 1 + .../2014uk16rfop001/xls/src/lib/getRecords.js | 42 +++-- .../xls/src/lib/transform/ESIF/README.md | 19 +- .../xls/src/lib/transform/ESIF/transform.js | 176 ++++++++++++------ .../xls/test/stubs/ESIF/record.json | 17 +- .../lib/__snapshots__/transform.spec.js.snap | 93 ++++++++- .../xls/test/unit/lib/transform.spec.js | 4 + tools/eubfr-cli/lib/getServices.js | 1 + 10 files changed, 276 insertions(+), 79 deletions(-) diff --git a/config.example.json b/config.example.json index 33b71c089..7a2d0bff8 100644 --- a/config.example.json +++ b/config.example.json @@ -11,6 +11,7 @@ "2014tc16rfcb047", "2014tc16rfpc001", "2014tc16rftn002", + "2014uk16rfop001", "bulgaria", "cordis", "devco", diff --git a/scripts/documentation/docs-md.js 
b/scripts/documentation/docs-md.js index b61dec1a7..f078bab7e 100755 --- a/scripts/documentation/docs-md.js +++ b/scripts/documentation/docs-md.js @@ -19,6 +19,7 @@ const transforms = [ '2014tc16rfcb047-xls', '2014tc16rfpc001-xls', '2014tc16rftn002-xls', + '2014uk16rfop001-xls', 'bulgaria-xls', 'cordis-csv', 'devco-xls', diff --git a/services/ingestion/etl/2014uk16rfop001/xls/package.json b/services/ingestion/etl/2014uk16rfop001/xls/package.json index 8eb542637..c2e7f7316 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/package.json +++ b/services/ingestion/etl/2014uk16rfop001/xls/package.json @@ -9,6 +9,7 @@ "dependencies": { "@eubfr/lib": "^0.6.0", "@eubfr/logger-messenger": "^0.6.0", + "i18n-iso-countries": "3.7.8", "xlsx": "0.14.2" }, "devDependencies": { diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js index 51c626c52..39de55594 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/getRecords.js @@ -7,36 +7,50 @@ import improveObjectKeys from './improveObjectKeys'; * @returns {Array} Improved list of records. */ const getRecords = ({ rows, type }) => { + let headerRow = {}; const records = []; switch (type) { case 'ESF': { // First row is the header. - const headerRow = rows.shift(); + headerRow = rows.shift(); // Remove row with information in Polish. rows.shift(); - // Normalize the list by replacing properties - rows - .map(record => { - const mapped = {}; - Object.keys(record).forEach(prop => { - mapped[headerRow[prop]] = record[prop]; - }); - return mapped; - }) - .map(improveObjectKeys) - .forEach(record => records.push(record)); - break; } - case 'ESIF': + + case 'ESIF': { + // The first few rows contain explanations. + rows.shift(); + rows.shift(); + + // English version of the columns. 
+ headerRow = rows.shift(); + // French version + rows.shift(); + break; + } default: break; } + // Normalize the list by replacing properties + rows + .map(record => { + const remapped = {}; + + Object.keys(record).forEach(prop => { + remapped[headerRow[prop]] = record[prop]; + }); + + return remapped; + }) + .map(improveObjectKeys) + .forEach(record => records.push(record)); + return records; }; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md index 558171dbf..173518890 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/README.md @@ -4,6 +4,19 @@ Funding type: ESIF (EUROPEAN STRUCTURAL AND INVESTMENT FUNDS) Model to compare with is available at: https://ec-europa.github.io/eubfr-data-lake/ -| Field | Target | -| ----- | ------ | -| | | +| Field | Target | +| ------------------------------------------------------ | ----------------- | +| Recipient of funds | third_parties | +| Name of Project | title | +| Type of fund | description | +| Priority Axis | description | +| Summary of project (max 100 words) | description | +| Start date | timeframe.from | +| End date | timeframe.to | +| ERDF/ESF investment £m | budget.eu_contrib | +| Total project costs £m | budget.total_cost | +| % of project funded by EU | | +| Location (postcode) | project_locations | +| Local Enterprise Partnership area | project_locations | +| Country | project_locations | +| Type and focus of support (_Category of intervention)_ | themes | diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js index 331042150..3a9982262 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js +++ 
b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js @@ -1,13 +1,17 @@ // @flow +import crypto from 'crypto'; +import countries from 'i18n-iso-countries'; import type { Project } from '@eubfr/types'; +import getCountryCode from '@eubfr/lib/getCountryCode'; import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; /** * Preprocess `budget`. * * Input fields taken from the `record` are: - * - `Total Paid Amount` + * - `Total project costs £m` + * - `ERDF/ESF investment £m` * * @memberof 2014uk16rfop001XlsTransform * @param {Object} record The row received from parsed file @@ -16,11 +20,15 @@ import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; const getBudget = record => ({ total_cost: sanitizeBudgetItem({ - value: record['Total Paid Amount'], + value: record['Total project costs £m'], currency: 'GBP', - raw: record['Total Paid Amount'], + raw: record['Total project costs £m'], + }), + eu_contrib: sanitizeBudgetItem({ + value: record['ERDF/ESF investment £m'], + currency: 'GBP', + raw: record['ERDF/ESF investment £m'], }), - eu_contrib: sanitizeBudgetItem(), private_fund: sanitizeBudgetItem(), public_fund: sanitizeBudgetItem(), other_contrib: sanitizeBudgetItem(), @@ -28,47 +36,124 @@ const getBudget = record => ({ mmf_heading: '', }); +/** + * Preprocess `description`. + * + * Input fields taken from the `record` are: + * - `Type of fund` + * - `Priority Axis` + * - `Summary of project (max 100 words)` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getDescription = record => { + let description = ''; + const fields = [ + 'Type of fund', + 'Priority Axis', + 'Summary of project (max 100 words)', + ]; + + fields.forEach(descriptionField => { + description += `${descriptionField}: ${record[descriptionField]} \n`; + }); + + return description; +}; + /** * Preprocess `project_id`. 
* * Input fields taken from the `record` are: - * - `Ref. No` + * - `Name of Project` * * @memberof 2014uk16rfop001XlsTransform * @param {Object} record The row received from parsed file * @returns {String} */ -const getProjectId = record => record['Ref. No'] || ''; +const getProjectId = record => { + return crypto + .createHash('md5') + .update(record['Name of Project']) + .digest('hex'); +}; + +/** + * Gets country code from a country name. + * + * @memberof 2014uk16rfop001XlsTransform + * @param {String} countryName The name of the country + * @returns {String} The ISO 3166-1 country code + */ + +const getCodeByCountry = countryName => + countries.getAlpha2Code(countryName, 'en'); /** * Preprocess `project_locations`. * + * Input fields taken from the `record` are: + * - `Location (postcode)` + * - `Local Enterprise Partnership area` + * - `Country` + * * @memberof 2014uk16rfop001XlsTransform * @param {Object} record The row received from parsed file * @returns {Array} */ -const getLocations = record => [ - { +const getLocations = record => { + const locations = []; + const region = record['Local Enterprise Partnership area'] || ''; + const postCode = record['Location (postcode)'] || ''; + // We definitely expect England, UK or something similar, but if someone decides to include none of these, we respect it. + const countryByLib = getCountryCode(getCodeByCountry(record.Country)); + const country = countryByLib || 'GB'; + + locations.push({ address: '', centroid: null, - country_code: 'GB', // Takes into account lib/getCountryCode.js rules directly. + country_code: country, location: null, nuts: [], - postal_code: record['Project Postcode'] - ? record['Project Postcode'].trim() - : '', - region: '', + postal_code: postCode, + region, town: '', - }, -]; + }); + + return locations; +}; + +/** + * Preprocess `themes`. 
+ * + * Input fields taken from the `record` are: + * - `Type and focus of support (*Category of intervention)*` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getThemes = record => + record['Type and focus of support (*Category of intervention)*'] + ? [ + record['Type and focus of support (*Category of intervention)*'] + .trim() + .replace(/(\r\n|\n|\r)/gm, '') + .replace(/ {1,}/g, ' '), + ] + : []; /** * Preprocess `third_parties`. * * Input fields taken from the `record` are: - * - `Organisation Name` + * - `Recipient of funds` * * @memberof 2014uk16rfop001XlsTransform * @param {Object} record The row received from parsed file @@ -76,60 +161,30 @@ const getLocations = record => [ */ const getThirdParties = record => - record['Organisation Name'] + record['Recipient of funds'] ? [ { address: '', country: 'GB', email: '', - name: record['Organisation Name'] - ? record['Organisation Name'].trim() + name: record['Recipient of funds'] + ? record['Recipient of funds'].trim() : '', phone: '', region: '', - role: 'Lead organisation', + role: 'Beneficiary', type: '', website: '', }, ] : []; -/** - * Preprocess/format date. - * - * @memberof 2014uk16rfop001XlsTransform - * @param {Date} date Date - * - * Supported formats: - * - * - `DD/MM/YYYY` - * - * @returns {Date} The date formatted into an ISO 8601 date format - * - */ -const formatDate = date => { - if (!date || typeof date !== 'string') return null; - - const d = date.split(/\//); - if (d.length !== 3) return null; - - const [day, month, year] = d; - - if (!day || !month || !year) return null; - - try { - return new Date(Date.UTC(year, month - 1, day)).toISOString(); - } catch (e) { - return null; - } -}; - /** * Preprocess `timeframe`. 
* * Input fields taken from the `record` are: - * - `Operation Start Date` - * - `Operation End Date` + * - `Start date` + * - `End date` * * @memberof 2014uk16rfop001XlsTransform * @param {Object} record The row received from parsed file @@ -137,8 +192,8 @@ const formatDate = date => { */ const getTimeframe = record => { - const from = formatDate(record['Operation Start Date']); - const to = formatDate(record['Operation End Date']); + const from = record['Start date'] || null; + const to = record['End date'] || null; return { from, @@ -152,7 +207,7 @@ const getTimeframe = record => { * Preprocess `title`. * * Input fields taken from the `record` are: - * - `Project Title` + * - `Name of Project` * * @memberof 2014uk16rfop001XlsTransform * @param {Object} record The row received from parsed file @@ -160,14 +215,15 @@ const getTimeframe = record => { */ const getTitle = record => - record['Project Title'] ? record['Project Title'].trim() : ''; + record['Name of Project'] ? record['Name of Project'].trim() : ''; /** * Map fields for 2014uk16rfop001 producer, XLS file types * - * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/record.json|stub} + * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json|stub} + * + * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js|implementation details} * - * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform.js|implementation details} * @name 2014uk16rfop001XlsTransform * @param {Object} record Piece of data to transform before going to harmonized storage. * @returns {Project} JSON matching the type fields. 
@@ -180,7 +236,7 @@ export default (record: Object): Project | null => { action: '', budget: getBudget(record), call_year: '', - description: '', + description: getDescription(record), ec_priorities: [], media: [], programme_name: '', @@ -197,7 +253,7 @@ export default (record: Object): Project | null => { status: '', sub_programme_name: '', success_story: '', - themes: [], + themes: getThemes(record), third_parties: getThirdParties(record), timeframe: getTimeframe(record), title: getTitle(record), diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json b/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json index 0967ef424..a813d23f3 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json @@ -1 +1,16 @@ -{} +{ + "Recipient of funds": "Hethel Innovation Ltd", + "Name of Project": "Breakthrough", + "Type of fund": "ERDF", + "Priority Axis": "1", + "Summary of project (max 100 words)": "BREAKTHROUGH creates unique environments that stimulate innovation, by leading diverse and cross-cutting teams on expeditions to new cross-cluster market opportunities. 
", + "Start date": "2018-12-31T21:00:00.000Z", + "End date": "2021-12-30T21:00:00.000Z", + "ERDF/ESF investment £m": 598176, + "Total project costs £m": 1196352, + "% of project funded by EU": 0.5, + "Location (postcode)": "NR14 8FB", + "Local Enterprise Partnership area": "New Anglia", + "Country": "England", + "Type and focus of support (*Category of intervention)*": "065 Research and Innovation processes, technology transfer and c…" +} diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap index 19be68d09..8c76a99a6 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -38,7 +38,7 @@ Object { "ec_priorities": Array [], "media": Array [], "programme_name": "", - "project_id": "42ce684eaf8ac11f096d80fe5d84cde5", + "project_id": "c4e03cd67eedb94a8ecf3df27f16dfbf", "project_locations": Array [ Object { "address": "", @@ -87,3 +87,94 @@ Object { "type": Array [], } `; + +exports[`XLS transformers for 2014uk16rfop001 Type ESIF: produces correct JSON output structure 1`] = ` +Object { + "action": "", + "budget": Object { + "eu_contrib": Object { + "currency": "GBP", + "raw": 598176, + "value": 598176, + }, + "funding_area": Array [], + "mmf_heading": "", + "other_contrib": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "private_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "public_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "total_cost": Object { + "currency": "GBP", + "raw": 1196352, + "value": 1196352, + }, + }, + "call_year": "", + "complete": false, + "description": "Type of fund: ERDF +Priority Axis: 1 +Summary of project (max 100 words): BREAKTHROUGH creates unique environments that stimulate innovation, 
by leading diverse and cross-cutting teams on expeditions to new cross-cluster market opportunities.  +", + "ec_priorities": Array [], + "media": Array [], + "programme_name": "", + "project_id": "176457b26915b02e6f2fc20b2b7af749", + "project_locations": Array [ + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "NR14 8FB", + "region": "New Anglia", + "town": "", + }, + ], + "project_website": "", + "related_links": Array [], + "reporting_organisation": "Member states", + "results": Object { + "available": "", + "result": "", + }, + "status": "", + "sub_programme_name": "", + "success_story": "", + "themes": Array [ + "065 Research and Innovation processes, technology transfer and c…", + ], + "third_parties": Array [ + Object { + "address": "", + "country": "GB", + "email": "", + "name": "Hethel Innovation Ltd", + "phone": "", + "region": "", + "role": "Beneficiary", + "type": "", + "website": "", + }, + ], + "timeframe": Object { + "from": "2018-12-31T21:00:00.000Z", + "from_precision": "day", + "to": "2021-12-30T21:00:00.000Z", + "to_precision": "day", + }, + "title": "Breakthrough", + "type": Array [], +} +`; diff --git a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js index 8e9b27f82..a8b16e66a 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js +++ b/services/ingestion/etl/2014uk16rfop001/xls/test/unit/lib/transform.spec.js @@ -25,4 +25,8 @@ describe('XLS transformers for 2014uk16rfop001', () => { test('Type ESF: produces correct JSON output structure', () => { expect(esf).toMatchSnapshot(); }); + + test('Type ESIF: produces correct JSON output structure', () => { + expect(esif).toMatchSnapshot(); + }); }); diff --git a/tools/eubfr-cli/lib/getServices.js b/tools/eubfr-cli/lib/getServices.js index 05f526421..1279233ad 100644 --- 
a/tools/eubfr-cli/lib/getServices.js +++ b/tools/eubfr-cli/lib/getServices.js @@ -16,6 +16,7 @@ const allServices = [ { service: 'ingestion-etl-2014tc16rfcb047-xls', exportEnv: false }, { service: 'ingestion-etl-2014tc16rfpc001-xls', exportEnv: false }, { service: 'ingestion-etl-2014tc16rftn002-xls', exportEnv: false }, + { service: 'ingestion-etl-2014uk16rfop001-xls', exportEnv: false }, { service: 'ingestion-etl-bulgaria-xls', exportEnv: false }, { service: 'ingestion-etl-cordis-csv', exportEnv: false }, { service: 'ingestion-etl-devco-xls', exportEnv: false }, From 0f85ae7be627917ff32a7dc1b517dbeba9bbe692 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 13 May 2019 10:59:14 +0300 Subject: [PATCH 03/10] Add data from CSV file --- docs/types/README.md | 1 + docs/types/etls/2014tc16i5cb005-csv.md | 7 + docs/types/etls/2014uk16rfop001-csv.md | 162 ++++++ docs/types/etls/2014uk16rfop001-xls.md | 532 ++++++++++++++++++ scripts/documentation/docs-md.js | 1 + .../etl/2014uk16rfop001/csv/README.md | 19 + .../etl/2014uk16rfop001/csv/babel.config.js | 29 + .../etl/2014uk16rfop001/csv/package.json | 33 ++ .../etl/2014uk16rfop001/csv/serverless.yml | 107 ++++ .../csv/src/events/onParseCSV.js | 139 +++++ .../2014uk16rfop001/csv/src/lib/transform.js | 315 +++++++++++ .../csv/test/stubs/record.json | 15 + .../lib/__snapshots__/transform.spec.js.snap | 183 ++++++ .../csv/test/unit/lib/transform.spec.js | 22 + .../etl/2014uk16rfop001/csv/webpack.config.js | 32 ++ tools/eubfr-cli/lib/getServices.js | 1 + 16 files changed, 1598 insertions(+) create mode 100644 docs/types/etls/2014uk16rfop001-csv.md create mode 100644 docs/types/etls/2014uk16rfop001-xls.md create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/README.md create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/babel.config.js create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/package.json create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/serverless.yml create mode 100644 
services/ingestion/etl/2014uk16rfop001/csv/src/events/onParseCSV.js create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/test/stubs/record.json create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/transform.spec.js create mode 100644 services/ingestion/etl/2014uk16rfop001/csv/webpack.config.js diff --git a/docs/types/README.md b/docs/types/README.md index 7a5334c0a..19b5498af 100644 --- a/docs/types/README.md +++ b/docs/types/README.md @@ -17,6 +17,7 @@ Here's a list of the transformations made in ETLs around the `Project` model. - [2014tc16rfcb047 - XLS](./etls/2014tc16rfcb047-xls.md) - [2014tc16rfpc001 - XLS](./etls/2014tc16rfpc001-xls.md) - [2014tc16rftn002 - XLS](./etls/2014tc16rftn002-xls.md) +- [2014uk16rfop001 - CSV](./etls/2014uk16rfop001-csv.md) - [bulgaria - XLS](./etls/bulgaria-xls.md) - [CORDIS - CSV](./etls/cordis-csv.md) - [DEVCO - XLS](./etls/devco-xls.md) diff --git a/docs/types/etls/2014tc16i5cb005-csv.md b/docs/types/etls/2014tc16i5cb005-csv.md index 9ef97d5ae..454d87792 100644 --- a/docs/types/etls/2014tc16i5cb005-csv.md +++ b/docs/types/etls/2014tc16i5cb005-csv.md @@ -146,6 +146,13 @@ Input fields taken from the `record` are: Returns **[String][4]** +## numeral + +Numeral understands percentages: + +- "500665.00%" => 5006.650000000001 +- "85.00%" => 0.85 + [1]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014tc16i5cb005/csv/test/stubs/record.json [2]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014tc16i5cb005/csv/src/lib/transform.js [3]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object diff --git a/docs/types/etls/2014uk16rfop001-csv.md b/docs/types/etls/2014uk16rfop001-csv.md new file mode 100644 index 
000000000..2fa5ade36 --- /dev/null +++ b/docs/types/etls/2014uk16rfop001-csv.md @@ -0,0 +1,162 @@ + + +## 2014uk16rfop001CsvTransform + +Map fields for 2014uk16rfop001 producer, CSV file types + +Example input data: [stub][1] + +Transform function: [implementation details][2] + +### Parameters + +- `record` **[Object][3]** Piece of data to transform before going to harmonized storage. + +Returns **Project** JSON matching the type fields. + +### getBudget + +Preprocess `budget`. + +Input fields taken from the `record` are: + +- `Total project costs �m (eligible project costs only)` +- `% of project funded by EU (Co-financing rate%)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Summary of project(max 100 words)` +- `Local enterprise partnership area` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Generates an ID for `project_id`. + +Input fields taken from the `record` are: + +- `Name of project` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getLocations + +Preprocess `project_locations`. + +Input fields taken from the `record` are: + +- `Location (postcode)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getThemes + +Preprocess `themes`. + +Input fields taken from the `record` are: + +- `Type and focus support (category of intervention)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +### getThirdParties + +Preprocess `third_parties`. 
+ +Input fields taken from the `record` are: + +- `Recipient of funds(ERDF/ESF beneficiary)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### formatDate + +Format date. + +#### Parameters + +- `date` **[Date][7]** + +Returns **[Date][7]** The date formatted into an ISO 8601 date format + +### getTimeframe + +Preprocess `timeframe`. + +Input fields taken from the `record` are: + +- `Start date` +- `End date` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +### getTitle + +Preprocess `title`. + +Input fields taken from the `record` are: + +- `Name of project` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getType + +Preprocess `type`. + +Input fields taken from the `record` are: + +- `Type of fund` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +[1]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/csv/test/stubs/record.json +[2]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js +[3]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object +[4]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String +[5]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array +[6]: https://developer.mozilla.org/docs/Web/API/Location +[7]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Date diff --git a/docs/types/etls/2014uk16rfop001-xls.md b/docs/types/etls/2014uk16rfop001-xls.md new file mode 100644 index 000000000..ff68679c2 --- /dev/null +++ b/docs/types/etls/2014uk16rfop001-xls.md @@ -0,0 +1,532 @@ + + +## 2014uk16rfop001XlsTransform + +Map fields for 2014uk16rfop001 
producer, XLS file types + +Example input data: [stub][1] + +Transform function: [implementation details][2] + +### Parameters + +- `record` **[Object][3]** Piece of data to transform before going to harmonized storage. + +Returns **Project** JSON matching the type fields. + +### getBudget + +Preprocess `budget`. + +Input fields taken from the `record` are: + +- `Total Eligible Expenditure Allocated to the Operation;Current` +- `Union co‑financing rate, as per priority axis;` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getBudget + +Preprocess `budget`. + +Input fields taken from the `record` are: + +- `Total project costs £m` +- `ERDF/ESF investment £m` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Operation Summary` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Type of fund` +- `Priority Axis` +- `Summary of project (max 100 words)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Preprocess `project_id`. + +There are rows with overlapping information about beneficiaries and operations. +In order to keep them separate, as they are in the ingested file, we take into account the budgetary information as well. + +Input fields taken from the `record` are: + +- `Operation Name` +- `Total Eligible Expenditure Allocated to the Operation;Current` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Preprocess `project_id`. 
+ +Input fields taken from the `record` are: + +- `Name of Project` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getLocations + +Preprocess `project_locations`. + +Input fields taken from the `record` are: + +- `Operation postcode; or other appropriate location indicator;` +- `Country` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getLocations + +Preprocess `project_locations`. + +Input fields taken from the `record` are: + +- `Location (postcode)` +- `Local Enterprise Partnership area` +- `Country` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getThemes + +Preprocess `themes`. + +Input fields taken from the `record` are: + +- `Category of Intervention` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +### getThemes + +Preprocess `themes`. + +Input fields taken from the `record` are: + +- `Type and focus of support (*Category of intervention)*` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +### getThirdParties + +Preprocess `third_parties`. + +Input fields taken from the `record` are: + +- `Beneficiary Name` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### getThirdParties + +Preprocess `third_parties`. + +Input fields taken from the `record` are: + +- `Recipient of funds` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### getTimeframe + +Preprocess `timeframe`. 
+ +Input fields taken from the `record` are: + +- `Operation Start Date` +- `Operation End Date` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +### getTimeframe + +Preprocess `timeframe`. + +Input fields taken from the `record` are: + +- `Start date` +- `End date` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +### getTitle + +Preprocess `title`. + +Input fields taken from the `record` are: + +- `Operation Name` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getTitle + +Preprocess `title`. + +Input fields taken from the `record` are: + +- `Name of Project` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getCodeByCountry + +Gets country code from a country name. + +#### Parameters + +- `countryName` **[String][4]** The name of the country + +Returns **[String][4]** The ISO 3166-1 country code + +## 2014uk16rfop001XlsTransform + +Map fields for 2014uk16rfop001 producer, XLS file types + +Example input data: [stub][7] + +Transform function: [implementation details][8] + +### Parameters + +- `record` **[Object][3]** Piece of data to transform before going to harmonized storage. + +Returns **Project** JSON matching the type fields. + +### getBudget + +Preprocess `budget`. + +Input fields taken from the `record` are: + +- `Total Eligible Expenditure Allocated to the Operation;Current` +- `Union co‑financing rate, as per priority axis;` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getBudget + +Preprocess `budget`. 
+ +Input fields taken from the `record` are: + +- `Total project costs £m` +- `ERDF/ESF investment £m` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Operation Summary` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Type of fund` +- `Priority Axis` +- `Summary of project (max 100 words)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Preprocess `project_id`. + +There are rows with overlapping information about beneficiaries and operations. +In order to keep them separate, as they are in the ingested file, we take into account the budgetary information as well. + +Input fields taken from the `record` are: + +- `Operation Name` +- `Total Eligible Expenditure Allocated to the Operation;Current` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Preprocess `project_id`. + +Input fields taken from the `record` are: + +- `Name of Project` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getLocations + +Preprocess `project_locations`. + +Input fields taken from the `record` are: + +- `Operation postcode; or other appropriate location indicator;` +- `Country` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getLocations + +Preprocess `project_locations`. 
+ +Input fields taken from the `record` are: + +- `Location (postcode)` +- `Local Enterprise Partnership area` +- `Country` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getThemes + +Preprocess `themes`. + +Input fields taken from the `record` are: + +- `Category of Intervention` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +### getThemes + +Preprocess `themes`. + +Input fields taken from the `record` are: + +- `Type and focus of support (*Category of intervention)*` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +### getThirdParties + +Preprocess `third_parties`. + +Input fields taken from the `record` are: + +- `Beneficiary Name` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### getThirdParties + +Preprocess `third_parties`. + +Input fields taken from the `record` are: + +- `Recipient of funds` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### getTimeframe + +Preprocess `timeframe`. + +Input fields taken from the `record` are: + +- `Operation Start Date` +- `Operation End Date` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +### getTimeframe + +Preprocess `timeframe`. + +Input fields taken from the `record` are: + +- `Start date` +- `End date` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +### getTitle + +Preprocess `title`. + +Input fields taken from the `record` are: + +- `Operation Name` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getTitle + +Preprocess `title`. 
+ +Input fields taken from the `record` are: + +- `Name of Project` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getCodeByCountry + +Gets country code from a country name. + +#### Parameters + +- `countryName` **[String][4]** The name of the country + +Returns **[String][4]** The ISO 3166-1 country code + +[1]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json +[2]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js +[3]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object +[4]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String +[5]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array +[6]: https://developer.mozilla.org/docs/Web/API/Location +[7]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json +[8]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js diff --git a/scripts/documentation/docs-md.js b/scripts/documentation/docs-md.js index f078bab7e..cc6e6d365 100755 --- a/scripts/documentation/docs-md.js +++ b/scripts/documentation/docs-md.js @@ -20,6 +20,7 @@ const transforms = [ '2014tc16rfpc001-xls', '2014tc16rftn002-xls', '2014uk16rfop001-xls', + '2014uk16rfop001-csv', 'bulgaria-xls', 'cordis-csv', 'devco-xls', diff --git a/services/ingestion/etl/2014uk16rfop001/csv/README.md b/services/ingestion/etl/2014uk16rfop001/csv/README.md new file mode 100644 index 000000000..2f3b94241 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/README.md @@ -0,0 +1,19 @@ +# 2014uk16rfop001 CSV ETL mapping rules + +Model to compare with is available at: 
https://ec-europa.github.io/eubfr-data-lake/ + +| Field | Target | +| ---------------------------------------------------- | ----------------- | +| Recipient of funds(ERDF/ESF beneficiary) | third_parties | +| Name of project | title | +| Type of fund | type | +| Summary of project(max 100 words) | description | +| Start date | timeframe.from | +| End date | timeframe.to | +| ERDF/ESF investment £m | | +| Total project costs £m (eligible project costs only) | budget.total_cost | +| % of project funded by EU (Co-financing rate%) | budget.eu_contrib | +| Location (postcode) | project_locations | +| Local enterprise partnership area | description | +| Country | | +| Type and focus support (category of intervention) | themes | diff --git a/services/ingestion/etl/2014uk16rfop001/csv/babel.config.js b/services/ingestion/etl/2014uk16rfop001/csv/babel.config.js new file mode 100644 index 000000000..0397ff2b1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/babel.config.js @@ -0,0 +1,29 @@ +module.exports = { + presets: [ + '@babel/preset-flow', + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + modules: false, + loose: true, + }, + ], + ], + env: { + test: { + presets: [ + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + }, + ], + ], + }, + }, +}; diff --git a/services/ingestion/etl/2014uk16rfop001/csv/package.json b/services/ingestion/etl/2014uk16rfop001/csv/package.json new file mode 100644 index 000000000..25dcf1bd8 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/package.json @@ -0,0 +1,33 @@ +{ + "private": true, + "name": "@eubfr/ingestion-etl-2014uk16rfop001-csv", + "version": "0.6.0", + "scripts": { + "deploy": "sls deploy -v", + "test:unit": "jest --testPathPattern=unit" + }, + "dependencies": { + "@eubfr/lib": "^0.6.0", + "@eubfr/logger-messenger": "^0.6.0", + "csv-parse": "4.3.4", + "numeral": "2.0.6" + }, + "devDependencies": { + "@babel/core": "7.4.3", + "@babel/preset-env": "7.4.3", +
"@babel/preset-flow": "7.0.0", + "@eubfr/types": "^0.6.0", + "aws-sdk": "2.434.0", + "babel-jest": "24.7.0", + "babel-loader": "8.0.5", + "jest": "24.7.0", + "serverless": "1.40.0", + "serverless-webpack": "5.2.0", + "webpack": "4.29.6" + }, + "jest": { + "transform": { + "^.+\\.js$": "babel-jest" + } + } +} diff --git a/services/ingestion/etl/2014uk16rfop001/csv/serverless.yml b/services/ingestion/etl/2014uk16rfop001/csv/serverless.yml new file mode 100644 index 000000000..258774be9 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/serverless.yml @@ -0,0 +1,107 @@ +service: ingestion-etl-2014uk16rfop001-csv + +plugins: + - serverless-webpack + +custom: + webpack: + webpackConfig: ./webpack.config.js + includeModules: + forceExclude: + - aws-sdk + packager: yarn + eubfrEnvironment: ${opt:eubfr_env, file(../../../../../config.json):eubfr_env, env:EUBFR_ENV, 'dev'} + bucketName: ${file(../../../../../resources/harmonized-storage/serverless.yml):custom.bucketName} + +package: + individually: true + +provider: + name: aws + runtime: nodejs8.10 + timeout: 180 + stage: ${opt:stage, file(../../../../../config.json):stage, env:EUBFR_STAGE, 'dev'} + region: ${opt:region, file(../../../../../config.json):region, env:EUBFR_AWS_REGION, 'eu-central-1'} + deploymentBucket: + name: eubfr-${self:custom.eubfrEnvironment}-deploy + stackTags: + ENV: ${self:custom.eubfrEnvironment} + iamRoleStatements: + - Effect: 'Allow' + Action: + - 's3:PutObject' + Resource: + Fn::Join: + - '' + - - 'arn:aws:s3:::' + - ${self:custom.bucketName} + - '/*' + # Allow queueing messages to the DLQ https://docs.aws.amazon.com/lambda/latest/dg/dlq.html + - Effect: 'Allow' + Action: + - sqs:SendMessage + Resource: '*' + +functions: + parseCsv: + handler: src/events/onParseCSV.handler + name: ${self:provider.stage}-${self:service}-parseCsv + memorySize: 512 + environment: + BUCKET: ${self:custom.bucketName} + REGION: ${self:provider.region} + STAGE: ${self:provider.stage} + events: + - sns: + 
arn: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-csv' + topicName: ${self:provider.stage}-etl-2014uk16rfop001-csv + +resources: + Resources: + ParseCsvLambdaFunction: + Type: 'AWS::Lambda::Function' + Properties: + DeadLetterConfig: + TargetArn: + Fn::ImportValue: ${self:provider.stage}:ingestion-dead-letter-queue:LambdaFailureQueue + SNSTopic2014uk16rfop001CSV: + Type: AWS::SNS::Topic + Properties: + TopicName: ${self:provider.stage}-etl-2014uk16rfop001-csv + DisplayName: 2014uk16rfop001 CSV ETL + SNSTopic2014uk16rfop001CSVPolicy: + Type: AWS::SNS::TopicPolicy + Properties: + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: Allow-IngestionManager-Publish + Action: + - sns:Publish + Effect: Allow + Resource: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-csv' + Principal: + AWS: + Fn::Join: + - '' + - - 'arn:aws:sts::' + - Ref: 'AWS::AccountId' + - ':assumed-role/ingestion-manager-${self:provider.stage}-' + - Ref: 'AWS::Region' + - '-lambdaRole/${self:provider.stage}-ingestion-manager-onObjectCreated' + Topics: + - Ref: SNSTopic2014uk16rfop001CSV diff --git a/services/ingestion/etl/2014uk16rfop001/csv/src/events/onParseCSV.js b/services/ingestion/etl/2014uk16rfop001/csv/src/events/onParseCSV.js new file mode 100644 index 000000000..d1644586c --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/src/events/onParseCSV.js @@ -0,0 +1,139 @@ +import AWS from 'aws-sdk'; // eslint-disable-line import/no-extraneous-dependencies +import parse from 'csv-parse/lib/sync'; + +// ETL utilities. 
+import ensureExtensions from '@eubfr/lib/etl/ensureExtensions'; +import extractMessage from '@eubfr/lib/etl/extractMessage'; +import handleError from '@eubfr/lib/etl/handleError'; + +import MessengerFactory from '@eubfr/logger-messenger/src/lib/MessengerFactory'; +import { STATUS } from '@eubfr/logger-messenger/src/lib/status'; + +import transformRecord from '../lib/transform'; + +export const handler = async (event, context) => { + const { BUCKET, REGION, STAGE } = process.env; + + if (!BUCKET || !REGION || !STAGE) { + throw new Error( + 'BUCKET, REGION and STAGE environment variables are required!' + ); + } + + const snsMessage = extractMessage(event); + const { key } = snsMessage.object; + + if (!ensureExtensions({ file: key, extensions: ['.csv'] })) { + throw new Error('CSV file expected for this ETL.'); + } + + const messenger = MessengerFactory.Create({ context }); + const s3 = new AWS.S3(); + + try { + await messenger.send({ + message: { + computed_key: key, + status_message: 'Start parsing CSV...', + status_code: STATUS.PARSING, + }, + to: ['logs'], + }); + + let projects = ''; + const recordsAgg = []; + const separator = ';'; + const mergeFields = [ + 'ERDF/ESF investment �m', + 'Location (postcode)', + '% of project funded by EU (Co-financing rate%)', + 'Total project costs �m (eligible project costs only)', + ]; + + const file = await s3 + .getObject({ Bucket: snsMessage.bucket.name, Key: key }) + .promise(); + + const csvData = file.Body.toString(); + const records = parse(csvData); + + // CSV file is not formatted well for this ETL. + // We need to selectively take information from it. + + records.shift(); // Title/Type of fund. + records.shift(); // Empty row + records.shift(); // Empty row + records.shift(); // Empty row + + const headerRow = records + .shift() + .map(el => el.trim().replace(/(\r\n|\n|\r)/gm, '')); + + records.pop(); // Notes + records.pop(); // Empty row + + // Normalize the list by replacing properties. 
+ const mappedRecords = records.map(record => { + const mapped = {}; + + Object.keys(record).forEach((field, i) => { + mapped[headerRow[i]] = record[field]; + }); + + return mapped; + }); + + mappedRecords.forEach(record => { + const indexExisting = recordsAgg.findIndex( + needle => needle['Name of project'] === record['Name of project'] + ); + + // If not present yet. + if (indexExisting === -1) { + recordsAgg.push(record); + } + // If there's an existing object with that name, we have to update it. + else { + const existing = recordsAgg.splice(indexExisting, 1)[0]; + + mergeFields.forEach(field => { + // Concatenate old and current values for the given field. + existing[field] = `${existing[field]}${separator}${record[field]}`; + }); + + recordsAgg.push(existing); + } + }); + + recordsAgg.forEach(record => { + const data = transformRecord(record); + projects += `${JSON.stringify(data)}\n`; + }); + + // Upload the data to the harmonized storage bucket. + const params = { + Bucket: BUCKET, + Key: `${key}.ndjson`, + Body: projects, + ContentType: 'application/x-ndjson', + }; + + await s3.upload(params).promise(); + + await messenger.send({ + message: { + computed_key: key, + status_message: + 'CSV parsed successfully. 
Results will be uploaded to ElasticSearch soon...', + status_code: STATUS.PARSED, + }, + to: ['logs'], + }); + + return console.log('Done'); + } catch (error) { + return handleError({ messenger, key, statusCode: STATUS.ERROR }, { error }); + } +}; + +export default handler; diff --git a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js new file mode 100644 index 000000000..b1bc2dc07 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js @@ -0,0 +1,315 @@ +// @flow + +import crypto from 'crypto'; +import numeral from 'numeral'; +import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; +import type { Project } from '@eubfr/types'; + +/* + * Transform message (2014uk16rfop001 CSV) + */ + +/** + * Preprocess `budget`. + * + * Input fields taken from the `record` are: + * + * - `Total project costs �m (eligible project costs only)` + * - `% of project funded by EU (Co-financing rate%)` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {Budget} + */ + +const getBudget = record => { + const rates = record['% of project funded by EU (Co-financing rate%)'].split( + ';' + ); + const costs = record[ + 'Total project costs �m (eligible project costs only)' + ].split(';'); + + let rate = 0; + + rates.forEach(percentage => { + const { _value: percent } = numeral(percentage); + rate += percent; + }); + + let cost = 0; + + costs.forEach(costItem => { + const { _value: costValue } = numeral(costItem); + cost += costValue; + }); + + return { + eu_contrib: sanitizeBudgetItem({ + value: cost * rate, + currency: 'GBP', + raw: record['% of project funded by EU (Co-financing rate%)'], + }), + funding_area: [], + mmf_heading: '', + other_contrib: sanitizeBudgetItem(), + private_fund: sanitizeBudgetItem(), + public_fund: sanitizeBudgetItem(), + total_cost: sanitizeBudgetItem({ + value: cost, + currency: 
'GBP', + raw: record['Total project costs �m (eligible project costs only)'], + }), + }; +}; + +/** + * Preprocess `description`. + * + * Input fields taken from the `record` are: + * + * - `Summary of project(max 100 words)` + * - `Local enterprise partnership area` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getDescription = record => { + const fields = [ + 'Summary of project(max 100 words)', + 'Local enterprise partnership area', + ]; + + let description = ''; + + fields.forEach(field => { + if (record[field]) { + description += `${field}: ${record[field]} \n`; + } + }); + + return description; +}; + +/** + * Generates an ID for `project_id` as the MD5 hex digest of the project name. + * + * Input fields taken from the `record` are: + * - `Name of project` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getProjectId = record => + crypto + .createHash('md5') + .update(record['Name of project']) + .digest('hex'); + +/** + * Preprocess `project_locations`. + * + * Input fields taken from the `record` are: + * + * - `Location (postcode)` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {Array<Location>} + */ + +const getLocations = record => { + const locations = []; + + const regions = record['Location (postcode)'] + .split(';') + .filter(a => a) + .map(a => a.trim()); + + regions.forEach(region => { + locations.push({ + address: '', + centroid: null, + country_code: 'GB', + location: null, + nuts: [], + postal_code: '', + region, + town: '', + }); + }); + + return locations; +}; + +/** + * Preprocess `themes`.
+ * + * Input fields taken from the `record` are: + * + * - `Type and focus support (category of intervention)` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getThemes = record => [ + record['Type and focus support (category of intervention)'].trim(), +]; + +/** + * Preprocess `third_parties`. + * + * Input fields taken from the `record` are: + * + * - `Recipient of funds(ERDF/ESF beneficiary)` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {Array<ThirdParty>} + */ + +const getThirdParties = record => + record['Recipient of funds(ERDF/ESF beneficiary)'] + ? [ + { + address: '', + country: 'England', + email: '', + name: record['Recipient of funds(ERDF/ESF beneficiary)'].trim(), + phone: '', + region: '', + role: 'Beneficiary', + type: '', + website: '', + }, + ] + : []; + +/** + * Format a month-year string such as "Apr-15" into an ISO 8601 timestamp. + * + * @memberof 2014uk16rfop001CsvTransform + * @param {String} date Month and two-digit year joined by a dash, e.g. "Apr-15" (the year is read as 20YY) + * @returns {String|null} The date as an ISO 8601 string, or null when the value is missing or cannot be parsed + * + */ +const formatDate = date => { + if (!date || typeof date !== 'string') return null; + const d = date.split('-'); + if (d.length !== 2) return null; + + const [month, yy] = d; + + if (!month || !yy) return null; + + try { + return new Date(`${month} 20${yy}`).toISOString(); + } catch (e) { + return null; + } +}; + +/** + * Preprocess `timeframe`. + * + * Input fields taken from the `record` are: + * + * - `Start date` + * - `End date` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {Timeframe} + */ + +const getTimeframe = record => { + const from = record['Start date'] || null; + const to = record['End date'] || null; + + return { + from: formatDate(from), + from_precision: 'day', + to: formatDate(to), + to_precision: 'day', + }; +}; + +/** + * Preprocess `title`.
+ * + * Input fields taken from the `record` are: + * + * - `Name of project` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getTitle = record => + record['Name of project'] ? record['Name of project'].trim() : ''; + +/** + * Preprocess `type`. + * + * Input fields taken from the `record` are: + * + * - `Type of fund` + * + * @memberof 2014uk16rfop001CsvTransform + * @param {Object} record The row received from parsed file + * @returns {Array<String>} + */ + +const getType = record => + record['Type of fund'] ? [record['Type of fund'].trim()] : []; + +/** + * Map fields for 2014uk16rfop001 producer, CSV file types + * + * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/csv/test/stubs/record.json|stub} + * + * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js|implementation details} + * @name 2014uk16rfop001CsvTransform + * @param {Object} record Piece of data to transform before going to harmonized storage. + * @returns {Project} JSON matching the type fields.
+ */ + +export default (record: Object): Project | null => { + if (!record) return null; + + // Map the fields + return { + action: '', + budget: getBudget(record), + call_year: '', + description: getDescription(record), + ec_priorities: [], + media: [], + programme_name: '', + project_id: getProjectId(record), + project_locations: getLocations(record), + project_website: '', + complete: false, + related_links: [], + reporting_organisation: 'Member states', + results: { + available: '', + result: '', + }, + status: '', + sub_programme_name: '', + success_story: '', + themes: getThemes(record), + third_parties: getThirdParties(record), + timeframe: getTimeframe(record), + title: getTitle(record), + type: getType(record), + }; +}; diff --git a/services/ingestion/etl/2014uk16rfop001/csv/test/stubs/record.json b/services/ingestion/etl/2014uk16rfop001/csv/test/stubs/record.json new file mode 100644 index 000000000..78bf2e339 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/test/stubs/record.json @@ -0,0 +1,15 @@ +{ + "Recipient of funds(ERDF/ESF beneficiary)": "NOMS CFO", + "Name of project": "CFO3", + "Type of fund": "ESF", + "Summary of project(max 100 words)": "NOMS CFO delivers services which prepare offenders to access mainstream employment, training and education. There is a focus on offenders with multiple barriers and those who fall into designated hard to reach categories. A strong focus on hard to reach groups remains at the centre of CFO delivery. Current mainstream provision is not accessible for many offenders with often limited benefits for those who have been excluded from such activity. NOMS CFO will give them the skills to engage with the mainstream. 
", + "Start date": "Apr-15", + "End date": "Dec-20", + "ERDF/ESF investment �m": "�131,000,000;�8,478,280;�18,358,652;�16,267,309;�11,225,168;�11,656,581;�13,444,587;�13,591,200;�8,535,577;�13,722,646", + "Total project costs �m (eligible project costs only)": "�247,000,000;;;;;;;;;", + "% of project funded by EU (Co-financing rate%)": "53%;;;;;;;;;", + "Location (postcode)": "National;South West;North West;South East;East;London;East Midlands;West Midlands;North East;Yorkshire", + "Local enterprise partnership area": "National", + "Country": "England", + "Type and focus support (category of intervention)": "Social Inclusion" +} diff --git a/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap new file mode 100644 index 000000000..0c647023a --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -0,0 +1,183 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`2014uk16rfop001 CSV transformer Produces correct JSON output structure 1`] = ` +Object { + "action": "", + "budget": Object { + "eu_contrib": Object { + "currency": "GBP", + "raw": "53%;;;;;;;;;", + "value": 130910000, + }, + "funding_area": Array [], + "mmf_heading": "", + "other_contrib": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "private_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "public_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "total_cost": Object { + "currency": "GBP", + "raw": "�247,000,000;;;;;;;;;", + "value": 247000000, + }, + }, + "call_year": "", + "complete": false, + "description": "Summary of project(max 100 words): NOMS CFO delivers services which prepare offenders to access mainstream employment, training and education. 
There is a focus on offenders with multiple barriers and those who fall into designated hard to reach categories. A strong focus on hard to reach groups remains at the centre of CFO delivery. Current mainstream provision is not accessible for many offenders with often limited benefits for those who have been excluded from such activity. NOMS CFO will give them the skills to engage with the mainstream. +Local enterprise partnership area: National +", + "ec_priorities": Array [], + "media": Array [], + "programme_name": "", + "project_id": "67a9d35a1b9d8c961b81138799089b78", + "project_locations": Array [ + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "National", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "South West", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "North West", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "South East", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "East", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "London", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "East Midlands", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "West 
Midlands", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "North East", + "town": "", + }, + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "Yorkshire", + "town": "", + }, + ], + "project_website": "", + "related_links": Array [], + "reporting_organisation": "Member states", + "results": Object { + "available": "", + "result": "", + }, + "status": "", + "sub_programme_name": "", + "success_story": "", + "themes": Array [ + "Social Inclusion", + ], + "third_parties": Array [ + Object { + "address": "", + "country": "England", + "email": "", + "name": "NOMS CFO", + "phone": "", + "region": "", + "role": "Beneficiary", + "type": "", + "website": "", + }, + ], + "timeframe": Object { + "from": "2015-03-31T21:00:00.000Z", + "from_precision": "day", + "to": "2020-11-30T22:00:00.000Z", + "to_precision": "day", + }, + "title": "CFO3", + "type": Array [ + "ESF", + ], +} +`; diff --git a/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/transform.spec.js b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/transform.spec.js new file mode 100644 index 000000000..9522b17f0 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/transform.spec.js @@ -0,0 +1,22 @@ +/** + * @jest-environment node + */ + +import mapper from '../../../src/lib/transform'; +import testRecord from '../../stubs/record.json'; + +describe('2014uk16rfop001 CSV transformer', () => { + let result = {}; + + beforeAll(() => { + result = mapper(testRecord); + }); + + test('Returns null when record is not provided', () => { + expect(mapper()).toBe(null); + }); + + test('Produces correct JSON output structure', () => { + expect(result).toMatchSnapshot(); + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop001/csv/webpack.config.js 
b/services/ingestion/etl/2014uk16rfop001/csv/webpack.config.js new file mode 100644 index 000000000..30fd8ced7 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/csv/webpack.config.js @@ -0,0 +1,32 @@ +const slsw = require('serverless-webpack'); +const path = require('path'); + +module.exports = { + entry: slsw.lib.entries, + target: 'node', + mode: slsw.lib.webpack.isLocal ? 'development' : 'production', + optimization: { + minimize: process.env.EUBFR_ENV && process.env.EUBFR_ENV === 'prod', + }, + devtool: 'nosources-source-map', + externals: [{ 'aws-sdk': true }], + module: { + rules: [ + { + test: /\.js$/, + use: [ + { + loader: 'babel-loader', + }, + ], + include: __dirname, + exclude: /node_modules/, + }, + ], + }, + output: { + libraryTarget: 'commonjs2', + path: path.join(__dirname, '.webpack'), + filename: '[name].js', + }, +}; diff --git a/tools/eubfr-cli/lib/getServices.js b/tools/eubfr-cli/lib/getServices.js index 1279233ad..37c14a28f 100644 --- a/tools/eubfr-cli/lib/getServices.js +++ b/tools/eubfr-cli/lib/getServices.js @@ -17,6 +17,7 @@ const allServices = [ { service: 'ingestion-etl-2014tc16rfpc001-xls', exportEnv: false }, { service: 'ingestion-etl-2014tc16rftn002-xls', exportEnv: false }, { service: 'ingestion-etl-2014uk16rfop001-xls', exportEnv: false }, + { service: 'ingestion-etl-2014uk16rfop001-csv', exportEnv: false }, { service: 'ingestion-etl-bulgaria-xls', exportEnv: false }, { service: 'ingestion-etl-cordis-csv', exportEnv: false }, { service: 'ingestion-etl-devco-xls', exportEnv: false }, From f56a955c713437d8852d4197b6bd39ba1bfe0ba6 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 13 May 2019 11:37:49 +0300 Subject: [PATCH 04/10] Correct date formatting --- .../etl/2014uk16rfop001/csv/src/lib/transform.js | 8 +++++--- .../test/unit/lib/__snapshots__/transform.spec.js.snap | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js 
b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js index b1bc2dc07..74ccc2570 100644 --- a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js +++ b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js @@ -204,12 +204,14 @@ const formatDate = date => { const d = date.split('-'); if (d.length !== 2) return null; - const [month, yy] = d; + const [m, y] = d; - if (!month || !yy) return null; + if (!m || !y) return null; + + const month = new Date(d).getMonth(); try { - return new Date(`${month} 20${yy}`).toISOString(); + return new Date(Date.UTC(`20${y}`, month)).toISOString(); } catch (e) { return null; } diff --git a/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap index 0c647023a..92ed59d52 100644 --- a/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap +++ b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -170,9 +170,9 @@ Local enterprise partnership area: National }, ], "timeframe": Object { - "from": "2015-03-31T21:00:00.000Z", + "from": "2015-04-01T00:00:00.000Z", "from_precision": "day", - "to": "2020-11-30T22:00:00.000Z", + "to": "2020-12-01T00:00:00.000Z", "to_precision": "day", }, "title": "CFO3", From 64527927d1c1ea52debff97ee97720349e1b7d16 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 13 May 2019 11:56:43 +0300 Subject: [PATCH 05/10] Correct date formatting --- .../etl/2014uk16rfop001/csv/src/lib/transform.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js index 74ccc2570..98a77e80b 100644 --- a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js +++ 
b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js @@ -201,17 +201,18 @@ const getThirdParties = record => */ const formatDate = date => { if (!date || typeof date !== 'string') return null; + + // Since input format cannot be parsed by native Date() constructor directly: const d = date.split('-'); if (d.length !== 2) return null; - + // We extract information about month and year manually. const [m, y] = d; - if (!m || !y) return null; - + // Get month natively from original date string for the UTC method. const month = new Date(d).getMonth(); try { - return new Date(Date.UTC(`20${y}`, month)).toISOString(); + return new Date(Date.UTC(Number(`20${y}`), month)).toISOString(); } catch (e) { return null; } From f45372f2658f49f27565926f2a85cfa3f8789c7e Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 13 May 2019 14:38:01 +0300 Subject: [PATCH 06/10] Add ODS --- docs/types/README.md | 1 + docs/types/etls/2014uk16rfop001-ods.md | 109 +++++++++ scripts/documentation/docs-md.js | 1 + .../2014uk16rfop001/csv/src/lib/transform.js | 4 +- .../lib/__snapshots__/transform.spec.js.snap | 4 +- .../etl/2014uk16rfop001/ods/README.md | 17 ++ .../etl/2014uk16rfop001/ods/babel.config.js | 29 +++ .../etl/2014uk16rfop001/ods/package.json | 32 +++ .../etl/2014uk16rfop001/ods/serverless.yml | 107 +++++++++ .../ods/src/events/onParseODS.js | 126 +++++++++++ .../2014uk16rfop001/ods/src/lib/transform.js | 209 ++++++++++++++++++ .../ods/test/stubs/record.json | 13 ++ .../ods/test/unit/events/onParseODS.spec.js | 20 ++ .../lib/__snapshots__/transform.spec.js.snap | 98 ++++++++ .../ods/test/unit/lib/transform.spec.js | 22 ++ .../etl/2014uk16rfop001/ods/webpack.config.js | 32 +++ tools/eubfr-cli/lib/getServices.js | 1 + 17 files changed, 821 insertions(+), 4 deletions(-) create mode 100644 docs/types/etls/2014uk16rfop001-ods.md create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/README.md create mode 100644 
services/ingestion/etl/2014uk16rfop001/ods/babel.config.js create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/package.json create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/serverless.yml create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/src/events/onParseODS.js create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/src/lib/transform.js create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/test/stubs/record.json create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/test/unit/events/onParseODS.spec.js create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/__snapshots__/transform.spec.js.snap create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/transform.spec.js create mode 100644 services/ingestion/etl/2014uk16rfop001/ods/webpack.config.js diff --git a/docs/types/README.md b/docs/types/README.md index 19b5498af..736117652 100644 --- a/docs/types/README.md +++ b/docs/types/README.md @@ -18,6 +18,7 @@ Here's a list of the transformations made in ETLs around the `Project` model. - [2014tc16rfpc001 - XLS](./etls/2014tc16rfpc001-xls.md) - [2014tc16rftn002 - XLS](./etls/2014tc16rftn002-xls.md) - [2014uk16rfop001 - CSV](./etls/2014uk16rfop001-csv.md) +- [2014uk16rfop001 - ODS](./etls/2014uk16rfop001-ods.md) - [bulgaria - XLS](./etls/bulgaria-xls.md) - [CORDIS - CSV](./etls/cordis-csv.md) - [DEVCO - XLS](./etls/devco-xls.md) diff --git a/docs/types/etls/2014uk16rfop001-ods.md b/docs/types/etls/2014uk16rfop001-ods.md new file mode 100644 index 000000000..335f6a0ff --- /dev/null +++ b/docs/types/etls/2014uk16rfop001-ods.md @@ -0,0 +1,109 @@ + + +## 2014uk16rfop001XlsTransform + +Map fields for 2014uk16rfop001 producer, ODS file types + +Example input data: [stub][1] + +Transform function: [implementation details][2] + +### Parameters + +- `record` **[Object][3]** Piece of data to transform before going to harmonized storage.
+ +Returns **Project** JSON matching the type fields. + +### getBudget + +Preprocess `budget`. + +Input fields taken from the `record` are: + +- `Aid element £` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Beneficiary identifier (E-claims ref)` +- `Sector NACE group level` +- `SANI reference of the aid measure` +- `Objective of the aid` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Preprocess `project_id`. + +Input fields taken from the `record` are: + +- `Objective of the aid` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getLocations + +Preprocess `project_locations`. + +Input fields taken from the `record` are: + +- `Location of Benficiary NUTS level II (drop down)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getThirdParties + +Preprocess `third_parties`. + +Input fields taken from the `record` are: + +- `Name of beneficiary` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### getTimeframe + +Preprocess `timeframe`. 
+ +Input fields taken from the `record` are: + +- `Date of granting` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +[1]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/ods/test/stubs/record.json +[2]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/ods/src/lib/transform.js +[3]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object +[4]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String +[5]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array +[6]: https://developer.mozilla.org/docs/Web/API/Location diff --git a/scripts/documentation/docs-md.js b/scripts/documentation/docs-md.js index cc6e6d365..1159558db 100755 --- a/scripts/documentation/docs-md.js +++ b/scripts/documentation/docs-md.js @@ -21,6 +21,7 @@ const transforms = [ '2014tc16rftn002-xls', '2014uk16rfop001-xls', '2014uk16rfop001-csv', + '2014uk16rfop001-ods', 'bulgaria-xls', 'cordis-csv', 'devco-xls', diff --git a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js index 98a77e80b..fb5c50384 100644 --- a/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js +++ b/services/ingestion/etl/2014uk16rfop001/csv/src/lib/transform.js @@ -237,9 +237,9 @@ const getTimeframe = record => { return { from: formatDate(from), - from_precision: 'day', + from_precision: 'month', to: formatDate(to), - to_precision: 'day', + to_precision: 'month', }; }; diff --git a/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap index 92ed59d52..08ceb9cbe 100644 ---
a/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap +++ b/services/ingestion/etl/2014uk16rfop001/csv/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -171,9 +171,9 @@ Local enterprise partnership area: National ], "timeframe": Object { "from": "2015-04-01T00:00:00.000Z", - "from_precision": "day", + "from_precision": "month", "to": "2020-12-01T00:00:00.000Z", - "to_precision": "day", + "to_precision": "month", }, "title": "CFO3", "type": Array [ diff --git a/services/ingestion/etl/2014uk16rfop001/ods/README.md b/services/ingestion/etl/2014uk16rfop001/ods/README.md new file mode 100644 index 000000000..7b3a3ad3f --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/README.md @@ -0,0 +1,17 @@ +# 2014uk16rfop001 ODS ETL mapping rules + +Model to compare with is available at: https://ec-europa.github.io/eubfr-data-lake/ + +| Field | Target | +| ------------------------------------------------ | ----------------------- | +| Name of beneficiary | third_parties | +| Beneficiary identifier (E-claims ref) | description | +| Type of enterprise (drop down) | | +| Location of Benficiary NUTS level II (drop down) | project_locations | +| Sector NACE group level | description | +| Aid element £ | budget.total_cost | +| Aid instrument (drop down) | | +| Date of granting | timeframe.from | +| Objective of the aid | description, project_id | +| Granting authority | | +| SANI reference of the aid measure | description | diff --git a/services/ingestion/etl/2014uk16rfop001/ods/babel.config.js b/services/ingestion/etl/2014uk16rfop001/ods/babel.config.js new file mode 100644 index 000000000..0397ff2b1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/babel.config.js @@ -0,0 +1,29 @@ +module.exports = { + presets: [ + '@babel/preset-flow', + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + modules: false, + loose: true, + }, + ], + ], + env: { + test: { + presets: [ + [ + '@babel/preset-env', + { +
targets: { + node: '8.10', + }, + }, + ], + ], + }, + }, +}; diff --git a/services/ingestion/etl/2014uk16rfop001/ods/package.json b/services/ingestion/etl/2014uk16rfop001/ods/package.json new file mode 100644 index 000000000..5a55b30e9 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/package.json @@ -0,0 +1,32 @@ +{ + "private": true, + "name": "@eubfr/ingestion-etl-2014uk16rfop001-ods", + "version": "0.6.0", + "scripts": { + "deploy": "sls deploy -v", + "test:unit": "jest --testPathPattern=unit" + }, + "dependencies": { + "@eubfr/lib": "^0.6.0", + "@eubfr/logger-messenger": "^0.6.0", + "xlsx": "0.14.2" + }, + "devDependencies": { + "@babel/core": "7.4.3", + "@babel/preset-env": "7.4.3", + "@babel/preset-flow": "7.0.0", + "@eubfr/types": "^0.6.0", + "aws-sdk": "2.434.0", + "babel-jest": "24.7.0", + "babel-loader": "8.0.5", + "jest": "24.7.0", + "serverless": "1.40.0", + "serverless-webpack": "5.2.0", + "webpack": "4.29.6" + }, + "jest": { + "transform": { + "^.+\\.js$": "babel-jest" + } + } +} diff --git a/services/ingestion/etl/2014uk16rfop001/ods/serverless.yml b/services/ingestion/etl/2014uk16rfop001/ods/serverless.yml new file mode 100644 index 000000000..c6ea9443f --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/serverless.yml @@ -0,0 +1,107 @@ +service: ingestion-etl-2014uk16rfop001-ods + +plugins: + - serverless-webpack + +custom: + webpack: + webpackConfig: ./webpack.config.js + includeModules: + forceExclude: + - aws-sdk + packager: yarn + eubfrEnvironment: ${opt:eubfr_env, file(../../../../../config.json):eubfr_env, env:EUBFR_ENV, 'dev'} + bucketName: ${file(../../../../../resources/harmonized-storage/serverless.yml):custom.bucketName} + +package: + individually: true + +provider: + name: aws + runtime: nodejs8.10 + timeout: 180 + stage: ${opt:stage, file(../../../../../config.json):stage, env:EUBFR_STAGE, 'dev'} + region: ${opt:region, file(../../../../../config.json):region, env:EUBFR_AWS_REGION, 'eu-central-1'} + 
deploymentBucket: + name: eubfr-${self:custom.eubfrEnvironment}-deploy + stackTags: + ENV: ${self:custom.eubfrEnvironment} + iamRoleStatements: + - Effect: 'Allow' + Action: + - 's3:PutObject' + Resource: + Fn::Join: + - '' + - - 'arn:aws:s3:::' + - ${self:custom.bucketName} + - '/*' + # Allow queueing messages to the DLQ https://docs.aws.amazon.com/lambda/latest/dg/dlq.html + - Effect: 'Allow' + Action: + - sqs:SendMessage + Resource: '*' + +functions: + parseOds: + handler: src/events/onParseODS.handler + name: ${self:provider.stage}-${self:service}-parseOds + memorySize: 1024 + environment: + BUCKET: ${self:custom.bucketName} + REGION: ${self:provider.region} + STAGE: ${self:provider.stage} + events: + - sns: + arn: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-ods' + topicName: ${self:provider.stage}-etl-2014uk16rfop001-ods + +resources: + Resources: + ParseOdsLambdaFunction: + Type: 'AWS::Lambda::Function' + Properties: + DeadLetterConfig: + TargetArn: + Fn::ImportValue: ${self:provider.stage}:ingestion-dead-letter-queue:LambdaFailureQueue + SNSTopic2014uk16rfop001ODS: + Type: AWS::SNS::Topic + Properties: + TopicName: ${self:provider.stage}-etl-2014uk16rfop001-ods + DisplayName: 2014uk16rfop001 ODS ETL + SNSTopic2014uk16rfop001ODSPolicy: + Type: AWS::SNS::TopicPolicy + Properties: + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: Allow-IngestionManager-Publish + Action: + - sns:Publish + Effect: Allow + Resource: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop001-*' + Principal: + AWS: + Fn::Join: + - '' + - - 'arn:aws:sts::' + - Ref: 'AWS::AccountId' + - ':assumed-role/ingestion-manager-${self:provider.stage}-' + - Ref: 'AWS::Region' + - '-lambdaRole/${self:provider.stage}-ingestion-manager-onObjectCreated' + Topics: + - Ref: SNSTopic2014uk16rfop001ODS 
diff --git a/services/ingestion/etl/2014uk16rfop001/ods/src/events/onParseODS.js b/services/ingestion/etl/2014uk16rfop001/ods/src/events/onParseODS.js new file mode 100644 index 000000000..25bb2b929 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/src/events/onParseODS.js @@ -0,0 +1,126 @@ +import AWS from 'aws-sdk'; // eslint-disable-line import/no-extraneous-dependencies +import XLSX from 'xlsx'; + +// ETL utilities. +import ensureExtensions from '@eubfr/lib/etl/ensureExtensions'; +import extractMessage from '@eubfr/lib/etl/extractMessage'; +import handleError from '@eubfr/lib/etl/handleError'; + +import MessengerFactory from '@eubfr/logger-messenger/src/lib/MessengerFactory'; +import { STATUS } from '@eubfr/logger-messenger/src/lib/status'; + +import transformRecord from '../lib/transform'; + +export const handler = async (event, context) => { + const { BUCKET, REGION, STAGE } = process.env; + + if (!BUCKET || !REGION || !STAGE) { + throw new Error( + 'BUCKET, REGION and STAGE environment variables are required!' 
+ ); + } + + try { + const snsMessage = extractMessage(event); + const { key } = snsMessage.object; + + if (!ensureExtensions({ file: key, extensions: ['.ods'] })) { + throw new Error('ODS file expected for this ETL.'); + } + + const messenger = MessengerFactory.Create({ context }); + const s3 = new AWS.S3(); + + await messenger.send({ + message: { + computed_key: key, + status_message: 'Start parsing ODS...', + status_code: STATUS.PARSING, + }, + to: ['logs'], + }); + + // Get file + const readStream = s3 + .getObject({ Bucket: snsMessage.bucket.name, Key: key }) + .createReadStream(); + + return new Promise((resolve, reject) => { + // Put data in buffer + const buffers = []; + readStream.on('data', data => { + buffers.push(data); + }); + + readStream.on('error', async e => + handleError( + { messenger, key, statusCode: STATUS.ERROR }, + { error: e, callback: reject } + ) + ); + + // Manage data + readStream.on('end', async () => { + let dataString = ''; + + // Parse file + const buffer = Buffer.concat(buffers); + const workbook = XLSX.read(buffer, { + cellText: false, + cellDates: true, + }); + const sheetNameList = workbook.SheetNames; + const parsedRows = XLSX.utils.sheet_to_json( + workbook.Sheets[sheetNameList[0]] + ); + + parsedRows.shift(); + const columnsMap = parsedRows.shift(); + + const improvedData = parsedRows.map(row => { + const improvedRow = {}; + + Object.keys(row).forEach(columnKey => { + if (columnsMap[columnKey]) { + const columnName = columnsMap[columnKey].trim(); + improvedRow[columnName] = row[columnKey]; + } + }); + + return improvedRow; + }); + + improvedData.forEach(record => { + const data = transformRecord(record); + dataString += `${JSON.stringify(data)}\n`; + }); + + // Load data + const params = { + Bucket: BUCKET, + Key: `${key}.ndjson`, + Body: dataString, + ContentType: 'application/x-ndjson', + }; + + await s3.upload(params).promise(); + + await messenger.send({ + message: { + computed_key: key, + status_message: + 'ODS parsed 
successfully. Results will be uploaded to ElasticSearch soon...', + status_code: STATUS.PARSED, + }, + to: ['logs'], + }); + + return resolve('ODS parsed successfully'); + }); + }); + } catch (e) { + throw e; + } +}; + +export default handler; diff --git a/services/ingestion/etl/2014uk16rfop001/ods/src/lib/transform.js b/services/ingestion/etl/2014uk16rfop001/ods/src/lib/transform.js new file mode 100644 index 000000000..76c28d13e --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/src/lib/transform.js @@ -0,0 +1,209 @@ +// @flow + +import crypto from 'crypto'; +import type { Project } from '@eubfr/types'; +import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; + +/** + * Preprocess `budget`. + * + * Input fields taken from the `record` are: + * - `Aid element £` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Budget} + */ + +const getBudget = record => ({ + total_cost: sanitizeBudgetItem({ + value: record['Aid element £'], + currency: 'GBP', + raw: record['Aid element £'], + }), + eu_contrib: sanitizeBudgetItem(), + private_fund: sanitizeBudgetItem(), + public_fund: sanitizeBudgetItem(), + other_contrib: sanitizeBudgetItem(), + funding_area: [], + mmf_heading: '', +}); + +/** + * Preprocess `description`. 
+ * + * Input fields taken from the `record` are: + * - `Beneficiary identifier (E-claims ref)` + * - `Sector NACE group level` + * - `SANI reference of the aid measure` + * - `Objective of the aid` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getDescription = record => { + let description = ''; + + const fields = [ + 'Beneficiary identifier (E-claims ref)', + 'Sector NACE group level', + 'SANI reference of the aid measure', + 'Objective of the aid', + ]; + + fields.forEach(descriptionField => { + description += `${descriptionField}: ${record[descriptionField]} \n`; + }); + + return description; +}; + +/** + * Preprocess `project_id`. + * + * Input fields taken from the `record` are: + * - `Objective of the aid` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getProjectId = record => + record['Objective of the aid'] + ? crypto + .createHash('md5') + .update(record['Objective of the aid']) + .digest('hex') + : ''; + +/** + * Preprocess `project_locations`. + * + * Input fields taken from the `record` are: + * - `Location of Benficiary NUTS level II (drop down)` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getLocations = record => [ + { + address: '', + centroid: null, + country_code: 'GB', + location: null, + nuts: record['Location of Benficiary NUTS level II (drop down)'] + ? [ + { + code: record['Location of Benficiary NUTS level II (drop down)'], + name: '', + level: 2, + year: null, + }, + ] + : [], + postal_code: '', + region: '', + town: '', + }, +]; + +/** + * Preprocess `third_parties`. 
+ * + * Input fields taken from the `record` are: + * - `Name of beneficiary` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array} + */ + +const getThirdParties = record => + record['Name of beneficiary'] + ? [ + { + address: '', + country: 'GB', + email: '', + name: record['Name of beneficiary'] + ? record['Name of beneficiary'].trim() + : '', + phone: '', + region: '', + role: 'Beneficiary', + type: '', + website: '', + }, + ] + : []; + +/** + * Preprocess `timeframe`. + * + * Input fields taken from the `record` are: + * - `Date of granting` + * + * @memberof 2014uk16rfop001XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Timeframe} + */ + +const getTimeframe = record => { + const from = record['Date of granting'] || null; + + return { + from, + from_precision: 'day', + to: null, + to_precision: 'day', + }; +}; + +/** + * Map fields for 2014uk16rfop001 producer, ODS file types + * + * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/ods/test/stubs/record.json|stub} + * + * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/ods/src/lib/transform.js|implementation details} + * + * @name 2014uk16rfop001XlsTransform + * @param {Object} record Piece of data to transform before going to harmonized storage. + * @returns {Project} JSON matching the type fields.
+ */ +export default (record: Object): Project | null => { + if (!record) return null; + + // Map the fields + return { + action: '', + budget: getBudget(record), + call_year: '', + description: getDescription(record), + ec_priorities: [], + media: [], + programme_name: '', + project_id: getProjectId(record), + project_locations: getLocations(record), + project_website: '', + complete: false, + related_links: [], + reporting_organisation: 'Member states', + results: { + available: '', + result: '', + }, + status: '', + sub_programme_name: '', + success_story: '', + themes: [], + third_parties: getThirdParties(record), + timeframe: getTimeframe(record), + title: 'European Regional Development Fund State Aid', + type: [], + }; +}; diff --git a/services/ingestion/etl/2014uk16rfop001/ods/test/stubs/record.json b/services/ingestion/etl/2014uk16rfop001/ods/test/stubs/record.json new file mode 100644 index 000000000..2de0bcb00 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/test/stubs/record.json @@ -0,0 +1,13 @@ +{ + "Name of beneficiary": "CETO Wave Energy UK Ltd", + "Beneficiary identifier (E-claims ref)": "05R16P00351", + "Type of enterprise (drop down)": "SME", + "Location of Benficiary NUTS level II (drop down)": "Cornwall and Isles of Scilly", + "Sector NACE group level": "M71.20 Technical testing and analysis", + "Aid element £": 3636525, + "Aid instrument (drop down)": "Grant/Interest rate subsidy", + "Date of granting": "2016-11-21T22:00:00.000Z", + "Objective of the aid": "Fundamental research (Art. 
25(2)(a))", + "Granting authority": " DCLG", + "SANI reference of the aid measure": "SA 39161" +} diff --git a/services/ingestion/etl/2014uk16rfop001/ods/test/unit/events/onParseODS.spec.js b/services/ingestion/etl/2014uk16rfop001/ods/test/unit/events/onParseODS.spec.js new file mode 100644 index 000000000..81e55582c --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/test/unit/events/onParseODS.spec.js @@ -0,0 +1,20 @@ +/** + * @jest-environment node + */ + +import onParseODS from '../../../src/events/onParseODS'; + +describe(`Function onParseODS in "@eubfr/ingestion-etl-2014uk16rfop001-ods"`, () => { + test('The function requires BUCKET, REGION and STAGE environment variables', async () => { + const event = {}; + const context = {}; + + try { + await onParseODS(event, context); + } catch (error) { + expect(error.message).toEqual( + 'BUCKET, REGION and STAGE environment variables are required!' + ); + } + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/__snapshots__/transform.spec.js.snap new file mode 100644 index 000000000..49a2047b1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -0,0 +1,98 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`2014uk16rfop001 ODS transformer Produces correct JSON output structure 1`] = ` +Object { + "action": "", + "budget": Object { + "eu_contrib": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "funding_area": Array [], + "mmf_heading": "", + "other_contrib": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "private_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "public_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "total_cost": Object { + "currency": "GBP", + "raw": 3636525, + "value": 3636525, + }, + }, + "call_year": "", + "complete": false, + 
"description": "Beneficiary identifier (E-claims ref): 05R16P00351 +Sector NACE group level: M71.20 Technical testing and analysis +SANI reference of the aid measure: SA 39161 +Objective of the aid: Fundamental research (Art. 25(2)(a)) +", + "ec_priorities": Array [], + "media": Array [], + "programme_name": "", + "project_id": "3d3de125a4aa564b78a2979649ed587a", + "project_locations": Array [ + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [ + Object { + "code": "Cornwall and Isles of Scilly", + "level": 2, + "name": "", + "year": null, + }, + ], + "postal_code": "", + "region": "", + "town": "", + }, + ], + "project_website": "", + "related_links": Array [], + "reporting_organisation": "Member states", + "results": Object { + "available": "", + "result": "", + }, + "status": "", + "sub_programme_name": "", + "success_story": "", + "themes": Array [], + "third_parties": Array [ + Object { + "address": "", + "country": "GB", + "email": "", + "name": "CETO Wave Energy UK Ltd", + "phone": "", + "region": "", + "role": "Beneficiary", + "type": "", + "website": "", + }, + ], + "timeframe": Object { + "from": "2016-11-21T22:00:00.000Z", + "from_precision": "day", + "to": null, + "to_precision": "day", + }, + "title": "European Regional Development Fund State Aid", + "type": Array [], +} +`; diff --git a/services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/transform.spec.js b/services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/transform.spec.js new file mode 100644 index 000000000..fd159aeb0 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/test/unit/lib/transform.spec.js @@ -0,0 +1,22 @@ +/** + * @jest-environment node + */ + +import mapper from '../../../src/lib/transform'; +import testRecord from '../../stubs/record.json'; + +describe('2014uk16rfop001 ODS transformer', () => { + let result = {}; + + beforeAll(() => { + result = mapper(testRecord); + }); + + test('Returns null when record 
is not provided', () => { + expect(mapper()).toBe(null); + }); + + test('Produces correct JSON output structure', () => { + expect(result).toMatchSnapshot(); + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop001/ods/webpack.config.js b/services/ingestion/etl/2014uk16rfop001/ods/webpack.config.js new file mode 100644 index 000000000..30fd8ced7 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop001/ods/webpack.config.js @@ -0,0 +1,32 @@ +const slsw = require('serverless-webpack'); +const path = require('path'); + +module.exports = { + entry: slsw.lib.entries, + target: 'node', + mode: slsw.lib.webpack.isLocal ? 'development' : 'production', + optimization: { + minimize: process.env.EUBFR_ENV && process.env.EUBFR_ENV === 'prod', + }, + devtool: 'nosources-source-map', + externals: [{ 'aws-sdk': true }], + module: { + rules: [ + { + test: /\.js$/, + use: [ + { + loader: 'babel-loader', + }, + ], + include: __dirname, + exclude: /node_modules/, + }, + ], + }, + output: { + libraryTarget: 'commonjs2', + path: path.join(__dirname, '.webpack'), + filename: '[name].js', + }, +}; diff --git a/tools/eubfr-cli/lib/getServices.js b/tools/eubfr-cli/lib/getServices.js index 37c14a28f..ae307d47f 100644 --- a/tools/eubfr-cli/lib/getServices.js +++ b/tools/eubfr-cli/lib/getServices.js @@ -18,6 +18,7 @@ const allServices = [ { service: 'ingestion-etl-2014tc16rftn002-xls', exportEnv: false }, { service: 'ingestion-etl-2014uk16rfop001-xls', exportEnv: false }, { service: 'ingestion-etl-2014uk16rfop001-csv', exportEnv: false }, + { service: 'ingestion-etl-2014uk16rfop001-ods', exportEnv: false }, { service: 'ingestion-etl-bulgaria-xls', exportEnv: false }, { service: 'ingestion-etl-cordis-csv', exportEnv: false }, { service: 'ingestion-etl-devco-xls', exportEnv: false }, From e1476781070ea8b8949eebfaf009f1a3ca28d149 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 13 May 2019 15:35:06 +0300 Subject: [PATCH 07/10] Improve documentation a bit --- docs/types/README.md | 
1 + docs/types/etls/2014uk16rfop001-xls.md | 4 ++-- scripts/documentation/docs-md.js | 4 ++-- .../2014uk16rfop001/xls/src/lib/transform/ESF/transform.js | 2 +- .../2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/types/README.md b/docs/types/README.md index 736117652..8289ade64 100644 --- a/docs/types/README.md +++ b/docs/types/README.md @@ -19,6 +19,7 @@ Here's a list of the transformations made in ETLs around the `Project` model. - [2014tc16rftn002 - XLS](./etls/2014tc16rftn002-xls.md) - [2014uk16rfop001 - CSV](./etls/2014uk16rfop001-csv.md) - [2014uk16rfop001 - ODS](./etls/2014uk16rfop001-ods.md) +- [2014uk16rfop001 - XLS](./etls/2014uk16rfop001-xls.md) - [bulgaria - XLS](./etls/bulgaria-xls.md) - [CORDIS - CSV](./etls/cordis-csv.md) - [DEVCO - XLS](./etls/devco-xls.md) diff --git a/docs/types/etls/2014uk16rfop001-xls.md b/docs/types/etls/2014uk16rfop001-xls.md index ff68679c2..0350c996b 100644 --- a/docs/types/etls/2014uk16rfop001-xls.md +++ b/docs/types/etls/2014uk16rfop001-xls.md @@ -2,7 +2,7 @@ ## 2014uk16rfop001XlsTransform -Map fields for 2014uk16rfop001 producer, XLS file types +Map fields for 2014uk16rfop001 producer, XLS file types, ESF funding type. Example input data: [stub][1] @@ -263,7 +263,7 @@ Returns **[String][4]** The ISO 3166-1 country code ## 2014uk16rfop001XlsTransform -Map fields for 2014uk16rfop001 producer, XLS file types +Map fields for 2014uk16rfop001 producer, XLS file types, ESIF funding type. 
Example input data: [stub][7] diff --git a/scripts/documentation/docs-md.js b/scripts/documentation/docs-md.js index 1159558db..969f3b463 100755 --- a/scripts/documentation/docs-md.js +++ b/scripts/documentation/docs-md.js @@ -38,10 +38,10 @@ const transforms = [ ]; transforms.forEach(transform => { - const etl = transform.split('-'); + const [name, format] = transform.split('-'); documentation - .build(`**/etl/${etl[0]}/${etl[1]}/**/transform.js`, {}) + .build(`**/etl/${name}/${format}/**/transform.js`, {}) .then(documentation.formats.md) .then(output => { fs.writeFileSync( diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js index 18829ddba..525f940f0 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js @@ -224,7 +224,7 @@ const getTitle = record => record['Operation Name'] ? record['Operation Name'].trim() : ''; /** - * Map fields for 2014uk16rfop001 producer, XLS file types + * Map fields for 2014uk16rfop001 producer, XLS file types, ESF funding type. * * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESF/record.json|stub} * diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js index 3a9982262..0e405f8ab 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js @@ -218,7 +218,7 @@ const getTitle = record => record['Name of Project'] ? 
record['Name of Project'].trim() : ''; /** - * Map fields for 2014uk16rfop001 producer, XLS file types + * Map fields for 2014uk16rfop001 producer, XLS file types, ESIF funding type. * * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop001/xls/test/stubs/ESIF/record.json|stub} * From f274ed1e1f65765d32b7d290353c7ecd4084b4da Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 20 May 2019 16:13:27 +0300 Subject: [PATCH 08/10] Update --- services/ingestion/etl/2014uk16rfop001/csv/package.json | 6 +++--- services/ingestion/etl/2014uk16rfop001/ods/package.json | 6 +++--- services/ingestion/etl/2014uk16rfop001/xls/package.json | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/services/ingestion/etl/2014uk16rfop001/csv/package.json b/services/ingestion/etl/2014uk16rfop001/csv/package.json index 25dcf1bd8..b842dcb29 100644 --- a/services/ingestion/etl/2014uk16rfop001/csv/package.json +++ b/services/ingestion/etl/2014uk16rfop001/csv/package.json @@ -7,8 +7,8 @@ "test:unit": "jest --testPathPattern=unit" }, "dependencies": { - "@eubfr/lib": "^0.6.0", - "@eubfr/logger-messenger": "^0.6.0", + "@eubfr/lib": "^0.7.0", + "@eubfr/logger-messenger": "^0.7.0", "csv-parse": "4.3.4", "numeral": "2.0.6" }, @@ -16,7 +16,7 @@ "@babel/core": "7.4.3", "@babel/preset-env": "7.4.3", "@babel/preset-flow": "7.0.0", - "@eubfr/types": "^0.6.0", + "@eubfr/types": "^0.7.0", "aws-sdk": "2.434.0", "babel-jest": "24.7.0", "babel-loader": "8.0.5", diff --git a/services/ingestion/etl/2014uk16rfop001/ods/package.json b/services/ingestion/etl/2014uk16rfop001/ods/package.json index 5a55b30e9..745e79654 100644 --- a/services/ingestion/etl/2014uk16rfop001/ods/package.json +++ b/services/ingestion/etl/2014uk16rfop001/ods/package.json @@ -7,15 +7,15 @@ "test:unit": "jest --testPathPattern=unit" }, "dependencies": { - "@eubfr/lib": "^0.6.0", - "@eubfr/logger-messenger": "^0.6.0", + "@eubfr/lib": "^0.7.0", + 
"@eubfr/logger-messenger": "^0.7.0", "xlsx": "0.14.2" }, "devDependencies": { "@babel/core": "7.4.3", "@babel/preset-env": "7.4.3", "@babel/preset-flow": "7.0.0", - "@eubfr/types": "^0.6.0", + "@eubfr/types": "^0.7.0", "aws-sdk": "2.434.0", "babel-jest": "24.7.0", "babel-loader": "8.0.5", diff --git a/services/ingestion/etl/2014uk16rfop001/xls/package.json b/services/ingestion/etl/2014uk16rfop001/xls/package.json index c2e7f7316..45d11d581 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/package.json +++ b/services/ingestion/etl/2014uk16rfop001/xls/package.json @@ -7,8 +7,8 @@ "test:unit": "jest --testPathPattern=unit" }, "dependencies": { - "@eubfr/lib": "^0.6.0", - "@eubfr/logger-messenger": "^0.6.0", + "@eubfr/lib": "^0.7.0", + "@eubfr/logger-messenger": "^0.7.0", "i18n-iso-countries": "3.7.8", "xlsx": "0.14.2" }, @@ -16,7 +16,7 @@ "@babel/core": "7.4.3", "@babel/preset-env": "7.4.3", "@babel/preset-flow": "7.0.0", - "@eubfr/types": "^0.6.0", + "@eubfr/types": "^0.7.0", "aws-sdk": "2.434.0", "babel-jest": "24.7.0", "babel-loader": "8.0.5", From 8f5b2cba364e7ff136bcddf45fa5292084dc0aff Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Wed, 22 May 2019 14:32:45 +0300 Subject: [PATCH 09/10] Version bump --- services/ingestion/etl/2014uk16rfop001/xls/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/ingestion/etl/2014uk16rfop001/xls/package.json b/services/ingestion/etl/2014uk16rfop001/xls/package.json index 45d11d581..f89c97772 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/package.json +++ b/services/ingestion/etl/2014uk16rfop001/xls/package.json @@ -1,7 +1,7 @@ { "private": true, "name": "@eubfr/ingestion-etl-2014uk16rfop001-xls", - "version": "0.6.0", + "version": "0.7.0", "scripts": { "deploy": "sls deploy -v", "test:unit": "jest --testPathPattern=unit" From 1b9ce9509649db797f5bb72c168a2f603fedd042 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Wed, 22 May 2019 15:18:52 +0300 Subject: [PATCH 10/10] 
Correct path --- .../etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js | 2 +- .../etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js index 525f940f0..40ec28499 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESF/transform.js @@ -2,7 +2,7 @@ import crypto from 'crypto'; import type { Project } from '@eubfr/types'; -import getCountryCode from '@eubfr/lib/getCountryCode'; +import getCountryCode from '@eubfr/lib/location/getCountryCode'; import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; /** diff --git a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js index 0e405f8ab..e94c6ea9d 100644 --- a/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js +++ b/services/ingestion/etl/2014uk16rfop001/xls/src/lib/transform/ESIF/transform.js @@ -3,7 +3,7 @@ import crypto from 'crypto'; import countries from 'i18n-iso-countries'; import type { Project } from '@eubfr/types'; -import getCountryCode from '@eubfr/lib/getCountryCode'; +import getCountryCode from '@eubfr/lib/location/getCountryCode'; import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; /**