Skip to content

Commit f7a2243

Browse files
feat: add taxonomy transform command [GROOT-1496] (#2797)
* feat: add taxonomy transform command [GROOT-1506] WIP * fix: type changes * feat: introduce class with setters for Concept * feat: add support for concept schemes, add example * feat: clean up * feat: add other csv example * feat: add integration tests * fix: update snapshot * fix: small clean up * fix: remove space id from command requirements * fix: fix column references in example test * Update lib/cmds/organization_cmds/utils/taxonomy.ts Co-authored-by: Jared Jolton <[email protected]> * feat: update command params, add tests --------- Co-authored-by: Jared Jolton <[email protected]>
1 parent d8dc02c commit f7a2243

File tree

14 files changed

+1221
-0
lines changed

14 files changed

+1221
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -192,3 +192,5 @@ contentful-import-error-log-*.json
192192

193193
.idea
194194
reports
195+
196+
data/*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
animalia,animalia,A taxonomy focused on the categorization of various animals and their classifications.,,,,,,,,,,
2+
mammals,,,Mammals,mammals,,,,,,,,
3+
primates,,,,,Primates,primates,,,,,,
4+
felidae,,,,,Felidae (Cats),felidae,,,,,,
5+
canidae,,,,,Canidae (Dogs),canidae,,,,,,
6+
rodentia,,,,,Rodentia (Rodents),rodentia,,,,,,
7+
aves,,,Birds,aves,,,,,,,,
8+
passeriformes,,,,,Passeriformes (Perching Birds),passeriformes,,,,,,
9+
raptors,,,,,Raptors (Birds of Prey),raptors,,,,,,
10+
waterfowl,,,,,Waterfowl,waterfowl,,,,,,
11+
reptilia,,,Reptiles,reptilia,,,,,,,,
12+
squamata,,,,,Squamata (Lizards and Snakes),squamata,,,,,,
13+
testudines,,,,,Testudines (Turtles),testudines,,,,,,
14+
crocodylia,,,,,Crocodylia (Crocodiles),crocodylia,,,,,,
15+
amphibia,,,Amphibians,amphibia,,,,,,,,
16+
anura,,,,,Anura (Frogs and Toads),anura,,,,,,
17+
caudata,,,,,Caudata (Salamanders),caudata,,,,,,
18+
gymnophiona,,,,,Gymnophiona (Caecilians),gymnophiona,,,,,,
19+
pisces,,,Fish,fish,,,,,,,,
20+
chondrichthyes,,,,,Chondrichthyes (Cartilaginous Fish),chondrichthyes,,,,,,
21+
osteichthyes,,,,,Osteichthyes (Bony Fish),osteichthyes,,,,,,
22+
insecta,,,Insects,insecta,,,,,,,,
23+
lepidoptera,,,,,Lepidoptera (Butterflies and Moths),lepidoptera,,,,,,
24+
coleoptera,,,,,Coleoptera (Beetles),coleoptera,,,,,,
25+
diptera,,,,,Diptera (Flies),diptera,,,,,,
26+
hymenoptera,,,,,Hymenoptera (Bees Wasps Ants),hymenoptera,,,,,,
27+
arachnida,,,Arachnids,arachnida,,,,,,,,
28+
araneae,,,,,Araneae (Spiders),araneae,,,,,,
29+
scorpiones,,,,,Scorpiones (Scorpions),scorpiones,,,,,,
30+
opiliones,,,,,Opiliones (Harvestmen),opiliones,,,,,,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
module.exports = async function ({ csv, taxonomy, fs }) {
2+
const findParent = (data, index) => {
3+
let notEmptyIndex = data[index].findIndex(
4+
(c, index) => c != '' && index > 0
5+
)
6+
7+
//Parent is concept scheme
8+
if (notEmptyIndex == 3) {
9+
return null
10+
}
11+
12+
let parent = null
13+
14+
while (index > 1) {
15+
index -= 1
16+
if (data[index][notEmptyIndex - 2] != '') {
17+
parent = data[index][0]
18+
break
19+
}
20+
}
21+
22+
return parent
23+
}
24+
25+
const csvFile = await fs.readFile(
26+
fs.cwd() + '/docs/organization/taxonomy-transform/examples/example-data.csv'
27+
)
28+
const { data } = await csv.parse(csvFile.toString(), { header: false })
29+
let conceptScheme = null
30+
let parent = null
31+
32+
for (let i = 0; i < data.length; i++) {
33+
const row = data[i]
34+
35+
//Cocneptscheme
36+
if (row[1] != '') {
37+
conceptScheme = taxonomy.addConceptScheme(row[0], {
38+
prefLabel: row[1]
39+
})
40+
} else {
41+
const notEmptyIndex = row.findIndex((c, index) => c != '' && index > 0)
42+
let concept = taxonomy.getConcept(row[0])
43+
44+
if (!concept) {
45+
concept = taxonomy.addConcept(row[0], {
46+
prefLabel: row[notEmptyIndex]
47+
})
48+
}
49+
50+
conceptScheme.addConcept(row[0])
51+
parent = findParent(data, i)
52+
if (parent) {
53+
concept.addBroader(parent)
54+
}
55+
}
56+
}
57+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/* eslint-disable @typescript-eslint/no-empty-function */
2+
import Listr from 'listr'
3+
import { noop } from 'lodash'
4+
import path from 'path'
5+
import type { Argv } from 'yargs'
6+
import { handleAsyncError as handle } from '../../utils/async'
7+
import { createPlainClient } from '../../utils/contentful-clients'
8+
import { cursorPaginate } from '../../utils/cursor-pagninate'
9+
import { ensureDir, getPath, readFileP, writeFileP } from '../../utils/fs'
10+
import { getHeadersFromOption } from '../../utils/headers'
11+
import { success, log } from '../../utils/log'
12+
import * as Papa from 'papaparse'
13+
import { Taxonomy } from './utils/taxonomy'
14+
15+
// Sub-command name and the one-line description exported alongside it.
module.exports.command = 'taxonomy-transform'

module.exports.desc =
  'transform taxonomy from external format to contentful json format'

/**
 * Declares the usage line and the options accepted by `taxonomy-transform`.
 *
 * `organization-id` and `transform-script` are mandatory; `save-file`
 * defaults to true and `silent` to false.
 */
module.exports.builder = (yargs: Argv) =>
  yargs.usage('Usage: contentful organization taxonomy-transform').options({
    'management-token': {
      alias: 'mt',
      describe: 'Contentful management API token',
      type: 'string'
    },
    'organization-id': {
      alias: 'oid',
      describe: 'ID of Organization with source data',
      type: 'string',
      demandOption: true
    },
    header: {
      alias: 'H',
      type: 'string',
      describe: 'Pass an additional HTTP Header'
    },
    'output-file': {
      alias: 'o',
      type: 'string',
      describe:
        'Output file. It defaults to ./data/<timestamp>-<organization-id>-transformed.json'
    },
    'transform-script': {
      alias: 't',
      describe: 'Script used to transform the taxonomy data',
      type: 'string',
      demandOption: true
    },
    'save-file': {
      describe: 'Save the transformed taxonomies as a json file',
      type: 'boolean',
      default: true
    },
    silent: {
      alias: 's',
      type: 'boolean',
      describe: 'Suppress any log output',
      default: false
    }
  })
63+
64+
/** Arguments received by `taxonomyTransform`. */
type Params = {
  /** Carries the management token used to create the API client. */
  context: { managementToken: string }
  /** Additional HTTP header, handed to `getHeadersFromOption`. */
  header?: string
  /** Organization whose concepts and concept schemes are fetched. */
  organizationId: string
  /** Explicit output path; a timestamped file under `data` is used if omitted. */
  outputFile?: string
  /** Path of the transform script, resolved against the working directory. */
  transformScript: string
  /** Write the result to a file when truthy, otherwise log it. */
  saveFile?: boolean
  /** Selects the silent task renderer and suppresses the success message. */
  silent?: boolean
}
73+
74+
// Exported locale code; not referenced elsewhere in this section.
// NOTE(review): presumably consumed by the taxonomy utilities — confirm.
export const defaultLocale = 'en-US'
75+
76+
/**
 * Shape of the context object that `taxonomyTransform` runs its tasks with
 * and passes to the default export of the user-supplied transform script.
 */
export interface TransformContext {
  /** CSV helpers. */
  csv: {
    /**
     * Parses CSV into JSON. Typed as either the synchronous string-based
     * parser or the promise-returning `File`-based parser.
     */
    parse:
      | (<T>(
          csvString: string,
          config?: Papa.ParseConfig
        ) => Papa.ParseResult<T>)
      | (<T>(
          file: File,
          config?: Papa.ParseConfig
        ) => Promise<Papa.ParseResult<T>>)
  }
  /** File-system helpers. */
  fs: {
    /** Reads any file from disk. */
    readFile: typeof readFileP
    /** Same signature as `process.cwd`. */
    cwd: typeof process.cwd
  }
  /** Taxonomy builder that the transform script populates. */
  taxonomy: Taxonomy
}
96+
97+
// Context handed to the task runner and, through it, to the transform script.
// It is created once at module load, so every `taxonomyTransform` call in the
// same process works on this same `Taxonomy` instance.
const transformContext: TransformContext = {
  taxonomy: new Taxonomy(),
  fs: { readFile: readFileP, cwd: process.cwd },
  csv: { parse: Papa.parse }
}
107+
108+
/**
 * Runs the `taxonomy-transform` command.
 *
 * Fetches the organization's existing concepts and concept schemes, stores
 * them on the shared taxonomy, runs the user-supplied transform script
 * against the transform context, and then either writes the resulting JSON
 * to a file or logs it.
 *
 * @param params command arguments
 * @param params.context holds the `managementToken` used for the API client
 * @param params.header optional extra HTTP header
 * @param params.organizationId organization to read concepts and schemes from
 * @param params.outputFile output path; defaults to
 *   `data/<timestamp>-<organization-id>-transformed.json`
 * @param params.saveFile when truthy the JSON is written to the output path,
 *   otherwise it is logged
 * @param params.transformScript path of the transform script, resolved
 *   against the current working directory
 * @param params.silent use the silent task renderer and skip the success
 *   message
 * @throws {TypeError} when the transform script has no callable default export
 */
async function taxonomyTransform({
  context,
  header,
  organizationId,
  outputFile,
  saveFile,
  transformScript,
  silent
}: Params): Promise<void> {
  const { managementToken } = context

  const client = await createPlainClient({
    accessToken: managementToken,
    feature: 'taxonomy-transform',
    headers: getHeadersFromOption(header),
    throttle: 8,
    logHandler: noop
  })

  const outputTarget = getPath(
    outputFile ||
      path.join('data', `${Date.now()}-${organizationId}-transformed.json`)
  )

  // Only create the output directory when a file is actually going to be
  // written; otherwise the run would leave an empty directory behind.
  if (saveFile) {
    await ensureDir(path.dirname(outputTarget))
  }

  const tasks = new Listr(
    [
      {
        title: 'Transforming taxonomy data',
        task: async ctx => {
          return new Listr([
            {
              title: 'Exporting Concepts',
              task: async () => {
                ctx.taxonomy.setExistingConcepts(
                  await cursorPaginate({
                    queryPage: pageUrl =>
                      client.concept.getMany({
                        organizationId,
                        query: { pageUrl }
                      })
                  })
                )
              }
            },
            {
              title: 'Exporting Concept Schemes',
              task: async () => {
                ctx.taxonomy.setExistingConceptSchemes(
                  await cursorPaginate({
                    queryPage: pageUrl =>
                      client.conceptScheme.getMany({
                        organizationId,
                        query: { pageUrl }
                      })
                  })
                )
              }
            },
            {
              title: 'Running transform script',
              task: async () => {
                const filePath = path.resolve(process.cwd(), transformScript)
                const transform = await import(filePath)

                // Fail with an actionable message instead of the bare
                // "transform.default is not a function".
                if (typeof transform.default !== 'function') {
                  throw new TypeError(
                    `Transform script ${filePath} must export a function as its default export`
                  )
                }

                await transform.default(ctx)
              }
            }
          ])
        }
      }
    ],
    { renderer: silent ? 'silent' : 'default' }
  )

  // The tasks mutate the taxonomy held by the context they are run with.
  await tasks.run(transformContext)

  const serialized = JSON.stringify(transformContext.taxonomy.toJson(), null, 2)

  if (saveFile) {
    await writeFileP(outputTarget, serialized)
    if (!silent) {
      success(`✅ Data transformed successfully and saved to ${outputTarget}`)
    }
  } else {
    // The JSON itself is logged regardless of `silent`.
    log(serialized)
    if (!silent) {
      success(`✅ Data transformed successfully`)
    }
  }
}
196+
197+
module.exports.taxonomyTransform = taxonomyTransform
198+
199+
module.exports.handler = handle(taxonomyTransform)
200+
201+
export default taxonomyTransform

0 commit comments

Comments
 (0)