-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert.js
More file actions
72 lines (59 loc) · 2.35 KB
/
Copy pathconvert.js
File metadata and controls
72 lines (59 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
const fs = require('fs');
const path = require('path');
const { parse } = require('csv-parse/sync');
// Build a lookup from classification code to classification name, read from
// classifications.json next to this script.
const classificationsPath = path.join(__dirname, 'classifications.json');
// Null-prototype object: a lookup by an arbitrary key can only hit codes that
// were loaded below, never inherited members such as "constructor".
const classificationMap = Object.create(null);
if (fs.existsSync(classificationsPath)) {
  const classifications = JSON.parse(fs.readFileSync(classificationsPath, 'utf-8'));
  if (!Array.isArray(classifications)) {
    throw new TypeError(`Expected an array of { code, name } entries in ${classificationsPath}`);
  }
  // Later entries overwrite earlier ones that share the same code
  for (const { code, name } of classifications) {
    classificationMap[code] = name;
  }
  console.log(`✓ Loaded ${Object.keys(classificationMap).length} classification mappings`);
} else {
  // A missing file is not fatal, but report it instead of continuing silently
  console.warn(`⚠ No classifications file at ${classificationsPath}; no classification mappings loaded`);
}
// Load the CSV source that lives next to this script
const csvPath = path.join(__dirname, 'classics.csv');
const csvData = fs.readFileSync(csvPath, { encoding: 'utf-8' });
// csv-parse settings: the header line supplies the keys of each record,
// empty lines are skipped, and whitespace around fields is trimmed
const parseOptions = { columns: true, skip_empty_lines: true, trim: true };
const rows = parse(csvData, parseOptions);
// Normalise column names: every space or dot in a key becomes an underscore.
// Cell values are copied over unchanged.
const processedRows = rows.map(record =>
  Object.entries(record).reduce((renamed, [column, cell]) => {
    renamed[column.replace(/[ .]/g, '_')] = cell;
    return renamed;
  }, {})
);
// Keep only rows whose classification field holds exactly one code that has
// an entry in classificationMap. Rows with no code, several comma-separated
// codes, or an unknown code are dropped.
const filteredRows = processedRows.filter(row => {
  const rawCode = row['bibliography_congress_classifications'];
  if (!rawCode) {
    return false;
  }
  const code = rawCode.trim();
  if (code === '' || code.includes(',')) {
    return false;
  }
  // Own-property check: a bare classificationMap[code] lookup would also find
  // inherited members, letting values such as "constructor" through.
  // An entry whose name is empty still counts as unmapped.
  return Object.prototype.hasOwnProperty.call(classificationMap, code) &&
    Boolean(classificationMap[code]);
});
console.log(`✓ Filtered ${processedRows.length - filteredRows.length} rows (missing, multiple, or unmapped classifications)`);
// Swap each remaining row's classification code for its mapped name (in place)
for (const record of filteredRows) {
  const field = 'bibliography_congress_classifications';
  const rawCode = record[field];
  if (!rawCode) {
    continue;
  }
  const lookupKey = rawCode.trim();
  const mappedName = classificationMap[lookupKey];
  // Keep the trimmed code itself when the lookup yields nothing usable
  record[field] = mappedName ? mappedName : lookupKey;
}
// Serialise the remaining rows as JSON with a 2-space indent, next to this script
const jsonPath = path.join(__dirname, 'classics.json');
const serialized = JSON.stringify(filteredRows, null, 2);
fs.writeFileSync(jsonPath, serialized);
// Print a short summary of the conversion
const summaryLines = [
  `✓ Converted CSV to JSON`,
  ` Input: ${csvPath}`,
  ` Output: ${jsonPath}`,
  ` Records: ${filteredRows.length}`,
];
for (const line of summaryLines) {
  console.log(line);
}