Skip to content

Commit d0734db

Browse files
committed
refactor: remove redundant csv parsing and use custom column pruning
1 parent e1e568f commit d0734db

1 file changed

Lines changed: 32 additions & 28 deletions

File tree

src/parser.ts

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { Type, TypeGuard, type Static } from '@sinclair/typebox';
22
import { Value, type ValueErrorIterator } from '@sinclair/typebox/value';
3-
import { csvParse, csvParseRows } from 'd3-dsv';
3+
import { csvParseRows } from 'd3-dsv';
4+
import { Column, type TColumn } from './column';
45
import type { TAnonymousTable, TColumnsDefinition } from './table';
56

67
const ROW_INDEX_OFFSET = 1;
@@ -89,33 +90,46 @@ export function parseCSVFromString<
8990
};
9091

9192
const outputSchema = Type.Array(columnsSchema);
92-
const expectedColumnSchemas = Object.entries(columnsSchema.properties);
9393

94-
const [headers] = csvParseRows(csvString);
94+
const [headerRow, ...bodyRows] = csvParseRows(csvString);
9595

96-
const missingHeaders = expectedColumnSchemas.filter(
97-
([name]) => !headers.includes(name),
96+
const missingHeaders = Object.keys(columnsSchema.properties).filter(
97+
(name) => !headerRow.includes(name),
9898
);
9999

100100
if (missingHeaders.length > 0) {
101-
const listFormatter = (str: string[]) =>
102-
new Intl.ListFormat('en', {
103-
type: 'conjunction',
104-
}).format(str.map((str) => `"${str}"`));
101+
const formatArray = (str: string[]) =>
102+
listFormatter.format(str.map((str) => `"${str}"`));
105103

106-
throw `Column ${listFormatter(
104+
throw `Column ${formatArray(
107105
missingHeaders.map(([name]) => name),
108-
)} are missing from the header row (received ${listFormatter(headers)})`;
106+
)} are missing from the header row (received ${formatArray(headerRow)})`;
109107
}
110108

111-
const data = Value.Convert(
112-
outputSchema,
113-
csvParse(csvString, processRow(trim)),
109+
const headerSchemas = headerRow.map<[string, TColumn | undefined]>(
110+
(name) => [
111+
name,
112+
name in columnsSchema.properties
113+
? columnsSchema.properties[name]
114+
: includeUnknownColumns
115+
? Column.String()
116+
: undefined,
117+
],
114118
);
115119

116-
if (!includeUnknownColumns) {
117-
Value.Clean(outputSchema, data);
118-
}
120+
const data = bodyRows.map((cells) =>
121+
headerSchemas.reduce<Record<string, unknown>>(
122+
(rowObj, [name, schema], i) => {
123+
if (schema) {
124+
const newValue = trim ? cells[i].trim() : cells[i];
125+
rowObj[name] =
126+
newValue.length > 0 ? Value.Convert(schema, newValue) : null;
127+
}
128+
return rowObj;
129+
},
130+
{},
131+
),
132+
);
119133

120134
if (!Value.Check(outputSchema, data)) {
121135
reject(Error(formatParsingError(Value.Errors(outputSchema, data))));
@@ -125,16 +139,6 @@ export function parseCSVFromString<
125139
});
126140
}
127141

128-
const processRow = (trim: boolean) => (obj: Record<any, string>) =>
129-
Object.entries(obj).reduce<Record<any, string | null>>(
130-
(newObj, [key, value]) => {
131-
const newValue = trim ? value.trim() : value;
132-
newObj[key] = newValue.length > 0 ? newValue : null;
133-
return newObj;
134-
},
135-
{},
136-
);
137-
138142
const formatParsingError = (errors: ValueErrorIterator): string =>
139143
[
140144
'The following values mismatch the column type:',
@@ -158,5 +162,5 @@ const formatParsingError = (errors: ValueErrorIterator): string =>
158162
].join('\n');
159163

160164
const listFormatter = new Intl.ListFormat('en', {
161-
type: 'disjunction',
165+
type: 'conjunction',
162166
});

0 commit comments

Comments
 (0)