11import { Type , TypeGuard , type Static } from '@sinclair/typebox' ;
22import { Value , type ValueErrorIterator } from '@sinclair/typebox/value' ;
3- import { csvParse , csvParseRows } from 'd3-dsv' ;
3+ import { csvParseRows } from 'd3-dsv' ;
4+ import { Column , type TColumn } from './column' ;
45import type { TAnonymousTable , TColumnsDefinition } from './table' ;
56
67const ROW_INDEX_OFFSET = 1 ;
@@ -89,33 +90,46 @@ export function parseCSVFromString<
8990 } ;
9091
9192 const outputSchema = Type . Array ( columnsSchema ) ;
92- const expectedColumnSchemas = Object . entries ( columnsSchema . properties ) ;
9393
94- const [ headers ] = csvParseRows ( csvString ) ;
94+ const [ headerRow , ... bodyRows ] = csvParseRows ( csvString ) ;
9595
96- const missingHeaders = expectedColumnSchemas . filter (
97- ( [ name ] ) => ! headers . includes ( name ) ,
96+ const missingHeaders = Object . keys ( columnsSchema . properties ) . filter (
97+ ( name ) => ! headerRow . includes ( name ) ,
9898 ) ;
9999
100100 if ( missingHeaders . length > 0 ) {
101- const listFormatter = ( str : string [ ] ) =>
102- new Intl . ListFormat ( 'en' , {
103- type : 'conjunction' ,
104- } ) . format ( str . map ( ( str ) => `"${ str } "` ) ) ;
101+ const formatArray = ( str : string [ ] ) =>
102+ listFormatter . format ( str . map ( ( str ) => `"${ str } "` ) ) ;
105103
106- throw `Column ${ listFormatter (
104+ throw `Column ${ formatArray (
107105 missingHeaders . map ( ( [ name ] ) => name ) ,
108- ) } are missing from the header row (received ${ listFormatter ( headers ) } )`;
106+ ) } are missing from the header row (received ${ formatArray ( headerRow ) } )`;
109107 }
110108
111- const data = Value . Convert (
112- outputSchema ,
113- csvParse ( csvString , processRow ( trim ) ) ,
109+ const headerSchemas = headerRow . map < [ string , TColumn | undefined ] > (
110+ ( name ) => [
111+ name ,
112+ name in columnsSchema . properties
113+ ? columnsSchema . properties [ name ]
114+ : includeUnknownColumns
115+ ? Column . String ( )
116+ : undefined ,
117+ ] ,
114118 ) ;
115119
116- if ( ! includeUnknownColumns ) {
117- Value . Clean ( outputSchema , data ) ;
118- }
120+ const data = bodyRows . map ( ( cells ) =>
121+ headerSchemas . reduce < Record < string , unknown > > (
122+ ( rowObj , [ name , schema ] , i ) => {
123+ if ( schema ) {
124+ const newValue = trim ? cells [ i ] . trim ( ) : cells [ i ] ;
125+ rowObj [ name ] =
126+ newValue . length > 0 ? Value . Convert ( schema , newValue ) : null ;
127+ }
128+ return rowObj ;
129+ } ,
130+ { } ,
131+ ) ,
132+ ) ;
119133
120134 if ( ! Value . Check ( outputSchema , data ) ) {
121135 reject ( Error ( formatParsingError ( Value . Errors ( outputSchema , data ) ) ) ) ;
@@ -125,16 +139,6 @@ export function parseCSVFromString<
125139 } ) ;
126140}
127141
128- const processRow = ( trim : boolean ) => ( obj : Record < any , string > ) =>
129- Object . entries ( obj ) . reduce < Record < any , string | null > > (
130- ( newObj , [ key , value ] ) => {
131- const newValue = trim ? value . trim ( ) : value ;
132- newObj [ key ] = newValue . length > 0 ? newValue : null ;
133- return newObj ;
134- } ,
135- { } ,
136- ) ;
137-
138142const formatParsingError = ( errors : ValueErrorIterator ) : string =>
139143 [
140144 'The following values mismatch the column type:' ,
@@ -158,5 +162,5 @@ const formatParsingError = (errors: ValueErrorIterator): string =>
158162 ] . join ( '\n' ) ;
159163
160164const listFormatter = new Intl . ListFormat ( 'en' , {
161- type : 'disjunction ' ,
165+ type : 'conjunction ' ,
162166} ) ;
0 commit comments