@@ -147,26 +147,34 @@ exports.SessionGetColumnsNeedingFormats = (sid) => {
147147// Given guesses as returned by SessionGuessTypes and a list of domain-model
148148// fields (with name and type fields), returns the type guesses with each column
149149// augmented with a `fields` field - a Map mapping likely field names for this
150- // column to an array of possible formats for that field.
150+ // column to an array of safe formats for that field - and a `maybeFields` field
151+ // - the same but for "maybe" fields that might be a partial match for the data.
151152
152153exports . SessionSuggestFields = ( sid , typeGuesses , domainModelFields ) => {
153154 let result = [ ] ;
154155
155156 typeGuesses . forEach ( ( colTypeGuess ) => {
156- let fieldFormats = new Map ( ) ; // Map from field names to likely formats
157+ let safeFieldFormats = new Map ( ) ; // Map from field names to safe formats
158+ let maybeFieldFormats = new Map ( ) ; // Map from field names to possible formats
157159 // Examine every field, and check if they have a type that this column might
158160 // contain
159161 domainModelFields . forEach ( ( field ) => {
160- if ( colTypeGuess . types . has ( field . type ) ) {
161- // Add this field to the possibilities for the column, using the guessed
162+ if ( colTypeGuess . safeTypes . has ( field . type ) ) {
163+ // Add this field to the safe possibilities for the column, using the guessed
162164 // formats
163- fieldFormats . set ( field . name , colTypeGuess . types . get ( field . type ) ) ;
165+ safeFieldFormats . set ( field . name , colTypeGuess . safeTypes . get ( field . type ) ) ;
166+ }
167+ if ( colTypeGuess . maybeTypes . has ( field . type ) ) {
168+ // Add this field to the maybe possibilities for the column, using the guessed
169+ // formats
170+ maybeFieldFormats . set ( field . name , colTypeGuess . maybeTypes . get ( field . type ) ) ;
164171 }
165172 } ) ;
166173
167174 // Extend the column type guess with the "fields" field
168175 let typeGuessesAndFields = Object . assign ( { } , colTypeGuess ) ;
169- typeGuessesAndFields . fields = fieldFormats ;
176+ typeGuessesAndFields . safeFields = safeFieldFormats ;
177+ typeGuessesAndFields . maybeFields = maybeFieldFormats ;
170178 result . push ( typeGuessesAndFields ) ;
171179 } ) ;
172180
@@ -532,10 +540,12 @@ exports.SessionGetInputSampleRows = (sid, range, startCount, middleCount, endCou
532540// interpreted as extra text values.
533541
534542// The return value is an array, indexed on the relative column number within
535- // the range; each element is an object with elements "types" listing a range of
536- // possible types in a map whose values are either 'false' for types without
537- // formats, or an Array of possible formats for that type; and a field "full"
538- // indicating if the column has no missing values.
543+ // the range; each element is an object with elements "safeTypes" listing a
544+ // range of types that all cells in the column match, in a map whose values are
545+ // either 'false' for types without formats, or an Array of possible formats for
546+ // that type; "maybeTypes" in the same format, but listing type/format pairs
547+ // which *some* but not all cells match; and a field "full" indicating if the
548+ // column has no missing values.
539549exports . SessionGuessTypes = ( sid , range ) => {
540550 assert ( sessionStore . has ( sid ) , `No such session ${ sid } when guessing types` ) ;
541551 assert ( sessionStore . get ( sid ) . wb . Sheets [ range . sheet ] , `No such sheet ${ range . sheet } in session ${ sid } when guessing types` ) ;
@@ -556,7 +566,8 @@ exports.SessionGuessTypes = (sid, range) => {
556566
557567 // Initialise state, that we will refine as we go
558568 for ( let col = 0 ; col < columns ; col ++ ) {
559- guesses [ col ] = { types : undefined ,
569+ guesses [ col ] = { safeTpes : undefined ,
570+ maybeTypes : undefined ,
560571 full :true } ; // This is unset if we find a blank/missing
561572 // value
562573 }
@@ -577,32 +588,87 @@ exports.SessionGuessTypes = (sid, range) => {
577588 // Blank cell, so mark the column as not full but don't reduce the list of guessed types
578589 guesses [ col ] . full = false ;
579590 } else if ( possibleCellTypes ) {
580- if ( guesses [ col ] . types ) {
581- // Remove any types in the current guess that aren't possibly valid for
591+ // Check if this is the first time we've seen this column. safeTypes and
592+ // maybeTypes are both set at the same time, so we can use safeTypes
593+ // being set as a proxy for both.
594+ if ( guesses [ col ] . safeTypes ) {
595+ // Remove any safeTypes in the current guess that aren't possibly valid for
582596 // this cell
583- let newGuesses = new Map ( ) ;
584- guesses [ col ] . types . forEach ( ( formats , typeName ) => {
597+ let newSafeGuesses = new Map ( ) ;
598+ guesses [ col ] . safeTypes . forEach ( ( formats , typeName ) => {
585599 if ( possibleCellTypes . has ( typeName ) ) {
586600 if ( formats ) {
587601 // Type with formats, so find the intersection of the current possible formats and this cell's possible formats
588602 let possibleCellFormats = possibleCellTypes . get ( typeName ) ;
589603 let newPossibleFormats = formats . filter ( value => possibleCellFormats . includes ( value ) ) ;
590- newGuesses . set ( typeName , newPossibleFormats ) ;
604+ newSafeGuesses . set ( typeName , newPossibleFormats ) ;
591605 } else {
592606 // Type without formats
593- newGuesses . set ( typeName , formats ) ;
607+ newSafeGuesses . set ( typeName , formats ) ;
594608 }
595609 }
610+ // Add any types (and formats) possible for this cell to maybeTypes
611+ possibleCellTypes . forEach ( ( formats , typeName ) => {
612+ const maybe = guesses [ col ] . maybeTypes ;
613+ if ( formats ) {
614+ // Type with formats, so make sure we've added all the formats to the list
615+ if ( ! maybe . has ( typeName ) ) {
616+ maybe . set ( typeName , [ ] ) ;
617+ }
618+ formats . forEach ( ( format ) => {
619+ if ( ! maybe . get ( typeName ) . includes ( format ) ) {
620+ maybe . get ( typeName ) . push ( format ) ;
621+ }
622+ } ) ;
623+ } else {
624+ // Type without formats, just set it to false
625+ maybe . set ( typeName , false ) ;
626+ }
627+ guesses [ col ] . maybeTypes = maybe ;
628+ } ) ;
596629 } ) ;
597- guesses [ col ] . types = newGuesses ;
630+ guesses [ col ] . safeTypes = newSafeGuesses ;
598631 } else {
599632 // First cell examined, so start off with its possible types
600- guesses [ col ] . types = possibleCellTypes ;
633+ guesses [ col ] . safeTypes = possibleCellTypes ;
634+ guesses [ col ] . maybeTypes = possibleCellTypes ;
601635 }
602636 }
603637 }
604638 }
605639
640+ // Now we must go back through the guesses and remove any safe types/formats
641+ // from the maybe types/formats, so "maybe" ONLY contains options that are
642+ // valid for some *but not all* values; every safe type/format will be listed
643+ // as a maybe type/format due to the process used above.
644+
645+ guesses . forEach ( ( colGuesses ) => {
646+ let maybeTypes = colGuesses . maybeTypes ;
647+ colGuesses . safeTypes . forEach ( ( safeFormats , typeName ) => {
648+ if ( safeFormats ) {
649+ // It's a type with formats
650+
651+ // typeName will always be present in maybeTypes at this point
652+ let formats = maybeTypes . get ( typeName ) ;
653+ safeFormats . forEach ( ( format ) => {
654+ const index = formats . indexOf ( format ) ;
655+ // index will never be -1
656+ formats . splice ( index , 1 ) ;
657+ } ) ;
658+ // If this is the last format for this type in maybeTypes, remove it. We
659+ // won't get any other safeTypes for this maybeType so there's no chance
660+ // of the above code being invoked again, and failing because typeName is
661+ // no longer in maybeTypes.
662+ if ( formats . length == 0 ) {
663+ maybeTypes . delete ( typeName ) ;
664+ }
665+ } else {
666+ // It's a type without formats
667+ maybeTypes . delete ( typeName ) ;
668+ }
669+ } ) ;
670+ } ) ;
671+
606672 return guesses ;
607673} ;
608674
0 commit comments