Skip to content

Commit 5f448a1

Browse files
committed
Review feedback (part 1): Extend type/format guesses to return both "safe" and "maybe" options
"Safe" options are valid for EVERY value in the column. "Maybe" options are valid for SOME values in the column (but not every). The frontend needs update to reflect this distinction, only showing safe/maybe options (rather than ALL options as currently).
1 parent bacc101 commit 5f448a1

2 files changed

Lines changed: 168 additions & 67 deletions

File tree

lib/importer/src/dudk/backend.js

Lines changed: 85 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -147,26 +147,34 @@ exports.SessionGetColumnsNeedingFormats = (sid) => {
147147
// Given guesses as returned by SessionGuessTypes and a list of domain-model
148148
// fields (with name and type fields), returns the type guesses with each column
149149
// augmented with a `fields` field - a Map mapping likely field names for this
150-
// column to an array of possible formats for that field.
150+
// column to an array of safe formats for that field - and a `maybeFields` field
151+
// - the same but for "maybe" fields that might be a partial match for the data.
151152

152153
exports.SessionSuggestFields = (sid, typeGuesses, domainModelFields) => {
153154
let result = [];
154155

155156
typeGuesses.forEach((colTypeGuess) => {
156-
let fieldFormats = new Map(); // Map from field names to likely formats
157+
let safeFieldFormats = new Map(); // Map from field names to safe formats
158+
let maybeFieldFormats = new Map(); // Map from field names to possible formats
157159
// Examine every field, and check if they have a type that this column might
158160
// contain
159161
domainModelFields.forEach((field) => {
160-
if(colTypeGuess.types.has(field.type)) {
161-
// Add this field to the possibilities for the column, using the guessed
162+
if(colTypeGuess.safeTypes.has(field.type)) {
163+
// Add this field to the safe possibilities for the column, using the guessed
162164
// formats
163-
fieldFormats.set(field.name, colTypeGuess.types.get(field.type));
165+
safeFieldFormats.set(field.name, colTypeGuess.safeTypes.get(field.type));
166+
}
167+
if(colTypeGuess.maybeTypes.has(field.type)) {
168+
// Add this field to the maybe possibilities for the column, using the guessed
169+
// formats
170+
maybeFieldFormats.set(field.name, colTypeGuess.maybeTypes.get(field.type));
164171
}
165172
});
166173

167174
// Extend the column type guess with the "fields" field
168175
let typeGuessesAndFields = Object.assign({}, colTypeGuess);
169-
typeGuessesAndFields.fields = fieldFormats;
176+
typeGuessesAndFields.safeFields = safeFieldFormats;
177+
typeGuessesAndFields.maybeFields = maybeFieldFormats;
170178
result.push(typeGuessesAndFields);
171179
});
172180

@@ -532,10 +540,12 @@ exports.SessionGetInputSampleRows = (sid, range, startCount, middleCount, endCou
532540
// interpreted as extra text values.
533541

534542
// The return value is an array, indexed on the relative column number within
535-
// the range; each element is an object with elements "types" listing a range of
536-
// possible types in a map whose values are either 'false' for types without
537-
// formats, or an Array of possible formats for that type; and a field "full"
538-
// indicating if the column has no missing values.
543+
// the range; each element is an object with elements "safeTypes" listing a
544+
// range of types that all cells in the column match, in a map whose values are
545+
// either 'false' for types without formats, or an Array of possible formats for
546+
// that type; "maybeTypes" in the same format, but listing type/format pairs
547+
// which *some* but not all cells match; and a field "full" indicating if the
548+
// column has no missing values.
539549
exports.SessionGuessTypes = (sid, range) => {
540550
assert(sessionStore.has(sid), `No such session ${sid} when guessing types`);
541551
assert(sessionStore.get(sid).wb.Sheets[range.sheet], `No such sheet ${range.sheet} in session ${sid} when guessing types`);
@@ -556,7 +566,8 @@ exports.SessionGuessTypes = (sid, range) => {
556566

557567
// Initialise state, that we will refine as we go
558568
for (let col=0;col<columns;col++) {
559-
guesses[col] = {types: undefined,
569+
guesses[col] = {safeTpes: undefined,
570+
maybeTypes: undefined,
560571
full:true}; // This is unset if we find a blank/missing
561572
// value
562573
}
@@ -577,32 +588,87 @@ exports.SessionGuessTypes = (sid, range) => {
577588
// Blank cell, so mark the column as not full but don't reduce the list of guessed types
578589
guesses[col].full = false;
579590
} else if(possibleCellTypes) {
580-
if(guesses[col].types) {
581-
// Remove any types in the current guess that aren't possibly valid for
591+
// Check if this is the first time we've seen this column. safeTypes and
592+
// maybeTypes are both set at the same time, so we can use safeTypes
593+
// being set as a proxy for both.
594+
if(guesses[col].safeTypes) {
595+
// Remove any safeTypes in the current guess that aren't possibly valid for
582596
// this cell
583-
let newGuesses = new Map();
584-
guesses[col].types.forEach((formats, typeName) => {
597+
let newSafeGuesses = new Map();
598+
guesses[col].safeTypes.forEach((formats, typeName) => {
585599
if(possibleCellTypes.has(typeName)) {
586600
if(formats) {
587601
// Type with formats, so find the intersection of the current possible formats and this cell's possible formats
588602
let possibleCellFormats = possibleCellTypes.get(typeName);
589603
let newPossibleFormats = formats.filter(value => possibleCellFormats.includes(value));
590-
newGuesses.set(typeName, newPossibleFormats);
604+
newSafeGuesses.set(typeName, newPossibleFormats);
591605
} else {
592606
// Type without formats
593-
newGuesses.set(typeName, formats);
607+
newSafeGuesses.set(typeName, formats);
594608
}
595609
}
610+
// Add any types in the current guess to maybeTypes
611+
possibleCellTypes.forEach((formats, typeName) => {
612+
const maybe = guesses[col].maybeTypes;
613+
if(formats) {
614+
// Type with formats, so make sure we've added all the formats to the list
615+
if(!maybe.has(typeName)) {
616+
maybe.set(typeName, []);
617+
}
618+
formats.forEach((format) => {
619+
if(!maybe.get(typeName).includes(format)) {
620+
maybe.get(typeName).push(format);
621+
}
622+
});
623+
} else {
624+
// Type without formats, just set it to false
625+
maybe.set(typeName, false);
626+
}
627+
guesses[col].maybeTypes = maybe;
628+
});
596629
});
597-
guesses[col].types = newGuesses;
630+
guesses[col].safeTypes = newSafeGuesses;
598631
} else {
599632
// First cell examined, so start off with its possible types
600-
guesses[col].types = possibleCellTypes;
633+
guesses[col].safeTypes = possibleCellTypes;
634+
guesses[col].maybeTypes = possibleCellTypes;
601635
}
602636
}
603637
}
604638
}
605639

640+
// Now we must go back through the guesses and remove any safe types/formats
641+
// from the maybe types/formats, so "maybe" ONLY contains options that are
642+
// valid for some *but not all* values; every safe type/format will be listed
643+
// as a maybe type/format due to the process used above.
644+
645+
guesses.forEach((colGuesses) => {
646+
let maybeTypes = colGuesses.maybeTypes;
647+
colGuesses.safeTypes.forEach((safeFormats, typeName) => {
648+
if(safeFormats) {
649+
// It's a type with formats
650+
651+
// typeName will always be present in maybeTypes at this point
652+
let formats = maybeTypes.get(typeName);
653+
safeFormats.forEach((format) => {
654+
const index = formats.indexOf(format);
655+
// index will never be -1
656+
formats.splice(index, 1);
657+
});
658+
// If this is the last format for this type in maybeTypes, remove it. We
659+
// won't get any other safeTypes for this maybeType so there's no chance
660+
// of the above code being invoked again, and failing because typeName is
661+
// no longe rin maybeTypes.
662+
if(formats.length == 0) {
663+
maybeTypes.delete(typeName);
664+
}
665+
} else {
666+
// It's a type without formats
667+
maybeTypes.delete(typeName);
668+
}
669+
});
670+
});
671+
606672
return guesses;
607673
};
608674

lib/importer/src/dudk/backend.test.js

Lines changed: 83 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -609,15 +609,33 @@ test('guessing types', () => {
609609
const guesses = backend.SessionGuessTypes(sid, dataRange);
610610
expect(guesses).toMatchObject([
611611
// Order is actually irrelevant for all formats lists, so this test over-specifies a bit
612-
{full:true, types:new Map([["number", false],["text", false]])},
613-
{full:true, types:new Map([["date", ["ymd"]],["text", false]])},
614-
{full:true, types:new Map([["date", ["ydm"]],["text", false]])},
615-
{full:true, types:new Map([["date", ["dmy"]],["text", false]])},
616-
{full:true, types:new Map([["date", ["mdy"]],["text", false]])},
617-
{full:true, types:new Map([["date", ["ymd","ydm"]],["text", false]])},
618-
{full:false, types:new Map([["number", false],["text", false]])},
619-
{full:true, types:new Map([["text", false]])},
620-
{full:false, types:new Map([["text", false]])}
612+
{full:true,
613+
safeTypes:new Map([["number", false],["text", false]]),
614+
maybeTypes:new Map([])},
615+
{full:true,
616+
safeTypes:new Map([["date", ["ymd"]],["text", false]]),
617+
maybeTypes: new Map([["date", ["ydm","dmy","mdy"]]])},
618+
{full:true,
619+
safeTypes:new Map([["date", ["ydm"]],["text", false]]),
620+
maybeTypes: new Map([["date", ["ymd","dmy","mdy"]]])},
621+
{full:true,
622+
safeTypes:new Map([["date", ["dmy"]],["text", false]]),
623+
maybeTypes: new Map([["date", ["mdy", "ymd"]]])},
624+
{full:true,
625+
safeTypes:new Map([["date", ["mdy"]],["text", false]]),
626+
maybeTypes: new Map([["date", ["dmy"]]])},
627+
{full:true,
628+
safeTypes:new Map([["date", ["ymd","ydm"]],["text", false]]),
629+
maybeTypes: new Map([])},
630+
{full:false,
631+
safeTypes:new Map([["number", false],["text", false]]),
632+
maybeTypes: new Map([])},
633+
{full:true,
634+
safeTypes:new Map([["text", false]]),
635+
maybeTypes: new Map([])},
636+
{full:false,
637+
safeTypes:new Map([["text", false]]),
638+
maybeTypes: new Map([])}
621639
]);
622640

623641
const fieldSuggestions = backend.SessionSuggestFields(sid, guesses, [
@@ -645,48 +663,65 @@ test('guessing types', () => {
645663

646664
expect(fieldSuggestions).toMatchObject([
647665
// Order is actually irrelevant for all formats lists, so this test over-specifies a bit
648-
{full:true, types:new Map([["number", false],["text", false]]),
649-
fields: new Map([
650-
["numberField",false],
651-
["textField",false]
666+
// ABS FIXME add maybeFields
667+
{safeFields: new Map([
668+
["numberField",false],
669+
["textField",false]
670+
]),
671+
maybeFields: new Map([])},
672+
673+
{safeFields: new Map([
674+
["dateField",["ymd"]],
675+
["textField",false]
676+
]),
677+
maybeFields: new Map([
678+
["dateField",["ydm","dmy","mdy"]]
652679
])},
653-
{full:true, types:new Map([["date", ["ymd"]],["text", false]]),
654-
fields: new Map([
655-
["dateField",["ymd"]],
656-
["textField",false]
657-
])},
658-
{full:true, types:new Map([["date", ["ydm"]],["text", false]]),
659-
fields: new Map([
660-
["dateField",["ydm"]],
661-
["textField",false]
662-
])},
663-
{full:true, types:new Map([["date", ["dmy"]],["text", false]]),
664-
fields: new Map([
665-
["dateField",["dmy"]],
666-
["textField",false]
667-
])},
668-
{full:true, types:new Map([["date", ["mdy"]],["text", false]]),
669-
fields: new Map([
670-
["dateField",["mdy"]],
671-
["textField",false]
672-
])},
673-
{full:true, types:new Map([["date", ["ymd","ydm"]],["text", false]]),
674-
fields: new Map([
675-
["dateField",["ymd","ydm"]],
676-
["textField",false]
680+
681+
{safeFields: new Map([
682+
["dateField",["ydm"]],
683+
["textField",false]
684+
]),
685+
maybeFields: new Map([
686+
["dateField",["ymd","dmy","mdy"]]
677687
])},
678-
{full:false, types:new Map([["number", false],["text", false]]),
679-
fields: new Map([
680-
["numberField",false],
681-
["textField",false]
688+
689+
{safeFields: new Map([
690+
["dateField",["dmy"]],
691+
["textField",false]
692+
]),
693+
maybeFields: new Map([
694+
["dateField",["mdy","ymd"]]
682695
])},
683-
{full:true, types:new Map([["text", false]]),
684-
fields: new Map([
685-
["textField",false]
696+
697+
{safeFields: new Map([
698+
["dateField",["mdy"]],
699+
["textField",false]
700+
]),
701+
maybeFields: new Map([
702+
["dateField",["dmy"]]
686703
])},
687-
{full:false, types:new Map([["text", false]]),
688-
fields: new Map([
689-
["textField",false]
690-
])}
704+
705+
{safeFields: new Map([
706+
["dateField",["ymd","ydm"]],
707+
["textField",false]
708+
]),
709+
maybeFields: new Map([])},
710+
711+
{safeFields: new Map([
712+
["numberField",false],
713+
["textField",false]
714+
]),
715+
maybeFields: new Map([])},
716+
717+
{safeFields: new Map([
718+
["textField",false]
719+
]),
720+
maybeFields: new Map([])},
721+
722+
{safeFields: new Map([
723+
["textField",false]
724+
]),
725+
maybeFields: new Map([])}
691726
]);
692727
});

0 commit comments

Comments
 (0)