Skip to content

Commit 9e2e120

Browse files
committed
#171 - a PoC of the idea of metadata value extractors. Extended syntax, unit tests, error handling
1 parent f210a41 commit 9e2e120

File tree

4 files changed

+166
-75
lines changed

4 files changed

+166
-75
lines changed
Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import {
22
getNormalizedDate_NormalizerFn_for
33
} from "./matchers";
4+
import {NormalizerFn} from "./custom-sort-types";
45

5-
const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
6-
const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
7-
const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
8-
const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
9-
const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
10-
const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
6+
type ExtractorFn = (mdataValue: string) => string|undefined
7+
8+
interface DateExtractorSpec {
9+
specPattern: string|RegExp,
10+
extractorFn: ExtractorFn
11+
}
1112

1213
export interface MDataExtractor {
1314
(mdataValue: string): string|undefined
@@ -18,37 +19,46 @@ export interface MDataExtractorParseResult {
1819
remainder: string
1920
}
2021

21-
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
22-
// Simplistic initial implementation of the idea with hardcoded two extractors
23-
if (s.trim().startsWith(DateExtractorSpecPattern1)) {
24-
return {
25-
m: extractorForPattern1,
26-
remainder: s.substring(DateExtractorSpecPattern1.length).trim()
22+
function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) {
23+
return (mdataValue: string): string | undefined => {
24+
const hasMatch = mdataValue?.match(extractorRegexp)
25+
if (hasMatch && hasMatch[0]) {
26+
return extractedValueNormalizer(hasMatch[0]) ?? undefined
27+
} else {
28+
return undefined
2729
}
2830
}
29-
if (s.trim().startsWith(DateExtractorSpecPattern2)) {
30-
return {
31-
m: extractorForPattern2,
32-
remainder: s.substring(DateExtractorSpecPattern2.length).trim()
33-
}
34-
}
35-
return undefined
3631
}
3732

38-
export function extractorForPattern1(mdataValue: string): string|undefined {
39-
const hasDate = mdataValue?.match(DateExtractorRegex1)
40-
if (hasDate && hasDate[0]) {
41-
return DateExtractorNormalizer1(hasDate[0]) ?? undefined
42-
} else {
43-
return undefined
33+
const Extractors: DateExtractorSpec[] = [
34+
{ specPattern: 'date(dd/mm/yyyy)',
35+
extractorFn: getGenericPlainRegexpExtractorFn(
36+
new RegExp('\\d{2}/\\d{2}/\\d{4}'),
37+
getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
38+
)
39+
}, {
40+
specPattern: 'date(mm/dd/yyyy)',
41+
extractorFn: getGenericPlainRegexpExtractorFn(
42+
new RegExp('\\d{2}/\\d{2}/\\d{4}'),
43+
getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
44+
)
4445
}
45-
}
46+
]
4647

47-
export function extractorForPattern2(mdataValue: string): string|undefined {
48-
const hasDate = mdataValue?.match(DateExtractorRegex2)
49-
if (hasDate && hasDate[0]) {
50-
return DateExtractorNormalizer2(hasDate[0]) ?? undefined
51-
} else {
52-
return undefined
48+
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
49+
// Simplistic initial implementation of the idea with hardcoded two extractors
50+
for (const extrSpec of Extractors) {
51+
if ('string' === typeof extrSpec.specPattern && s.trim().startsWith(extrSpec.specPattern)) {
52+
return {
53+
m: extrSpec.extractorFn,
54+
remainder: s.substring(extrSpec.specPattern.length).trim()
55+
}
56+
}
5357
}
58+
return undefined
59+
}
60+
61+
export const _unitTests = {
62+
extractorFnForDate_ddmmyyyy: Extractors.find((it) => it.specPattern === 'date(dd/mm/yyyy)')?.extractorFn!,
63+
extractorFnForDate_mmddyyyy: Extractors.find((it) => it.specPattern === 'date(mm/dd/yyyy)')?.extractorFn!,
5464
}

src/custom-sort/sorting-spec-processor.ts

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = {
144144

145145
const OrderByMetadataLexeme: string = 'by-metadata:'
146146

147+
const ValueExtractorLexeme: string = 'using-extractor:'
148+
147149
const OrderLevelsSeparator: string = ','
148150

149151
enum Attribute {
@@ -1511,24 +1513,23 @@ export class SortingSpecProcessor {
15111513
applyToMetadata = true
15121514
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
15131515
if (metadataNameAndOptionalExtractorSpec) {
1514-
if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
1515-
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
1516-
metadataName = metadataSpec.shift()
1517-
const metadataExtractorSpec = metadataSpec?.shift()
1516+
if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) {
1517+
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme)
1518+
metadataName = metadataSpec.shift()?.trim()
1519+
const metadataExtractorSpec = metadataSpec?.shift()?.trim()
15181520
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
15191521
if (hasMetadataExtractor) {
15201522
metadataExtractor = hasMetadataExtractor.m
15211523
} else {
1522-
// TODO: raise error of syntax error - metadata name followed by unrecognized text
1523-
// take into account all of the texts resulting from the split(' ') - there could be more segments
1524+
return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`)
15241525
}
1525-
orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
1526+
orderSpec = '' // all consumed as metadata and extractor
15261527
} else {
15271528
metadataName = metadataNameAndOptionalExtractorSpec
1528-
orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
1529+
orderSpec = '' // all consumed as metadata name
15291530
}
15301531
} else {
1531-
orderSpec = ''
1532+
orderSpec = '' // no metadata name found
15321533
}
15331534
}
15341535

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,29 @@
11
import {
2-
extractorForPattern1
2+
_unitTests
33
} from '../../custom-sort/mdata-extractors'
44

5-
describe('extractorForPattern1', () => {
5+
describe('extractor for date(dd/mm/yyyy)', () => {
66
const params = [
77
// Positive
88
['03/05/2019', '2019-05-03//'],
9+
['103/05/2019', '2019-05-03//'],
10+
['103/05/20193232', '2019-05-03//'],
11+
['99/99/9999', '9999-99-99//'],
12+
['00/00/0000', '0000-00-00//'],
913
['Created at: 03/05/2019', '2019-05-03//'],
1014
['03/05/2019 | 22:00', '2019-05-03//'],
1115
['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
1216

13-
// TODO: more positive then negative examples
14-
15-
['13-Jan-2012', '2012-01-13//'],
16-
['3-Feb-2', '0002-02-03//'],
17-
['1-Mar-1900', '1900-03-01//'],
18-
['42-Apr-9999', '9999-04-42//'],
19-
['0-May-0', '0000-05-00//'],
20-
['21-Jun-2024', '2024-06-21//'],
21-
['7-Jul-1872', '1872-07-07//'],
22-
['15-Aug-1234', '1234-08-15//'],
23-
['1234-Sep-7777', '7777-09-1234//'],
24-
['3-Oct-2023', '2023-10-03//'],
25-
['8-Nov-2022', '2022-11-08//'],
26-
['18-Dec-2021', '2021-12-18//'],
2717
// Negative
28-
['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this
29-
['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this
30-
['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this
31-
['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this
32-
['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this
33-
['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this
18+
['88-Dec-2012', undefined],
19+
['13-JANUARY-2012', undefined],
20+
['1 .1', undefined],
21+
['', undefined],
22+
['abc', undefined],
23+
['def-abc', undefined],
24+
['3/5/2019', undefined],
3425
];
3526
it.each(params)('>%s< should become %s', (s: string, out: string) => {
36-
expect(extractorForPattern1(s)).toBe(out)
27+
expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out)
3728
})
3829
})

src/test/unit/sorting-spec-processor.spec.ts

Lines changed: 100 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ import {
44
CompoundDotNumberNormalizerFn,
55
ConsumedFolderMatchingRegexp,
66
consumeFolderByRegexpExpression,
7-
convertPlainStringToRegex, Date_dd_Mmm_yyyy_NormalizerFn,
7+
convertPlainStringToRegex,
8+
Date_dd_Mmm_yyyy_NormalizerFn,
89
detectSortingSymbols,
910
escapeRegexUnsafeCharacters,
1011
extractSortingSymbol,
@@ -14,8 +15,14 @@ import {
1415
RomanNumberNormalizerFn,
1516
SortingSpecProcessor
1617
} from "../../custom-sort/sorting-spec-processor"
17-
import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types";
18+
import {
19+
CustomSortGroupType,
20+
CustomSortOrder,
21+
CustomSortSpec,
22+
IdentityNormalizerFn
23+
} from "../../custom-sort/custom-sort-types";
1824
import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules";
25+
import {_unitTests} from "../../custom-sort/mdata-extractors";
1926

2027
const txtInputExampleA: string = `
2128
order-asc: a-z
@@ -356,6 +363,17 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = {
356363
}
357364
}
358365

366+
const txtInputExampleSortingSymbols: string = `
367+
/folders Chapter \\.d+ ...
368+
/:files ...section \\-r+.
369+
% Appendix \\-d+ (attachments)
370+
Plain syntax\\R+ ... works?
371+
And this kind of... \\D+plain syntax???
372+
Here goes ASCII word \\a+
373+
\\A+. is for any modern language word
374+
\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
375+
`
376+
359377
const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = {
360378
"mock-folder": {
361379
groups: [{
@@ -418,17 +436,67 @@ const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec }
418436
}
419437
}
420438

421-
const txtInputExampleSortingSymbols: string = `
422-
/folders Chapter \\.d+ ...
423-
/:files ...section \\-r+.
424-
% Appendix \\-d+ (attachments)
425-
Plain syntax\\R+ ... works?
426-
And this kind of... \\D+plain syntax???
427-
Here goes ASCII word \\a+
428-
\\A+. is for any modern language word
429-
\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
439+
const txtInputExampleMDataExtractors1: string = `
440+
< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy)
441+
/folders Chapter...
442+
> a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy)
443+
`
444+
445+
// Tricky elements captured:
446+
// - Order a-z. for by metadata is transformed to a-z (there is no notion of 'file extension' in metadata values)
447+
448+
const txtInputExampleMDataExtractors2: string = `
449+
< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy)
450+
/folders ...Chapter
451+
> a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy)
430452
`
431453

454+
const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = {
455+
"mock-folder": {
456+
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
457+
byMetadataField: 'created-by',
458+
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
459+
groups: [{
460+
foldersOnly: true,
461+
type: CustomSortGroupType.ExactPrefix,
462+
exactPrefix: 'Chapter',
463+
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
464+
byMetadataField: 'updated-on',
465+
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
466+
}, {
467+
type: CustomSortGroupType.Outsiders
468+
}],
469+
targetFoldersPaths: ['mock-folder'],
470+
outsidersGroupIdx: 1
471+
}
472+
}
473+
474+
const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = {
475+
"mock-folder": {
476+
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
477+
byMetadataField: 'created by',
478+
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy,
479+
defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical,
480+
byMetadataFieldSecondary: '',
481+
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
482+
groups: [{
483+
foldersOnly: true,
484+
type: CustomSortGroupType.ExactSuffix,
485+
exactSuffix: 'Chapter',
486+
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
487+
byMetadataField: 'updated-on',
488+
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
489+
secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse,
490+
byMetadataFieldSecondary: 'md2',
491+
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
492+
}, {
493+
type: CustomSortGroupType.Outsiders
494+
}],
495+
targetFoldersPaths: ['mock-folder'],
496+
outsidersGroupIdx: 1
497+
}
498+
}
499+
432500
describe('SortingSpecProcessor', () => {
433501
let processor: SortingSpecProcessor;
434502
beforeEach(() => {
@@ -449,6 +517,16 @@ describe('SortingSpecProcessor', () => {
449517
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
450518
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols)
451519
})
520+
it('should generate correct SortSpecs (example with mdata extractors)', () => {
521+
const inputTxtArr: Array<string> = txtInputExampleMDataExtractors1.split('\n')
522+
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
523+
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors1)
524+
})
525+
it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => {
526+
const inputTxtArr: Array<string> = txtInputExampleMDataExtractors2.split('\n')
527+
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
528+
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors2)
529+
})
452530
})
453531

454532
const txtInputNotDuplicatedSortSpec: string = `
@@ -2922,6 +3000,17 @@ describe('SortingSpecProcessor error detection and reporting', () => {
29223000
`${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`)
29233001
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order'))
29243002
})
3003+
it('should reject unknown value extractor', () => {
3004+
const inputTxtArr: Array<string> = `
3005+
< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)
3006+
`.replace(/\t/gi, '').split('\n')
3007+
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
3008+
expect(result).toBeNull()
3009+
expect(errorsLogger).toHaveBeenCalledTimes(2)
3010+
expect(errorsLogger).toHaveBeenNthCalledWith(1,
3011+
`${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`)
3012+
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)'))
3013+
})
29253014
})
29263015

29273016
const txtInputTargetFolderCCC: string = `

0 commit comments

Comments
 (0)