Skip to content

Commit 42a5f1f

Browse files
committed
#171 - started creating a PoC of the idea of metadata value extractors. What looked at first glance like low-hanging fruit turned out to be far too complex to be worth it.
1 parent b096e4c commit 42a5f1f

File tree

4 files changed

+154
-13
lines changed

4 files changed

+154
-13
lines changed

src/custom-sort/matchers.ts

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
104104
}
105105
}
106106

107-
const DAY_POSITIONS = '00'.length
108-
const MONTH_POSITIONS = '00'.length
109-
const YEAR_POSITIONS = '0000'.length
107+
export const DAY_POSITIONS = '00'.length
108+
export const MONTH_POSITIONS = '00'.length
109+
export const YEAR_POSITIONS = '0000'.length
110110

111111
const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
112112

113-
export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
114-
// Assumption - the regex date matched against input s, no extensive defensive coding needed
115-
const components = s.split('-')
116-
const day = prependWithZeros(components[0], DAY_POSITIONS)
117-
const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
118-
const year = prependWithZeros(components[2], YEAR_POSITIONS)
119-
return `${year}-${month}-${day}//`
113+
/**
 * Builds a normalizer for dates whose day, month and year components are joined by `separator`.
 *
 * The returned function assumes its input has already been matched by the corresponding date
 * regex, so it does no defensive validation of its own.
 *
 * @param separator - text that splits the date into its components
 * @param dayIdx - index of the day among the split components
 * @param monthIdx - index of the month among the split components
 * @param yearIdx - index of the year among the split components
 * @param months - optional list of month names; when given, the month component is looked up
 *                 in this list and replaced by its 1-based position (0 when it is not found),
 *                 otherwise the month component is used as-is
 * @returns a function turning the matched date into `<year>-<month>-<day>//`, each part passed
 *          through `prependWithZeros` with the respective `*_POSITIONS` width
 */
export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]): (s: string) => string | null {
    return (s: string): string | null => {
        // Assumption - the regex date matched against input s, no extensive defensive coding needed
        const components = s.split(separator)
        const day = prependWithZeros(components[dayIdx], DAY_POSITIONS)
        // Resolve the name against the list supplied by the caller, not the module-level MONTHS,
        // so that a custom list of month names is actually honoured
        const monthValue = months ? `${1 + months.indexOf(components[monthIdx])}` : components[monthIdx]
        const month = prependWithZeros(monthValue, MONTH_POSITIONS)
        const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS)
        return `${year}-${month}-${day}//`
    }
}
124+
125+
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
126+
127+
/*
128+
// Assumption - the regex date matched against input s, no extensive defensive coding needed
129+
const components = s.split('-')
130+
const day = prependWithZeros(components[0], DAY_POSITIONS)
131+
const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
132+
const year = prependWithZeros(components[2], YEAR_POSITIONS)
133+
return `${year}-${month}-${day}//`
134+
135+
*/
136+
137+
138+
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import {
2+
getNormalizedDate_NormalizerFn_for
3+
} from "./matchers";
4+
5+
const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
6+
const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
7+
const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
8+
const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
9+
const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
10+
const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
11+
12+
export interface MDataExtractor {
13+
(mdataValue: string): string|undefined
14+
}
15+
16+
export interface MDataExtractorParseResult {
17+
m: MDataExtractor
18+
remainder: string
19+
}
20+
21+
/**
 * Tries to interpret the beginning of `s` as a metadata value extractor specification.
 *
 * Simplistic initial implementation of the idea with two hardcoded extractors.
 *
 * @param s - candidate text; leading and trailing whitespace is ignored
 * @returns the matching extractor together with the trimmed text that follows the spec,
 *          or `undefined` when `s` does not start with a known extractor spec
 */
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Trim once and slice the very same string that was tested: slicing the untrimmed input
    // cuts at the wrong offset whenever `s` carries leading whitespace
    const spec = s.trim()
    if (spec.startsWith(DateExtractorSpecPattern1)) {
        return {
            m: extractorForPattern1,
            remainder: spec.substring(DateExtractorSpecPattern1.length).trim()
        }
    }
    if (spec.startsWith(DateExtractorSpecPattern2)) {
        return {
            m: extractorForPattern2,
            remainder: spec.substring(DateExtractorSpecPattern2.length).trim()
        }
    }
    return undefined
}
37+
38+
/**
 * Finds the first fragment of `mdataValue` matched by `DateExtractorRegex1` and returns it
 * converted by `DateExtractorNormalizer1`.
 *
 * @param mdataValue - raw metadata value to search
 * @returns the normalized date, or `undefined` when nothing matches or the normalizer
 *          yields `null`
 */
export function extractorForPattern1(mdataValue: string): string|undefined {
    const matchedDate = mdataValue?.match(DateExtractorRegex1)?.[0]
    if (!matchedDate) {
        return undefined
    }
    return DateExtractorNormalizer1(matchedDate) ?? undefined
}
46+
47+
/**
 * Finds the first fragment of `mdataValue` matched by `DateExtractorRegex2` and returns it
 * converted by `DateExtractorNormalizer2`.
 *
 * @param mdataValue - raw metadata value to search
 * @returns the normalized date, or `undefined` when nothing matches or the normalizer
 *          yields `null`
 */
export function extractorForPattern2(mdataValue: string): string|undefined {
    const matchedDate = mdataValue?.match(DateExtractorRegex2)?.[0]
    if (!matchedDate) {
        return undefined
    }
    return DateExtractorNormalizer2(matchedDate) ?? undefined
}

src/custom-sort/sorting-spec-processor.ts

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ import {
3333
MATCH_CHILDREN_2_SUFFIX,
3434
NO_PRIORITY
3535
} from "./folder-matching-rules"
36+
import {
37+
MDataExtractor,
38+
tryParseAsMDataExtractorSpec
39+
} from "./mdata-extractors";
3640

3741
interface ProcessingContext {
3842
folderPath: string
@@ -1497,10 +1501,30 @@ export class SortingSpecProcessor {
14971501
orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec
14981502

14991503
let metadataName: string|undefined
1504+
let metadataExtractor: MDataExtractor|undefined
15001505
if (orderSpec.startsWith(OrderByMetadataLexeme)) {
15011506
applyToMetadata = true
1502-
metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
1503-
orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
1507+
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
1508+
if (metadataNameAndOptionalExtractorSpec) {
1509+
if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
1510+
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
1511+
metadataName = metadataSpec.shift()
1512+
const metadataExtractorSpec = metadataSpec?.shift()
1513+
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
1514+
if (hasMetadataExtractor) {
1515+
metadataExtractor = hasMetadataExtractor.m
1516+
} else {
1517+
// TODO: raise error of syntax error - metadata name followed by unrecognized text
1518+
// take into account all of the texts resulting from the split(' ') - there could be more segments
1519+
}
1520+
orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
1521+
} else {
1522+
metadataName = metadataNameAndOptionalExtractorSpec
1523+
orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
1524+
}
1525+
} else {
1526+
orderSpec = ''
1527+
}
15041528
}
15051529

15061530
// check for any superfluous text
@@ -1553,7 +1577,14 @@ export class SortingSpecProcessor {
15531577
}
15541578
sortOrderSpec[level] = {
15551579
order: order!,
1556-
byMetadataField: metadataName
1580+
byMetadataField: metadataName,
1581+
1582+
metadataFieldExtractor: metadataExtractor
1583+
1584+
... and then carry the metadataFieldExtractor attribute down the parser, handle it correctly in the 4-level mdata sorting options
1585+
and execute at runtime
1586+
1587+
Seems to be far too complex to be worth it.
15571588
}
15581589
}
15591590
return sortOrderSpec
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import {
2+
extractorForPattern1
3+
} from '../../custom-sort/mdata-extractors'
4+
5+
// Cases for the dd/mm/yyyy extractor. It searches the value for a \d{2}/\d{2}/\d{4} fragment and
// normalizes it; when no such fragment exists it returns undefined rather than a padded string.
describe('extractorForPattern1', () => {
    const params: Array<[string, string | undefined]> = [
        // Positive
        ['03/05/2019', '2019-05-03//'],
        ['Created at: 03/05/2019', '2019-05-03//'],
        ['03/05/2019 | 22:00', '2019-05-03//'],
        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
        // The regex checks only the digit layout, so out-of-range values pass through unchanged
        ['88/99/2012', '2012-99-88//'],
        // Negative - no dd/mm/yyyy fragment in the value, nothing to extract
        ['13-Jan-2012', undefined],
        ['3/5/2019', undefined],
        ['03/05/19', undefined],
        ['03.05.2019', undefined],
        ['1 .1', undefined],
        ['', undefined],
        ['abc', undefined],
        ['def-abc', undefined],
    ];
    it.each(params)('>%s< should become %s', (s: string, out: string | undefined) => {
        expect(extractorForPattern1(s)).toBe(out)
    })
})

0 commit comments

Comments
 (0)