#171 - started creating a PoC of the metadata value extractors idea. What looked at first glance like low-hanging fruit turned out to be far too complex to be worth it.

SebastianMC · SebastianMC · commit 42a5f1feb27a · 2024-11-03T22:56:55.000+01:00
diff --git a/src/custom-sort/matchers.ts b/src/custom-sort/matchers.ts
@@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
 	}
 }
 
-const DAY_POSITIONS = '00'.length
-const MONTH_POSITIONS = '00'.length
-const YEAR_POSITIONS = '0000'.length
+export const DAY_POSITIONS = '00'.length // width (2) passed to prependWithZeros for the day component
+export const MONTH_POSITIONS = '00'.length // width (2) passed to prependWithZeros for the month component
+export const YEAR_POSITIONS = '0000'.length // width (4) passed to prependWithZeros for the year component
 
 const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
 
-export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
-	// Assumption - the regex date matched against input s, no extensive defensive coding needed
-	const components = s.split('-')
-	const day = prependWithZeros(components[0], DAY_POSITIONS)
-	const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
-	const year = prependWithZeros(components[2], YEAR_POSITIONS)
-	return `${year}-${month}-${day}//`
+export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]) {
+	return (s: string): string | null => {
+		// Builds a 'yyyy-mm-dd//' key from s split on separator. Assumption - the regex date matched against input s, no extensive defensive coding needed
+		const components = s.split(separator)
+		const day = prependWithZeros(components[dayIdx], DAY_POSITIONS)
+		const monthValue = months ? `${1 + months.indexOf(components[monthIdx])}` : components[monthIdx] // month name -> 1-based position in the supplied months list (0 when not listed); without a list the component is used as-is
+		const month = prependWithZeros(monthValue, MONTH_POSITIONS)
+		const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS)
+		return `${year}-${month}-${day}//`
+	}
 }
+
+export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS) // '-' separated components: day at index 0, month name at 1, year at 2
+
+/*
+// Assumption - the regex date matched against input s, no extensive defensive coding needed
+const components = s.split('-')
+const day = prependWithZeros(components[0], DAY_POSITIONS)
+const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
+const year = prependWithZeros(components[2], YEAR_POSITIONS)
+return `${year}-${month}-${day}//`
+
+ */
+
+
+
diff --git a/src/custom-sort/mdata-extractors.ts b/src/custom-sort/mdata-extractors.ts
@@ -0,0 +1,54 @@
+import {
+    getNormalizedDate_NormalizerFn_for
+} from "./matchers";
+
+const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)' // spec text recognized by tryParseAsMDataExtractorSpec
+const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}') // unanchored: finds the first such fragment anywhere in the value
+const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) // day at index 0, month at 1, year at 2
+const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)' // spec text recognized by tryParseAsMDataExtractorSpec
+const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}') // same shape as DateExtractorRegex1, only the normalizer tells day from month
+const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) // day at index 1, month at 0, year at 2
+
+export interface MDataExtractor {
+    (mdataValue: string): string|undefined // the extracted value, or undefined when nothing could be extracted
+}
+
+export interface MDataExtractorParseResult {
+    m: MDataExtractor // the extractor selected by the recognized spec
+    remainder: string // trimmed text that follows the recognized spec
+}
+
+export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
+    // Simplistic initial implementation of the idea with hardcoded two extractors; returns undefined when s (leading whitespace ignored) starts with neither spec
+    if (s.trim().startsWith(DateExtractorSpecPattern1)) {
+        return {
+            m: extractorForPattern1,
+            remainder: s.trim().substring(DateExtractorSpecPattern1.length).trim() // cut from the trimmed text, the same one startsWith() examined
+        }
+    }
+    if (s.trim().startsWith(DateExtractorSpecPattern2)) {
+        return {
+            m: extractorForPattern2,
+            remainder: s.trim().substring(DateExtractorSpecPattern2.length).trim() // cut from the trimmed text, the same one startsWith() examined
+        }
+    }
+    return undefined
+}
+
+export function extractorForPattern1(mdataValue: string): string|undefined {
+    // Picks the first fragment matching DateExtractorRegex1; a nullish value or no match yields undefined
+    const dateText = mdataValue?.match(DateExtractorRegex1)?.[0]
+    if (!dateText) {
+        return undefined
+    }
+    return DateExtractorNormalizer1(dateText) ?? undefined
+}
+
+export function extractorForPattern2(mdataValue: string): string|undefined {
+    // Picks the first fragment matching DateExtractorRegex2; a nullish value or no match yields undefined
+    const dateText = mdataValue?.match(DateExtractorRegex2)?.[0]
+    if (!dateText) {
+        return undefined
+    }
+    return DateExtractorNormalizer2(dateText) ?? undefined
+}
diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts
@@ -33,6 +33,10 @@ import {
 	MATCH_CHILDREN_2_SUFFIX,
 	NO_PRIORITY
 } from "./folder-matching-rules"
+import {
+	MDataExtractor,
+	tryParseAsMDataExtractorSpec
+} from "./mdata-extractors";
 
 interface ProcessingContext {
 	folderPath: string
@@ -1497,10 +1501,30 @@ export class SortingSpecProcessor {
 			orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec
 
 			let metadataName: string|undefined
+			let metadataExtractor: MDataExtractor|undefined
 			if (orderSpec.startsWith(OrderByMetadataLexeme)) {
 				applyToMetadata = true
-				metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
-				orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
+				const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
+				if (metadataNameAndOptionalExtractorSpec) {
+					if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
+						const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
+						metadataName = metadataSpec.shift()
+						const metadataExtractorSpec = metadataSpec?.shift()
+						const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
+						if (hasMetadataExtractor) {
+							metadataExtractor = hasMetadataExtractor.m
+						} else {
+							// TODO: raise a syntax error - metadata name followed by unrecognized text
+							//       take into account all of the segments resulting from the split(' ') - there could be more of them
+						}
+						orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
+					} else {
+						metadataName = metadataNameAndOptionalExtractorSpec
+						orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
+					}
+				} else {
+					orderSpec = ''
+				}
 			}
 
 			// check for any superfluous text
@@ -1553,7 +1577,14 @@ export class SortingSpecProcessor {
 			}
 			sortOrderSpec[level] = {
 				order: order!,
-				byMetadataField: metadataName
+				byMetadataField: metadataName,
+
+				metadataFieldExtractor: metadataExtractor
+
+					... and then carry the metadataFieldExtractor attribute down the parser, handle it correctly in the 4-level mdata sorting options
+				        and execute it at runtime
+
+				    Seems to be far too complex to be worth it.
 			}
 		}
 		return sortOrderSpec
diff --git a/src/test/unit/mdata-extractors.spec.ts b/src/test/unit/mdata-extractors.spec.ts
@@ -0,0 +1,38 @@
+import {
+    extractorForPattern1
+} from '../../custom-sort/mdata-extractors'
+
+describe('extractorForPattern1', () => {
+    const params: Array<[string, string|undefined]> = [
+        // Positive - the first dd/mm/yyyy fragment found anywhere in the value gets normalized
+        ['03/05/2019', '2019-05-03//'],
+        ['Created at: 03/05/2019', '2019-05-03//'],
+        ['03/05/2019 | 22:00', '2019-05-03//'],
+        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
+
+        // TODO: add more positive and negative examples
+        // Negative - dd-Mmm-yyyy dates contain no dd/mm/yyyy fragment, so the extractor yields undefined
+        ['13-Jan-2012', undefined],
+        ['3-Feb-2', undefined],
+        ['1-Mar-1900', undefined],
+        ['42-Apr-9999', undefined],
+        ['0-May-0', undefined],
+        ['21-Jun-2024', undefined],
+        ['7-Jul-1872', undefined],
+        ['15-Aug-1234', undefined],
+        ['1234-Sep-7777', undefined],
+        ['3-Oct-2023', undefined],
+        ['8-Nov-2022', undefined],
+        ['18-Dec-2021', undefined],
+        // Negative - other texts without a dd/mm/yyyy fragment
+        ['88-Dec-2012', undefined],
+        ['13-JANUARY-2012', undefined],
+        ['1 .1', undefined],
+        ['', undefined],
+        ['abc', undefined],
+        ['def-abc', undefined],
+    ];
+    it.each(params)('>%s< should become %s', (s: string, out: string|undefined) => {
+        expect(extractorForPattern1(s)).toBe(out)
+    })
+})