Skip to content

Commit 88327f6

Browse files
committed
#115 - Add inline regex support to match capital letters and lowercase letters explicitly
1 parent 39cddc3 commit 88327f6

File tree

3 files changed

+53
-14
lines changed

3 files changed

+53
-14
lines changed

src/custom-sort/custom-sort.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,13 +372,15 @@ export const determineSortingGroup = function (entry: TFile | TFolder, spec: Cus
372372
switch (group.type) {
373373
case CustomSortGroupType.ExactPrefix:
374374
if (group.exactPrefix) {
375+
console.log(`Exact prefix check`)
375376
if (nameForMatching.startsWith(group.exactPrefix)) {
376377
determined = true;
377378
}
378379
} else { // regexp is involved
379380
const [matched, matchedGroup] = matchGroupRegex(group.regexPrefix!, nameForMatching)
380381
determined = matched
381382
derivedText = matchedGroup ?? derivedText
383+
console.log(`Exact regexp prefix check ${group.regexPrefix?.regex?.toString()} vs. ${nameForMatching} = ${matched}`)
382384
}
383385
break;
384386
case CustomSortGroupType.ExactSuffix:

src/custom-sort/sorting-spec-processor.spec.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2957,17 +2957,23 @@ describe('convertPlainStringWithNumericSortingSymbolToRegex', () => {
29572957
it('should correctly include regex token for string end', () => {
29582958
const input1 = 'Part\\-D+:'
29592959
const input2 = ' \\[0-9]\\-D+'
2960+
const input3 = ' \\l\\[0-9]\\-D+'
29602961
const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.Suffix)
29612962
const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.Suffix)
2963+
const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.Suffix)
29622964
expect(result1?.regexpSpec.regex).toEqual(/Part *(\d+(?:-\d+)*):$/i)
29632965
expect(result2?.regexpSpec.regex).toEqual(/ [0-9] *(\d+(?:-\d+)*)$/i)
2966+
expect(result3?.regexpSpec.regex).toEqual(/ \p{Ll}[0-9] *(\d+(?:-\d+)*)$/u)
29642967
})
29652968
it('should correctly include regex token for string begin and end', () => {
29662969
const input1 = 'Part\\.D+:'
29672970
const input2 = ' \\d \\[0-9] '
2971+
const input3 = ' \\d \\[0-9] \\C'
29682972
const result1 = convertPlainStringToRegex(input1, RegexpUsedAs.FullMatch)
29692973
const result2 = convertPlainStringToRegex(input2, RegexpUsedAs.FullMatch)
2974+
const result3 = convertPlainStringToRegex(input3, RegexpUsedAs.FullMatch)
29702975
expect(result1?.regexpSpec.regex).toEqual(/^Part *(\d+(?:\.\d+)*):$/i)
29712976
expect(result2?.regexpSpec.regex).toEqual(/^ \d [0-9] $/i)
2977+
expect(result3?.regexpSpec.regex).toEqual(/^ \d [0-9] [\p{Lu}\p{Lt}]$/u)
29722978
})
29732979
})

src/custom-sort/sorting-spec-processor.ts

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,9 @@ const InlineRegexSymbol_Digit1: string = '\\d'
325325
const InlineRegexSymbol_Digit2: string = '\\[0-9]'
326326
const InlineRegexSymbol_0_to_3: string = '\\[0-3]'
327327

328+
const InlineRegexSymbol_CapitalLetter: string = '\\C'
329+
const InlineRegexSymbol_LowercaseLetter: string = '\\l'
330+
328331
const UnsafeRegexCharsRegex: RegExp = /[\^$.\-+\[\]{}()|*?=!\\]/g
329332

330333
export const escapeRegexUnsafeCharacters = (s: string): string => {
@@ -347,14 +350,24 @@ const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
347350
const inlineRegexSymbolsArrEscapedForRegex: Array<string> = [
348351
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit1),
349352
escapeRegexUnsafeCharacters(InlineRegexSymbol_Digit2),
350-
escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3)
353+
escapeRegexUnsafeCharacters(InlineRegexSymbol_0_to_3),
354+
escapeRegexUnsafeCharacters(InlineRegexSymbol_CapitalLetter),
355+
escapeRegexUnsafeCharacters(InlineRegexSymbol_LowercaseLetter)
351356
]
352357

358+
/**
 * Describes what an inline regex symbol expands to: the regex fragment itself
 * plus the flag requirements that fragment imposes on the assembled RegExp.
 */
interface RegexExpr {
359+
// Regex source text emitted in place of the inline symbol
regexExpr: string
360+
// When true, the assembled regex is created with the 'u' flag
isUnicode?: boolean
361+
// When true, the assembled regex is created without the 'i' flag
isCaseSensitive?: boolean
362+
}
363+
353364
// Don't be confused if the source lexeme is equal to the resulting regex piece: logically these are two distinct spaces
354-
const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: string} = {
355-
[InlineRegexSymbol_Digit1]: '\\d',
356-
[InlineRegexSymbol_Digit2]: '[0-9]',
357-
[InlineRegexSymbol_0_to_3]: '[0-3]',
365+
const inlineRegexSymbolsToRegexExpressionsArr: { [key: string]: RegexExpr} = {
366+
[InlineRegexSymbol_Digit1]: {regexExpr: '\\d'},
367+
[InlineRegexSymbol_Digit2]: {regexExpr: '[0-9]'},
368+
[InlineRegexSymbol_0_to_3]: {regexExpr: '[0-3]'},
369+
[InlineRegexSymbol_CapitalLetter]: {regexExpr: '[\\p{Lu}\\p{Lt}]', isUnicode: true, isCaseSensitive: true},
370+
[InlineRegexSymbol_LowercaseLetter]: {regexExpr: '\\p{Ll}', isUnicode: true, isCaseSensitive: true}
358371
}
359372

360373
const inlineRegexSymbolsDetectionRegex = new RegExp(inlineRegexSymbolsArrEscapedForRegex.join('|'), 'gi')
@@ -500,12 +513,14 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
500513
const [extractedPrefix, extractedSuffix] = s!.split(detectedSymbol)
501514
const regexPrefix: string = regexMatchesStart ? '^' : ''
502515
const regexSuffix: string = regexMatchesEnding ? '$' : ''
503-
const escapedProcessedPrefix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix)
504-
const escapedProcessedSuffix: string = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix)
505-
const regexFlags: string = replacement.unicodeRegex ? 'ui' : 'i'
516+
const escapedProcessedPrefix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedPrefix)
517+
const escapedProcessedSuffix: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(extractedSuffix)
518+
const regexUnicode: boolean = !!replacement.unicodeRegex || !!escapedProcessedPrefix.isUnicodeRegex || !!escapedProcessedSuffix.isUnicodeRegex
519+
const regexCaseSensitive: boolean = !!escapedProcessedPrefix.isCaseSensitiveRegex || !!escapedProcessedSuffix.isCaseSensitiveRegex
520+
const regexFlags: string = `${regexUnicode?'u':''}${regexCaseSensitive?'':'i'}`
506521
return {
507522
regexpSpec: {
508-
regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix}${replacement.regexpStr}${escapedProcessedSuffix}${regexSuffix}`, regexFlags),
523+
regex: new RegExp(`${regexPrefix}${escapedProcessedPrefix.s}${replacement.regexpStr}${escapedProcessedSuffix.s}${regexSuffix}`, regexFlags),
509524
normalizerFn: replacement.normalizerFn
510525
},
511526
prefix: extractedPrefix,
@@ -516,9 +531,10 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
516531
const replacement: RegexAsString = convertInlineRegexSymbolsAndEscapeTheRest(s)!
517532
const regexPrefix: string = regexMatchesStart ? '^' : ''
518533
const regexSuffix: string = regexMatchesEnding ? '$' : ''
534+
const regexFlags: string = `${replacement.isUnicodeRegex?'u':''}${replacement.isCaseSensitiveRegex?'':'i'}`
519535
return {
520536
regexpSpec: {
521-
regex: new RegExp(`${regexPrefix}${replacement}${regexSuffix}`, 'i')
537+
regex: new RegExp(`${regexPrefix}${replacement.s}${regexSuffix}`, regexFlags)
522538
},
523539
prefix: '', // shouldn't be used anyway because of the below containsAdvancedRegex: false
524540
suffix: '', // ---- // ----
@@ -529,14 +545,22 @@ export const convertPlainStringToRegex = (s: string, actAs: RegexpUsedAs): Regex
529545
}
530546
}
531547

532-
type RegexAsString = string
548+
/**
 * Result of converting a plain string into regex source text, together with
 * the flag hints the caller uses when constructing the final RegExp.
 */
export interface RegexAsString {
549+
// Regex source text (inline symbols expanded, everything else escaped)
s: string
550+
// When true, the caller adds the 'u' flag to the RegExp
isUnicodeRegex?: boolean
551+
// When true, the caller omits the 'i' flag from the RegExp
isCaseSensitiveRegex?: boolean
552+
}
533553

534554
export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsString => {
535555
if (s === '') {
536-
return s
556+
return {
557+
s: s
558+
}
537559
}
538560

539561
let regexAsString: Array<string> = []
562+
let isUnicode: boolean = false
563+
let isCaseSensitive: boolean = false
540564

541565
while (s!.length > 0) {
542566
// detect the first inline regex
@@ -562,15 +586,22 @@ export const convertInlineRegexSymbolsAndEscapeTheRest = (s: string): RegexAsStr
562586
regexAsString.push(escapeRegexUnsafeCharacters(charsBeforeRegexSymbol))
563587
s = s!.substring(earliestRegexSymbolIdx)
564588
}
565-
regexAsString.push(inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!])
589+
const expr = inlineRegexSymbolsToRegexExpressionsArr[earliestRegexSymbol!]
590+
regexAsString.push(expr.regexExpr)
591+
isUnicode ||= !!expr.isUnicode
592+
isCaseSensitive ||= !!expr.isCaseSensitive
566593
s = s!.substring(earliestRegexSymbol!.length)
567594
} else {
568595
regexAsString.push(escapeRegexUnsafeCharacters(s))
569596
s = ''
570597
}
571598
}
572599

573-
return regexAsString.join('')
600+
return {
601+
s: regexAsString.join(''),
602+
isUnicodeRegex: isUnicode,
603+
isCaseSensitiveRegex: isCaseSensitive
604+
}
574605
}
575606

576607
export const MatchFolderNameLexeme: string = 'name:'

0 commit comments

Comments
 (0)