Skip to content

Commit e3d42d2

Browse files
Add support for excluding specific tokens from Roman numeral conversion in metadata processing
1 parent: 9d6c927 · commit: e3d42d2

File tree

1 file changed

+51
-9
lines changed

1 file changed

+51
-9
lines changed

hasheous-lib/Classes/DataObjects.cs

Lines changed: 51 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1897,6 +1897,17 @@ public async Task DataObjectMetadataSearch(DataObjectType objectType, long? id,
18971897
// get all metadata sources
18981898
private static MetadataSources[] allMetadataSources = (MetadataSources[])Enum.GetValues(typeof(MetadataSources));
18991899

1900+
// Tokens commonly found in titles/platform names that should not be interpreted as Roman numerals.
1901+
private static readonly List<string> RomanConversionAcronymExclusions = new List<string>
1902+
{
1903+
"CD",
1904+
"DVD",
1905+
"PS",
1906+
"PSP",
1907+
"VR",
1908+
"3DO"
1909+
};
1910+
19001911
private async Task _DataObjectMetadataSearch(DataObjectType objectType, long? id, bool ForceSearch)
19011912
{
19021913
HashSet<MetadataSources> ProcessSources = [
@@ -2383,15 +2394,28 @@ void AddCandidate(string value)
23832394

23842395
void AddDelimiterVariants(string value)
23852396
{
2386-
if (value.Contains(" - ", StringComparison.Ordinal))
2397+
// normalize and expand delimiter variations so comparisons can match API results
2398+
// regardless of whether separators are spaces, hyphens, or colons.
2399+
string hyphenTight = Regex.Replace(value, @"\s*-\s*", "-");
2400+
string hyphenSpaced = Regex.Replace(value, @"\s*-\s*", " - ");
2401+
string hyphenToSpace = Regex.Replace(value, @"\s*-\s*", " ");
2402+
string hyphenToColon = Regex.Replace(value, @"\s*-\s*", ": ");
2403+
2404+
AddCandidate(hyphenTight);
2405+
AddCandidate(hyphenSpaced);
2406+
AddCandidate(hyphenToSpace);
2407+
AddCandidate(hyphenToColon);
2408+
2409+
// if a value has spaces but no hyphens, generate a hyphenated variant.
2410+
if (!value.Contains("-", StringComparison.Ordinal) && value.Contains(" ", StringComparison.Ordinal))
23872411
{
2388-
AddCandidate(value.Replace(" - ", ": "));
2389-
AddCandidate(value.Replace(" - ", " "));
2412+
AddCandidate(Regex.Replace(value, @"\s+", "-"));
23902413
}
23912414

2392-
if (value.Contains(": ", StringComparison.Ordinal))
2415+
if (value.Contains(":", StringComparison.Ordinal))
23932416
{
2394-
AddCandidate(value.Replace(": ", " "));
2417+
AddCandidate(Regex.Replace(value, @"\s*:\s*", " "));
2418+
AddCandidate(Regex.Replace(value, @"\s*:\s*", " - "));
23952419
}
23962420
}
23972421

@@ -2452,7 +2476,20 @@ void AddArticleVariants(string value)
24522476
{
24532477
string romanConverted = Regex.Replace(candidate, @"\b[IVXLCDM]+\b", match =>
24542478
{
2455-
return Common.RomanNumerals.RomanToInt(match.Value).ToString();
2479+
string token = match.Value;
2480+
2481+
if (RomanConversionAcronymExclusions.Contains(token, StringComparer.OrdinalIgnoreCase))
2482+
{
2483+
return token;
2484+
}
2485+
2486+
int parsed = Common.RomanNumerals.RomanToInt(token);
2487+
if (parsed >= 1 && parsed <= 30)
2488+
{
2489+
return parsed.ToString();
2490+
}
2491+
2492+
return token;
24562493
}, RegexOptions.IgnoreCase);
24572494

24582495
if (!string.Equals(romanConverted, candidate, StringComparison.Ordinal))
@@ -2466,7 +2503,7 @@ void AddArticleVariants(string value)
24662503
{
24672504
string numberToRoman = Regex.Replace(candidate, @"\b(\d+)\b", match =>
24682505
{
2469-
if (int.TryParse(match.Groups[1].Value, out int num) && num >= 1 && num <= 3999)
2506+
if (int.TryParse(match.Groups[1].Value, out int num) && num >= 1 && num <= 30)
24702507
{
24712508
return Common.RomanNumerals.IntToRoman(num);
24722509
}
@@ -2485,7 +2522,7 @@ void AddArticleVariants(string value)
24852522
// Convert numbers to words
24862523
string numberToWords = Regex.Replace(candidate, @"\b(\d+)\b", match =>
24872524
{
2488-
if (int.TryParse(match.Groups[1].Value, out int num) && num >= 0 && num <= 999999999)
2525+
if (int.TryParse(match.Groups[1].Value, out int num) && num >= 1 && num <= 30)
24892526
{
24902527
return Common.Numbers.NumberToWords(num);
24912528
}
@@ -2501,7 +2538,12 @@ void AddArticleVariants(string value)
25012538
string wordsToNumber = Regex.Replace(candidate, @"\b(?:Zero|One|Two|Three|Four|Five|Six|Seven|Eight|Nine|Ten|Eleven|Twelve|Thirteen|Fourteen|Fifteen|Sixteen|Seventeen|Eighteen|Nineteen|Twenty|Thirty|Forty|Fifty|Sixty|Seventy|Eighty|Ninety|Hundred|Thousand|Million|Billion)(?:\s+(?:Zero|One|Two|Three|Four|Five|Six|Seven|Eight|Nine|Ten|Eleven|Twelve|Thirteen|Fourteen|Fifteen|Sixteen|Seventeen|Eighteen|Nineteen|Twenty|Thirty|Forty|Fifty|Sixty|Seventy|Eighty|Ninety|Hundred|Thousand|Million|Billion))*\b", match =>
25022539
{
25032540
var result = Common.Numbers.WordsToNumbers(match.Value);
2504-
return result.HasValue ? result.Value.ToString() : match.Value;
2541+
if (result.HasValue && result.Value >= 1 && result.Value <= 30)
2542+
{
2543+
return result.Value.ToString();
2544+
}
2545+
2546+
return match.Value;
25052547
}, RegexOptions.IgnoreCase);
25062548

25072549
if (!string.Equals(wordsToNumber, candidate, StringComparison.Ordinal))

0 commit comments

Comments
 (0)