Skip to content

Commit ed249bc

Browse files
authored
[AdvPaste][JSON]Improve delimiter handling for CSV and add plain text parser (#33199)
* code changes * rework code * improvement * regex: ignore case * spell fixes * update regex * fixes * more fixes
1 parent fb7a85e commit ed249bc

File tree

1 file changed

+100
-2
lines changed

1 file changed

+100
-2
lines changed

src/modules/AdvancedPaste/AdvancedPaste/Helpers/JsonHelper.cs

+100-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Linq;
8+
using System.Text.RegularExpressions;
79
using System.Threading.Tasks;
810
using System.Xml;
911
using ManagedCommon;
@@ -14,6 +16,10 @@ namespace AdvancedPaste.Helpers
1416
{
1517
internal static class JsonHelper
1618
{
19+
// List of supported CSV delimiters and Regex to detect separator property
20+
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
21+
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
22+
1723
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
1824
{
1925
Logger.LogTrace();
@@ -53,11 +59,31 @@ internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
5359
{
5460
var csv = new List<string[]>();
5561

56-
foreach (var line in text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
62+
string[] lines = text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
63+
64+
// Detect the csv delimiter and the count of occurrence based on the first two csv lines.
65+
GetCsvDelimiter(lines, out char delim, out int delimCount);
66+
67+
foreach (var line in lines)
5768
{
58-
csv.Add(line.Split(","));
69+
// If line is separator property line, then skip it
70+
if (CsvSepIdentifierRegex.IsMatch(line))
71+
{
72+
continue;
73+
}
74+
75+
// A CSV line is valid if the delimiter occurs at least as many times as in the first data line. (It may occur more often, because the delimiter sometimes also appears inside a data string.)
76+
if (line.Count(x => x == delim) >= delimCount)
77+
{
78+
csv.Add(line.Split(delim));
79+
}
80+
else
81+
{
82+
throw new FormatException("Invalid CSV format: Number of delimiters wrong in the current line.");
83+
}
5984
}
6085

86+
Logger.LogDebug("Convert from csv.");
6187
jsonText = JsonConvert.SerializeObject(csv, Newtonsoft.Json.Formatting.Indented);
6288
}
6389
}
@@ -66,7 +92,79 @@ internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
6692
Logger.LogError("Failed parsing input as csv", ex);
6793
}
6894

95+
// Try to convert the input as plain text
96+
try
97+
{
98+
if (string.IsNullOrEmpty(jsonText))
99+
{
100+
var plainText = new List<string>();
101+
102+
foreach (var line in text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries))
103+
{
104+
plainText.Add(line);
105+
}
106+
107+
Logger.LogDebug("Convert from plain text.");
108+
jsonText = JsonConvert.SerializeObject(plainText, Newtonsoft.Json.Formatting.Indented);
109+
}
110+
}
111+
catch (Exception ex)
112+
{
113+
Logger.LogError("Failed parsing input as plain text", ex);
114+
}
115+
69116
return string.IsNullOrEmpty(jsonText) ? text : jsonText;
70117
}
118+
119+
/// <summary>
/// Detects the delimiter used by the given CSV lines and how often it occurs in the line used for detection.
/// </summary>
/// <param name="csvLines">The lines of the input. The first line may be a separator property line such as <c>sep=;</c>.</param>
/// <param name="delimiter">Receives the detected delimiter character.</param>
/// <param name="delimiterCount">
/// Receives the number of delimiter occurrences: counted in the second line if the first line is a separator
/// property line, otherwise counted in the first line.
/// </param>
/// <exception cref="FormatException">Thrown if no delimiter with at least one occurrence can be detected.</exception>
private static void GetCsvDelimiter(in string[] csvLines, out char delimiter, out int delimiterCount)
{
    delimiter = '\0'; // Unicode "null" character.
    delimiterCount = 0;

    if (csvLines.Length > 1)
    {
        // Try to select the delimiter based on the separator property.
        Match matchChar = CsvSepIdentifierRegex.Match(csvLines[0]);
        if (matchChar.Success)
        {
            // The capture group matches exactly one character, so indexing [0] is safe.
            // The value must not be trimmed: a whitespace separator (e.g. "sep=<TAB>") would become
            // an empty string and the index access would throw.
            // We get the count from the second line, as the first one only contains the character definition and not a CSV data line.
            char delimChar = matchChar.Groups[1].Value[0];
            delimiter = delimChar;
            delimiterCount = csvLines[1].Count(x => x == delimChar);
        }
    }

    if (csvLines.Length > 0 && delimiterCount == 0)
    {
        bool hasNextLine = csvLines.Length >= 2;

        // Try to select the correct delimiter based on the first two CSV lines from a list of predefined delimiters.
        foreach (char c in CsvDelimArry)
        {
            int cntFirstLine = csvLines[0].Count(x => x == c);

            // Only count in the second line if there is one.
            int cntNextLine = hasNextLine ? csvLines[1].Count(x => x == c) : 0;

            // The delimiter is found if its count is bigger than the count of the last selected delimiter
            // and if the next CSV line does not exist or has the same number or more occurrences of the delimiter.
            // (We check the next line to prevent false positives. An existing next line with zero occurrences
            // must not pass this check, otherwise a character that only appears in the first line could win.)
            if (cntFirstLine > delimiterCount && (!hasNextLine || cntNextLine >= cntFirstLine))
            {
                delimiter = c;
                delimiterCount = cntFirstLine;
            }
        }
    }

    // If the delimiter count is 0, we can't detect it and it is no valid CSV.
    if (delimiterCount == 0)
    {
        throw new FormatException("Invalid CSV format: Failed to detect the delimiter.");
    }
}
71169
}
72170
}

0 commit comments

Comments
 (0)