4
4
5
5
using System ;
6
6
using System . Collections . Generic ;
7
+ using System . Linq ;
8
+ using System . Text . RegularExpressions ;
7
9
using System . Threading . Tasks ;
8
10
using System . Xml ;
9
11
using ManagedCommon ;
@@ -14,6 +16,10 @@ namespace AdvancedPaste.Helpers
14
16
{
15
17
internal static class JsonHelper
16
18
{
19
+ // Candidate CSV delimiters tried during auto-detection, and the case-insensitive regex that recognizes a separator declaration line of the form "sep=<single character>" and captures that character in group 1.
20
+ private static readonly char [ ] CsvDelimArry = [ ',' , ';' , '\t ' ] ;
21
+ private static readonly Regex CsvSepIdentifierRegex = new Regex ( @"^sep=(.)$" , RegexOptions . IgnoreCase ) ;
22
+
17
23
internal static string ToJsonFromXmlOrCsv ( DataPackageView clipboardData )
18
24
{
19
25
Logger . LogTrace ( ) ;
@@ -53,11 +59,31 @@ internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
53
59
{
54
60
var csv = new List < string [ ] > ( ) ;
55
61
56
- foreach ( var line in text . Split ( new string [ ] { Environment . NewLine } , StringSplitOptions . RemoveEmptyEntries ) )
62
+ string [ ] lines = text . Split ( new string [ ] { Environment . NewLine } , StringSplitOptions . RemoveEmptyEntries ) ;
63
+
64
+ // Detect the csv delimiter and the count of occurrence based on the first two csv lines.
65
+ GetCsvDelimiter ( lines , out char delim , out int delimCount ) ;
66
+
67
+ foreach ( var line in lines )
57
68
{
58
- csv . Add ( line . Split ( "," ) ) ;
69
+ // If line is separator property line, then skip it
70
+ if ( CsvSepIdentifierRegex . IsMatch ( line ) )
71
+ {
72
+ continue ;
73
+ }
74
+
75
+ // A CSV line is valid, if the delimiter occurs more or equal times in every line compared to the first data line. (More because sometimes the delimiter occurs in a data string.)
76
+ if ( line . Count ( x => x == delim ) >= delimCount )
77
+ {
78
+ csv . Add ( line . Split ( delim ) ) ;
79
+ }
80
+ else
81
+ {
82
+ throw new FormatException ( "Invalid CSV format: Number of delimiters wrong in the current line." ) ;
83
+ }
59
84
}
60
85
86
+ Logger . LogDebug ( "Convert from csv." ) ;
61
87
jsonText = JsonConvert . SerializeObject ( csv , Newtonsoft . Json . Formatting . Indented ) ;
62
88
}
63
89
}
@@ -66,7 +92,79 @@ internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
66
92
Logger . LogError ( "Failed parsing input as csv" , ex ) ;
67
93
}
68
94
95
+ // Try convert Plain Text
96
+ try
97
+ {
98
+ if ( string . IsNullOrEmpty ( jsonText ) )
99
+ {
100
+ var plainText = new List < string > ( ) ;
101
+
102
+ foreach ( var line in text . Split ( new string [ ] { Environment . NewLine } , StringSplitOptions . RemoveEmptyEntries ) )
103
+ {
104
+ plainText . Add ( line ) ;
105
+ }
106
+
107
+ Logger . LogDebug ( "Convert from plain text." ) ;
108
+ jsonText = JsonConvert . SerializeObject ( plainText , Newtonsoft . Json . Formatting . Indented ) ;
109
+ }
110
+ }
111
+ catch ( Exception ex )
112
+ {
113
+ Logger . LogError ( "Failed parsing input as plain text" , ex ) ;
114
+ }
115
+
69
116
return string . IsNullOrEmpty ( jsonText ) ? text : jsonText ;
70
117
}
118
+
119
/// <summary>
/// Detects the delimiter of CSV text and how often it occurs in the first data line.
/// </summary>
/// <remarks>
/// Detection runs in two stages:
/// 1. If there are at least two lines and the first one is a separator declaration
///    ("sep=X", matched by <see cref="CsvSepIdentifierRegex"/>), X is used and its
///    occurrences are counted in the second line.
/// 2. If that yields no count, every character in <see cref="CsvDelimArry"/> is tried
///    against the first line (and validated against the second line when there is one);
///    the candidate with the highest count in the first line wins.
/// </remarks>
/// <param name="csvLines">The non-empty lines of the input text, in order.</param>
/// <param name="delimiter">Receives the detected delimiter character.</param>
/// <param name="delimiterCount">Receives the number of delimiter occurrences in the first data line.</param>
/// <exception cref="FormatException">Thrown when no delimiter with at least one occurrence can be detected.</exception>
private static void GetCsvDelimiter(in string[] csvLines, out char delimiter, out int delimiterCount)
{
    delimiter = '\0'; // Unicode "null" character.
    delimiterCount = 0;

    if (csvLines.Length > 1)
    {
        // Try to select the delimiter based on the separator property.
        Match sepMatch = CsvSepIdentifierRegex.Match(csvLines[0]);
        if (sepMatch.Success)
        {
            // The capture group holds exactly one character, so index 0 is always valid.
            // It must not be trimmed: a whitespace delimiter (e.g. tab) would turn into an
            // empty string and the indexer would throw.
            char declaredDelimiter = sepMatch.Groups[1].Value[0];
            delimiter = declaredDelimiter;

            // Count in the second line, because the first one only carries the declaration and no CSV data.
            delimiterCount = csvLines[1].Count(x => x == declaredDelimiter);
        }
    }

    if (csvLines.Length > 0 && delimiterCount == 0)
    {
        bool hasSecondLine = csvLines.Length >= 2;

        // Try to select the correct delimiter based on the first two CSV lines from a list of predefined delimiters.
        foreach (char candidate in CsvDelimArry)
        {
            int cntFirstLine = csvLines[0].Count(x => x == candidate);
            int cntNextLine = hasSecondLine ? csvLines[1].Count(x => x == candidate) : 0;

            // A candidate is selected if it occurs more often than the previously selected one
            // and the next line either does not exist or contains it at least as often.
            // The second-line check is skipped only when there is no second line; an existing
            // second line without the candidate must reject it to prevent false positives.
            if (cntFirstLine > delimiterCount && (!hasSecondLine || cntNextLine >= cntFirstLine))
            {
                delimiter = candidate;
                delimiterCount = cntFirstLine;
            }
        }
    }

    // If the delimiter count is 0, we can't detect it and it is no valid CSV.
    if (delimiterCount == 0)
    {
        throw new FormatException("Invalid CSV format: Failed to detect the delimiter.");
    }
}
71
169
}
72
170
}
0 commit comments