Skip to content

Commit c1a5ee7

Browse files
committed
Chain rewrite
1 parent b431bcd commit c1a5ee7

5 files changed

Lines changed: 1583 additions & 1337 deletions

File tree

fr/src/resources/com/github/oeuvres/alix/fr/fr-lemma-ud.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# manquent les participes passés adjectifs
12
lemma,udpos,count,distinct_forms
23
le,DET,11102181127,4
34
de,ADP,8818296701,2

util/src/java/com/github/oeuvres/alix/util/CSVReader.java

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,44 @@ public Row readRow() throws IOException
130130
Chain cell = row.next();
131131
int bufPos = this.bufPos;
132132
int bufMark = bufPos; // from where to start a copy
133-
char sep = this.sep; // localize
134-
boolean sep1 = (sep != 0);
135133
// char quote = this.quote;
136134
// boolean inquote;
137135
char lastChar = 0;
138136
int crlf = 0; // used to not append CR to a CRLF ending line
137+
138+
/*
139+
final ArrayList<String> out = new ArrayList<>();
140+
final StringBuilder sb = new StringBuilder();
141+
boolean inQuotes = false;
142+
for (int i = 0; i < line.length(); i++) {
143+
char ch = line.charAt(i);
144+
if (inQuotes) {
145+
if (ch == '"') {
146+
if (i + 1 < line.length() && line.charAt(i + 1) == '"') {
147+
sb.append('"');
148+
i++;
149+
} else {
150+
inQuotes = false;
151+
}
152+
} else {
153+
sb.append(ch);
154+
}
155+
} else {
156+
if (ch == '"') {
157+
inQuotes = true;
158+
} else if (ch == ',') {
159+
out.add(sb.toString());
160+
sb.setLength(0);
161+
} else {
162+
sb.append(ch);
163+
}
164+
}
165+
}
166+
out.add(sb.toString());
167+
return out;
168+
}
169+
*/
170+
boolean inQuotes = false;
139171
while (true) {
140172
// fill buffer
141173
if (bufLen == bufPos) {
@@ -170,10 +202,8 @@ else if (lastChar == CR)
170202
break;
171203
if (c == CR)
172204
continue;
173-
// exclude case of cell separator
174-
if (sep1) { // one char declared for cell separator
175-
if (c != sep)
176-
continue;
205+
if (sep != 0 && c != sep) {
206+
continue;
177207
} else { // nice sugar on common separators
178208
if (c != '\t' && c != ',' && c != ';')
179209
continue;
@@ -237,7 +267,7 @@ public Row reset()
237267
{
238268
this.pointer = 0;
239269
for (int i = cols - 1; i >= 0; i--) {
240-
cells[i].reset();
270+
cells[i].clear();
241271
}
242272
return this;
243273
}

0 commit comments

Comments
 (0)