Skip to content

Commit 532c8c1

Browse files
committed
Util compile
1 parent 12e3692 commit 532c8c1

3 files changed

Lines changed: 123 additions & 6 deletions

File tree

test/src/java/com/github/oeuvres/alix/util/ChainTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public void normalizeString()
4545
"1 2",
4646
};
4747
for (int i = 0; i < phrases.length; i++) {
48-
String result = Chain.normalize(phrases[i], " \n\t\r", ' ');
48+
String result = Chain.normalizeSpace(phrases[i]);
4949
assertEquals( expected[i], result);
5050
}
5151
}

util/src/java/com/github/oeuvres/alix/util/Chain.java

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import java.nio.charset.CodingErrorAction;
4242
import java.nio.charset.StandardCharsets;
4343
import java.util.Arrays;
44+
import java.util.LinkedList;
4445

4546
/**
4647
* Mutable {@link CharSequence} backed by a growable {@code char[]} with slack
@@ -1033,6 +1034,61 @@ public boolean lastIs(final char c)
10331034
return this.length > 0 && chars[zero + this.length - 1] == c;
10341035
}
10351036

1037+
/**
1038+
* Like xpath normalize-space(), normalize a char used as a separator, maybe
1039+
* useful for url paths.
1040+
*
1041+
* @param cs the char sequence to normalize.
1042+
* @return a normalized String.
1043+
*/
1044+
static public String normalizeSpace(final CharSequence cs)
1045+
{
1046+
return normalizeSpace(cs," \n\t\r", ' ');
1047+
}
1048+
1049+
/**
1050+
* Like xpath normalize-space(), replace a set of chars, maybe repeated,
1051+
* for example space chars "\t\n  ",
1052+
* by only one char, for example space ' '.
1053+
*
1054+
* @param cs a char sequence to normalize.
1055+
* @param search a set of chars to normalize ex: "\t\n  ".
1056+
* @param replace a normalized char ex: ' '.
1057+
* @return a new normalized String.
1058+
*/
1059+
static public String normalizeSpace(final CharSequence cs, final String search, final char replace)
1060+
{
1061+
// create a new char array, not bigger than actual size
1062+
final int len = cs.length();
1063+
char[] newChars = new char[len];
1064+
int length = 0;
1065+
boolean sepToAppend = false;
1066+
boolean lastIsFullChar = false;
1067+
for (int i = 0; i < len; i++) {
1068+
final char c = cs.charAt(i);
1069+
// full char, append
1070+
if (search.indexOf(c) == -1) {
1071+
lastIsFullChar = true;
1072+
// append a separator only before a token to append
1073+
if (sepToAppend) {
1074+
newChars[length++] = replace;
1075+
sepToAppend = false;
1076+
}
1077+
newChars[length++] = c;
1078+
continue;
1079+
}
1080+
// separator
1081+
if (!lastIsFullChar) {
1082+
// previous was start or separator, append nothing
1083+
continue;
1084+
}
1085+
// append separator
1086+
lastIsFullChar = false;
1087+
sepToAppend = true;
1088+
}
1089+
return new String(newChars, 0, length);
1090+
}
1091+
10361092
/**
10371093
* Inspect the most recently pushed checkpoint without popping it.
10381094
*
@@ -1321,6 +1377,72 @@ public void setLengthI(final int newLength)
13211377
this.length = newLength;
13221378
this.hash = 0;
13231379
}
1380+
1381+
/**
1382+
* Split on one char.
1383+
*
1384+
* @param separator a char, ex: ',', ' ', ';'…
1385+
* @return array of segments separated.
1386+
*/
1387+
public String[] split(final char separator)
1388+
{
1389+
// store generated Strings in alist
1390+
LinkedList<String> list = new LinkedList<>();
1391+
int offset = zero;
1392+
int to = zero;
1393+
int max = zero + this.length;
1394+
char[] dat = chars;
1395+
while (to < max) {
1396+
// not separator, continue
1397+
if (dat[to] != separator) {
1398+
to++;
1399+
continue;
1400+
}
1401+
// separator, add a String, if not empty
1402+
if (to - offset > 0) {
1403+
list.add(new String(dat, offset, to - offset));
1404+
}
1405+
offset = ++to;
1406+
}
1407+
// separator, add a String, if not empty
1408+
if (to - offset > 0) {
1409+
list.add(new String(dat, offset, to - offset));
1410+
}
1411+
return list.toArray(new String[0]);
1412+
}
1413+
1414+
/**
1415+
* Split on one or more char.
1416+
*
1417+
* @param separators, ex: ",; ".
1418+
* @return array of segments separated.
1419+
*/
1420+
public String[] split(final String separators)
1421+
{
1422+
// store generated Strings in alist
1423+
LinkedList<String> list = new LinkedList<>();
1424+
int offset = zero;
1425+
int to = zero;
1426+
int max = zero + this.length;
1427+
char[] dat = chars;
1428+
while (to < max) {
1429+
// not separator, continue
1430+
if (separators.indexOf(dat[to]) == -1) {
1431+
to++;
1432+
continue;
1433+
}
1434+
// separator, add a String, if not empty
1435+
if (to - offset > 0) {
1436+
list.add(new String(dat, offset, to - offset));
1437+
}
1438+
offset = ++to;
1439+
}
1440+
// separator, add a String, if not empty
1441+
if (to - offset > 0) {
1442+
list.add(new String(dat, offset, to - offset));
1443+
}
1444+
return list.toArray(new String[0]);
1445+
}
13241446

13251447
/**
13261448
* Check whether the sequence starts with a given prefix.

util/src/java/com/github/oeuvres/alix/util/ML.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
11
package com.github.oeuvres.alix.util;
22

3-
import java.io.BufferedReader;
43
import java.io.IOException;
5-
import java.io.InputStreamReader;
64
import java.io.Reader;
75
import java.io.StringReader;
86
import java.io.UnsupportedEncodingException;
9-
import java.nio.charset.StandardCharsets;
107
import java.nio.file.Files;
118
import java.nio.file.Paths;
129
import java.util.HashMap;
13-
import java.util.HashSet;
14-
import java.util.Set;
1510

1611
/**
1712
* Some useful tools to deal with “Markup Languages” (xml, but also html tag

0 commit comments

Comments
 (0)