Skip to content

Commit 07847d4

Browse files
committed
Move JSON utility into separate package
1 parent d3a4d0c commit 07847d4

19 files changed

+4673
-0
lines changed

mxdump/pom.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@
3636
<groupId>org.gridkit.jvmtool</groupId>
3737
<artifactId>sjk-core</artifactId>
3838
</dependency>
39+
<dependency>
40+
<groupId>org.gridkit.jvmtool</groupId>
41+
<artifactId>sjk-json</artifactId>
42+
</dependency>
43+
<dependency>
44+
<groupId>org.apache.tomcat.embed</groupId>
45+
<artifactId>tomcat-embed-core</artifactId>
46+
<version>8.5.16</version>
47+
<scope>test</scope>
48+
</dependency>
3949
<dependency>
4050
<groupId>junit</groupId>
4151
<artifactId>junit</artifactId>

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
<module>sjk-hflame</module>
4040
<module>sjk-win32</module>
4141
<module>sjk-core</module>
42+
<module>sjk-json</module>
4243
<module>mxdump</module>
4344
<module>gcflow</module>
4445
<module>sjk</module>
@@ -99,6 +100,11 @@
99100
<artifactId>mxdump</artifactId>
100101
<version>${project.version}</version>
101102
</dependency>
103+
<dependency>
104+
<groupId>org.gridkit.jvmtool</groupId>
105+
<artifactId>sjk-json</artifactId>
106+
<version>${project.version}</version>
107+
</dependency>
102108
<dependency>
103109
<groupId>org.gridkit.jvmtool</groupId>
104110
<artifactId>ygc-bench</artifactId>

sjk-json/pom.xml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
2+
3+
<!--
4+
5+
Copyright 2012 Alexey Ragozin
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
<modelVersion>4.0.0</modelVersion>
22+
23+
<parent>
24+
<groupId>org.gridkit.jvmtool</groupId>
25+
<artifactId>jvmtool-umbrella-pom</artifactId>
26+
<version>0.13-SNAPSHOT</version>
27+
<relativePath>../pom.xml</relativePath>
28+
</parent>
29+
30+
<artifactId>sjk-json</artifactId>
31+
<name>${project.groupId}::${project.artifactId}</name>
32+
<description>Minimal JSON support</description>
33+
34+
<dependencies>
35+
<dependency>
36+
<groupId>junit</groupId>
37+
<artifactId>junit</artifactId>
38+
<scope>test</scope>
39+
</dependency>
40+
<dependency>
41+
<groupId>org.assertj</groupId>
42+
<artifactId>assertj-core</artifactId>
43+
<scope>test</scope>
44+
</dependency>
45+
<dependency>
46+
<groupId>com.carrotsearch</groupId>
47+
<artifactId>junit-benchmarks</artifactId>
48+
<scope>test</scope>
49+
</dependency>
50+
</dependencies>
51+
52+
<build>
53+
<plugins>
54+
<plugin>
55+
<groupId>org.apache.maven.plugins</groupId>
56+
<artifactId>maven-compiler-plugin</artifactId>
57+
<configuration>
58+
<source>1.6</source>
59+
<target>1.6</target>
60+
</configuration>
61+
</plugin>
62+
</plugins>
63+
</build>
64+
</project>
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
package org.gridkit.jvmtool.jackson;
2+
3+
import java.util.Arrays;
4+
5+
public final class CharTypes
6+
{
7+
private final static char[] HEX_CHARS = "0123456789ABCDEF".toCharArray();
8+
private final static byte[] HEX_BYTES;
9+
static {
10+
int len = HEX_CHARS.length;
11+
HEX_BYTES = new byte[len];
12+
for (int i = 0; i < len; ++i) {
13+
HEX_BYTES[i] = (byte) HEX_CHARS[i];
14+
}
15+
}
16+
17+
18+
/**
19+
* Lookup table used for determining which input characters
20+
* need special handling when contained in text segment.
21+
*/
22+
final static int[] sInputCodes;
23+
static {
24+
/* 96 would do for most cases (backslash is ascii 94)
25+
* but if we want to do lookups by raw bytes it's better
26+
* to have full table
27+
*/
28+
int[] table = new int[256];
29+
// Control chars and non-space white space are not allowed unquoted
30+
for (int i = 0; i < 32; ++i) {
31+
table[i] = -1;
32+
}
33+
// And then string end and quote markers are special too
34+
table['"'] = 1;
35+
table['\\'] = 1;
36+
sInputCodes = table;
37+
}
38+
39+
/**
40+
* Additionally we can combine UTF-8 decoding info into similar
41+
* data table.
42+
*/
43+
final static int[] sInputCodesUtf8;
44+
static {
45+
int[] table = new int[sInputCodes.length];
46+
System.arraycopy(sInputCodes, 0, table, 0, sInputCodes.length);
47+
for (int c = 128; c < 256; ++c) {
48+
int code;
49+
50+
// We'll add number of bytes needed for decoding
51+
if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
52+
code = 2;
53+
} else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
54+
code = 3;
55+
} else if ((c & 0xF8) == 0xF0) {
56+
// 4 bytes; double-char with surrogates and all...
57+
code = 4;
58+
} else {
59+
// And -1 seems like a good "universal" error marker...
60+
code = -1;
61+
}
62+
table[c] = code;
63+
}
64+
sInputCodesUtf8 = table;
65+
}
66+
67+
/**
68+
* To support non-default (and -standard) unquoted field names mode,
69+
* need to have alternate checking.
70+
* Basically this is list of 8-bit ASCII characters that are legal
71+
* as part of Javascript identifier
72+
*
73+
* @since 1.2
74+
*/
75+
final static int[] sInputCodesJsNames;
76+
static {
77+
int[] table = new int[256];
78+
// Default is "not a name char", mark ones that are
79+
Arrays.fill(table, -1);
80+
// Assume rules with JS same as Java (change if/as needed)
81+
for (int i = 33; i < 256; ++i) {
82+
if (Character.isJavaIdentifierPart((char) i)) {
83+
table[i] = 0;
84+
}
85+
}
86+
/* As per [JACKSON-267], '@', '#' and '*' are also to be accepted as well.
87+
* And '-' (for hyphenated names); and '+' for sake of symmetricity...
88+
*/
89+
table['@'] = 0;
90+
table['#'] = 0;
91+
table['*'] = 0;
92+
table['-'] = 0;
93+
table['+'] = 0;
94+
sInputCodesJsNames = table;
95+
}
96+
97+
/**
98+
* This table is similar to Latin-1, except that it marks all "high-bit"
99+
* code as ok. They will be validated at a later point, when decoding
100+
* name
101+
*/
102+
final static int[] sInputCodesUtf8JsNames;
103+
static {
104+
int[] table = new int[256];
105+
// start with 8-bit JS names
106+
System.arraycopy(sInputCodesJsNames, 0, table, 0, sInputCodesJsNames.length);
107+
Arrays.fill(table, 128, 128, 0);
108+
sInputCodesUtf8JsNames = table;
109+
}
110+
111+
/**
112+
* Decoding table used to quickly determine characters that are
113+
* relevant within comment content
114+
*/
115+
final static int[] sInputCodesComment = new int[256];
116+
static {
117+
// but first: let's start with UTF-8 multi-byte markers:
118+
System.arraycopy(sInputCodesUtf8, 128, sInputCodesComment, 128, 128);
119+
120+
// default (0) means "ok" (skip); -1 invalid, others marked by char itself
121+
Arrays.fill(sInputCodesComment, 0, 32, -1); // invalid white space
122+
sInputCodesComment['\t'] = 0; // tab is still fine
123+
sInputCodesComment['\n'] = '\n'; // lf/cr need to be observed, ends cpp comment
124+
sInputCodesComment['\r'] = '\r';
125+
sInputCodesComment['*'] = '*'; // end marker for c-style comments
126+
}
127+
128+
/**
129+
* Lookup table used for determining which output characters in
130+
* 7-bit ASCII range need to be quoted.
131+
*/
132+
final static int[] sOutputEscapes128;
133+
static {
134+
int[] table = new int[128];
135+
// Control chars need generic escape sequence
136+
for (int i = 0; i < 32; ++i) {
137+
// 04-Mar-2011, tatu: Used to use "-(i + 1)", replaced with constants
138+
table[i] = CharacterEscapes.ESCAPE_STANDARD;
139+
}
140+
/* Others (and some within that range too) have explicit shorter
141+
* sequences
142+
*/
143+
table['"'] = '"';
144+
table['\\'] = '\\';
145+
// Escaping of slash is optional, so let's not add it
146+
table[0x08] = 'b';
147+
table[0x09] = 't';
148+
table[0x0C] = 'f';
149+
table[0x0A] = 'n';
150+
table[0x0D] = 'r';
151+
sOutputEscapes128 = table;
152+
}
153+
154+
/**
155+
* Lookup table for the first 128 Unicode characters (7-bit ASCII)
156+
* range. For actual hex digits, contains corresponding value;
157+
* for others -1.
158+
*/
159+
final static int[] sHexValues = new int[128];
160+
static {
161+
Arrays.fill(sHexValues, -1);
162+
for (int i = 0; i < 10; ++i) {
163+
sHexValues['0' + i] = i;
164+
}
165+
for (int i = 0; i < 6; ++i) {
166+
sHexValues['a' + i] = 10 + i;
167+
sHexValues['A' + i] = 10 + i;
168+
}
169+
}
170+
171+
public final static int[] getInputCodeLatin1() { return sInputCodes; }
172+
public final static int[] getInputCodeUtf8() { return sInputCodesUtf8; }
173+
174+
public final static int[] getInputCodeLatin1JsNames() { return sInputCodesJsNames; }
175+
public final static int[] getInputCodeUtf8JsNames() { return sInputCodesUtf8JsNames; }
176+
177+
public final static int[] getInputCodeComment() { return sInputCodesComment; }
178+
179+
/**
180+
* Accessor for getting a read-only encoding table for first 128 Unicode
181+
* code points (single-byte UTF-8 characters).
182+
* Value of 0 means "no escaping"; other positive values that value is character
183+
* to use after backslash; and negative values that generic (backslash - u)
184+
* escaping is to be used.
185+
*/
186+
public final static int[] get7BitOutputEscapes() { return sOutputEscapes128; }
187+
188+
public static int charToHex(int ch)
189+
{
190+
return (ch > 127) ? -1 : sHexValues[ch];
191+
}
192+
193+
public static void appendQuoted(StringBuilder sb, String content)
194+
{
195+
final int[] escCodes = sOutputEscapes128;
196+
int escLen = escCodes.length;
197+
for (int i = 0, len = content.length(); i < len; ++i) {
198+
char c = content.charAt(i);
199+
if (c >= escLen || escCodes[c] == 0) {
200+
sb.append(c);
201+
continue;
202+
}
203+
sb.append('\\');
204+
int escCode = escCodes[c];
205+
if (escCode < 0) { // generic quoting (hex value)
206+
// We know that it has to fit in just 2 hex chars
207+
sb.append('u');
208+
sb.append('0');
209+
sb.append('0');
210+
int value = -(escCode + 1);
211+
sb.append(HEX_CHARS[value >> 4]);
212+
sb.append(HEX_CHARS[value & 0xF]);
213+
} else { // "named", i.e. prepend with slash
214+
sb.append((char) escCode);
215+
}
216+
}
217+
}
218+
219+
/**
220+
* @since 1.6
221+
*/
222+
public static char[] copyHexChars()
223+
{
224+
return (char[]) HEX_CHARS.clone();
225+
}
226+
227+
/**
228+
* @since 1.6
229+
*/
230+
public static byte[] copyHexBytes()
231+
{
232+
return (byte[]) HEX_BYTES.clone();
233+
}
234+
}
235+
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package org.gridkit.jvmtool.jackson;
2+
3+
/**
4+
* Abstract base class that defines interface for customizing character
5+
* escaping aspects for String values, for formats that use escaping.
6+
* For JSON this applies to both property names and String values.
7+
*
8+
* @since 1.8
9+
*/
10+
public abstract class CharacterEscapes
11+
{
12+
/**
13+
* Value used for lookup tables to indicate that matching characters
14+
* do not need to be escaped.
15+
*/
16+
public final static int ESCAPE_NONE = 0;
17+
18+
/**
19+
* Value used for lookup tables to indicate that matching characters
20+
* are to be escaped using standard escaping; for JSON this means
21+
* (for example) using "backslash - u" escape method.
22+
*/
23+
public final static int ESCAPE_STANDARD = -1;
24+
25+
/**
26+
* Value used for lookup tables to indicate that matching characters
27+
* will need custom escapes; and that another call
28+
* to {@link #getEscapeSequence} is needed to figure out exact escape
29+
* sequence to output.
30+
*/
31+
public final static int ESCAPE_CUSTOM = -2;
32+
33+
/**
34+
* Helper method that can be used to get a copy of standard JSON
35+
* escape definitions; this is useful when just wanting to slightly
36+
* customize definitions. Caller can modify this array as it sees
37+
* fit and usually returns modified instance via {@link #getEscapeCodesForAscii}
38+
*/
39+
public static int[] standardAsciiEscapesForJSON()
40+
{
41+
int[] esc = CharTypes.get7BitOutputEscapes();
42+
int len = esc.length;
43+
int[] result = new int[len];
44+
System.arraycopy(esc, 0, result, 0, esc.length);
45+
return result;
46+
}
47+
}

0 commit comments

Comments
 (0)