aragozin
diff --git a/‎mxdump/pom.xml
Lines changed: 10 additions & 0 deletions b/‎mxdump/pom.xml
Lines changed: 10 additions & 0 deletions
diff --git a/‎pom.xml
Lines changed: 6 additions & 0 deletions b/‎pom.xml
Lines changed: 6 additions & 0 deletions
diff --git a/‎sjk-json/pom.xml
Lines changed: 64 additions & 0 deletions b/‎sjk-json/pom.xml
Lines changed: 64 additions & 0 deletions
diff --git a/‎sjk-json/src/main/java/org/gridkit/jvmtool/jackson/CharTypes.java
Lines changed: 235 additions & 0 deletions b/‎sjk-json/src/main/java/org/gridkit/jvmtool/jackson/CharTypes.java
Lines changed: 235 additions & 0 deletions
diff --git a/‎sjk-json/src/main/java/org/gridkit/jvmtool/jackson/CharacterEscapes.java
Lines changed: 47 additions & 0 deletions b/‎sjk-json/src/main/java/org/gridkit/jvmtool/jackson/CharacterEscapes.java
Lines changed: 47 additions & 0 deletions
@@ -36,6 +36,16 @@
             <groupId>org.gridkit.jvmtool</groupId>
             <artifactId>sjk-core</artifactId>
         </dependency>
+        <dependency>
+        	<groupId>org.gridkit.jvmtool</groupId>
+        	<artifactId>sjk-json</artifactId>
+        </dependency>
+        <dependency>
+        	<groupId>org.apache.tomcat.embed</groupId>
+        	<artifactId>tomcat-embed-core</artifactId>
+        	<version>8.5.16</version>
+            <scope>test</scope>
+        </dependency>
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
 
@@ -39,6 +39,7 @@
     <module>sjk-hflame</module>
     <module>sjk-win32</module>
     <module>sjk-core</module>
+    <module>sjk-json</module>
     <module>mxdump</module>
     <module>gcflow</module>
     <module>sjk</module>
@@ -99,6 +100,11 @@
         <artifactId>mxdump</artifactId>
         <version>${project.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.gridkit.jvmtool</groupId>
+        <artifactId>sjk-json</artifactId>
+        <version>${project.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.gridkit.jvmtool</groupId>
         <artifactId>ygc-bench</artifactId>
 
@@ -0,0 +1,64 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+<!--
+
+    Copyright 2012 Alexey Ragozin
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+      <groupId>org.gridkit.jvmtool</groupId>
+      <artifactId>jvmtool-umbrella-pom</artifactId>
+      <version>0.13-SNAPSHOT</version>
+      <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <artifactId>sjk-json</artifactId>
+    <name>${project.groupId}::${project.artifactId}</name>
+    <description>Minimal JSON support</description>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.carrotsearch</groupId>
+            <artifactId>junit-benchmarks</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.6</source>
+                    <target>1.6</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
@@ -0,0 +1,235 @@
+package org.gridkit.jvmtool.jackson;
+
+import java.util.Arrays;
+
+public final class CharTypes
+{
+    private final static char[] HEX_CHARS = "0123456789ABCDEF".toCharArray();
+    private final static byte[] HEX_BYTES;
+    static {
+        int len = HEX_CHARS.length;
+        HEX_BYTES = new byte[len];
+        for (int i = 0; i < len; ++i) {
+            HEX_BYTES[i] = (byte) HEX_CHARS[i];
+        }
+    }
+
+
+    /**
+     * Lookup table used for determining which input characters
+     * need special handling when contained in text segment.
+     */
+    final static int[] sInputCodes;
+    static {
+        /* 96 would do for most cases (backslash is ascii 94)
+         * but if we want to do lookups by raw bytes it's better
+         * to have full table
+         */
+        int[] table = new int[256];
+        // Control chars and non-space white space are not allowed unquoted
+        for (int i = 0; i < 32; ++i) {
+            table[i] = -1;
+        }
+        // And then string end and quote markers are special too
+        table['"'] = 1;
+        table['\\'] = 1;
+        sInputCodes = table;
+    }
+
+    /**
+     * Additionally we can combine UTF-8 decoding info into similar
+     * data table.
+     */
+    final static int[] sInputCodesUtf8;
+    static {
+        int[] table = new int[sInputCodes.length];
+        System.arraycopy(sInputCodes, 0, table, 0, sInputCodes.length);
+        for (int c = 128; c < 256; ++c) {
+            int code;
+
+            // We'll add number of bytes needed for decoding
+            if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
+                code = 2;
+            } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
+                code = 3;
+            } else if ((c & 0xF8) == 0xF0) {
+                // 4 bytes; double-char with surrogates and all...
+                code = 4;
+            } else {
+                // And -1 seems like a good "universal" error marker...
+                code = -1;
+            }
+            table[c] = code;
+        }
+        sInputCodesUtf8 = table;
+    }
+
+    /**
+     * To support non-default (and -standard) unquoted field names mode,
+     * need to have alternate checking.
+     * Basically this is list of 8-bit ASCII characters that are legal
+     * as part of Javascript identifier
+     *
+     * @since 1.2
+     */
+    final static int[] sInputCodesJsNames;
+    static {
+        int[] table = new int[256];
+        // Default is "not a name char", mark ones that are
+        Arrays.fill(table, -1);
+        // Assume rules with JS same as Java (change if/as needed)
+        for (int i = 33; i < 256; ++i) {
+            if (Character.isJavaIdentifierPart((char) i)) {
+                table[i] = 0;
+            }
+        }
+        /* As per [JACKSON-267], '@', '#' and '*' are also to be accepted as well.
+         * And '-' (for hyphenated names); and '+' for sake of symmetricity...
+         */
+        table['@'] = 0;
+        table['#'] = 0;
+        table['*'] = 0;
+        table['-'] = 0;
+        table['+'] = 0;
+        sInputCodesJsNames = table;
+    }
+
+    /**
+     * This table is similar to Latin-1, except that it marks all "high-bit"
+     * code as ok. They will be validated at a later point, when decoding
+     * name
+     */
+    final static int[] sInputCodesUtf8JsNames;
+    static {
+        int[] table = new int[256];
+        // start with 8-bit JS names 
+        System.arraycopy(sInputCodesJsNames, 0, table, 0, sInputCodesJsNames.length);
+        Arrays.fill(table, 128, 128, 0);
+        sInputCodesUtf8JsNames = table;
+    }
+
+    /**
+     * Decoding table used to quickly determine characters that are
+     * relevant within comment content
+     */
+    final static int[] sInputCodesComment = new int[256];
+    static {
+        // but first: let's start with UTF-8 multi-byte markers:
+        System.arraycopy(sInputCodesUtf8, 128, sInputCodesComment, 128, 128);
+    
+        // default (0) means "ok" (skip); -1 invalid, others marked by char itself
+        Arrays.fill(sInputCodesComment, 0, 32, -1); // invalid white space
+        sInputCodesComment['\t'] = 0; // tab is still fine
+        sInputCodesComment['\n'] = '\n'; // lf/cr need to be observed, ends cpp comment
+        sInputCodesComment['\r'] = '\r';
+        sInputCodesComment['*'] = '*'; // end marker for c-style comments
+    }
+
+    /**
+     * Lookup table used for determining which output characters in 
+     * 7-bit ASCII range need to be quoted.
+     */
+    final static int[] sOutputEscapes128;
+    static {
+        int[] table = new int[128];
+        // Control chars need generic escape sequence
+        for (int i = 0; i < 32; ++i) {
+            // 04-Mar-2011, tatu: Used to use "-(i + 1)", replaced with constants
+            table[i] = CharacterEscapes.ESCAPE_STANDARD;
+        }
+        /* Others (and some within that range too) have explicit shorter
+         * sequences
+         */
+        table['"'] = '"';
+        table['\\'] = '\\';
+        // Escaping of slash is optional, so let's not add it
+        table[0x08] = 'b';
+        table[0x09] = 't';
+        table[0x0C] = 'f';
+        table[0x0A] = 'n';
+        table[0x0D] = 'r';
+        sOutputEscapes128 = table;
+    }
+
+    /**
+     * Lookup table for the first 128 Unicode characters (7-bit ASCII)
+     * range. For actual hex digits, contains corresponding value;
+     * for others -1.
+     */
+    final static int[] sHexValues = new int[128];
+    static {
+        Arrays.fill(sHexValues, -1);
+        for (int i = 0; i < 10; ++i) {
+            sHexValues['0' + i] = i;
+        }
+        for (int i = 0; i < 6; ++i) {
+            sHexValues['a' + i] = 10 + i;
+            sHexValues['A' + i] = 10 + i;
+        }
+    }
+
+    public final static int[] getInputCodeLatin1() { return sInputCodes; }
+    public final static int[] getInputCodeUtf8() { return sInputCodesUtf8; }
+
+    public final static int[] getInputCodeLatin1JsNames() { return sInputCodesJsNames; }
+    public final static int[] getInputCodeUtf8JsNames() { return sInputCodesUtf8JsNames; }
+
+    public final static int[] getInputCodeComment() { return sInputCodesComment; }
+    
+    /**
+     * Accessor for getting a read-only encoding table for first 128 Unicode
+     * code points (single-byte UTF-8 characters).
+     * Value of 0 means "no escaping"; other positive values that value is character
+     * to use after backslash; and negative values that generic (backslash - u)
+     * escaping is to be used.
+     */
+    public final static int[] get7BitOutputEscapes() { return sOutputEscapes128; }
+
+    public static int charToHex(int ch)
+    {
+        return (ch > 127) ? -1 : sHexValues[ch];
+    }
+
+    public static void appendQuoted(StringBuilder sb, String content)
+    {
+        final int[] escCodes = sOutputEscapes128;
+        int escLen = escCodes.length;
+        for (int i = 0, len = content.length(); i < len; ++i) {
+            char c = content.charAt(i);
+            if (c >= escLen || escCodes[c] == 0) {
+                sb.append(c);
+                continue;
+            }
+            sb.append('\\');
+            int escCode = escCodes[c];
+            if (escCode < 0) { // generic quoting (hex value)
+                // We know that it has to fit in just 2 hex chars
+                sb.append('u');
+                sb.append('0');
+                sb.append('0');
+                int value = -(escCode + 1);
+                sb.append(HEX_CHARS[value >> 4]);
+                sb.append(HEX_CHARS[value & 0xF]);
+            } else { // "named", i.e. prepend with slash
+                sb.append((char) escCode);
+            }
+        }
+    }
+
+    /**
+     * @since 1.6
+     */
+    public static char[] copyHexChars()
+    {
+        return (char[]) HEX_CHARS.clone();
+    }
+
+    /**
+     * @since 1.6
+     */
+    public static byte[] copyHexBytes()
+    {
+        return (byte[]) HEX_BYTES.clone();
+    }
+}
+
@@ -0,0 +1,47 @@
+package org.gridkit.jvmtool.jackson;
+
+/**
+ * Base type holding the marker constants used in character-escape lookup
+ * tables, together with a helper that hands out the standard JSON escape
+ * table. For JSON, escaping applies to both property names and String values.
+ *
+ * @since 1.8
+ */
+public abstract class CharacterEscapes
+{
+    /**
+     * Lookup-table marker: the matching character is output as-is,
+     * without any escaping.
+     */
+    public final static int ESCAPE_NONE = 0;
+
+    /**
+     * Lookup-table marker: the matching character is escaped using the
+     * standard mechanism; for JSON this means (for example) the
+     * "backslash - u" escape method.
+     */
+    public final static int ESCAPE_STANDARD = -1;
+
+    /**
+     * Lookup-table marker: the matching character needs a custom escape,
+     * whose exact sequence has to be obtained separately (this class
+     * itself declares no method for that).
+     */
+    public final static int ESCAPE_CUSTOM = -2;
+
+    /**
+     * Returns a fresh copy of the standard JSON escape definitions for the
+     * 7-bit ASCII range, as exposed by {@link CharTypes#get7BitOutputEscapes()}.
+     * The returned array is independent of the shared table, so the caller
+     * can modify it freely, for example to slightly customize the defaults.
+     *
+     * @return new array with the same length and contents as the standard table
+     */
+    public static int[] standardAsciiEscapesForJSON()
+    {
+        final int[] standard = CharTypes.get7BitOutputEscapes();
+        // Cloning yields a same-length, same-content array the caller owns.
+        return standard.clone();
+    }
+}