1+ package eu .cqrxs .crypt .encoding ;
2+
3+ import java .math .BigDecimal ;
4+ import java .nio .ByteBuffer ;
5+ import java .nio .charset .StandardCharsets ;
6+ import java .util .Arrays ;
7+ import java .util .regex .Pattern ;
8+
9+ import eu .cqrxs .util .CException ;
10+ import java .nio .charset .Charset ;
11+
12+
13+ public class Ascii85Coder implements IEncodable {
14+
15+ private final static int ASCII_SHIFT = 33 ;
16+
17+ private static int [] BASE85_POW = {
18+ 1 ,
19+ 85 ,
20+ 85 * 85 ,
21+ 85 * 85 * 85 ,
22+ 85 * 85 * 85 *85
23+ };
24+
25+ private static Pattern REMOVE_WHITESPACE = Pattern .compile ("\\ s+" );
26+
27+ public Ascii85Coder () {
28+ }
29+
30+ /**
31+ * encode
32+ * @param inString
33+ * @return {@link String}
34+ */
35+ public String encode (String inString ) {
36+ byte [] inBytes = inString .getBytes (Charset .forName ("UTF-8" ));
37+ return encodeBytesToString (inBytes );
38+ }
39+
40+
41+ public String encodeBytesToString (byte [] payload ) {
42+ if (payload == null ) {
43+ throw new IllegalArgumentException ("You must provide a non-null input" );
44+ }
45+ //By using five ASCII characters to represent four bytes of binary data the encoded size ¹⁄₄ is larger than the original
46+ StringBuilder stringBuff = new StringBuilder (payload .length * 5 /4 );
47+ //We break the payload into int (4 bytes)
48+ byte [] chunk = new byte [4 ];
49+ int chunkIndex = 0 ;
50+ for (int i = 0 ; i < payload .length ; i ++) {
51+ byte currByte = payload [i ];
52+ chunk [chunkIndex ++] = currByte ;
53+
54+ if (chunkIndex == 4 ) {
55+ int value = byteToInt (chunk );
56+ //Because all-zero data is quite common, an exception is made for the sake of data compression,
57+ //and an all-zero group is encoded as a single character "z" instead of "!!!!!".
58+ if (value == 0 ) {
59+ stringBuff .append ('z' );
60+ } else {
61+ stringBuff .append (encodeChunk (value ));
62+ }
63+ Arrays .fill (chunk , (byte ) 0 );
64+ chunkIndex = 0 ;
65+ }
66+ }
67+
68+ //If we didn't end on 0, then we need some padding
69+ if (chunkIndex > 0 ) {
70+ int numPadded = chunk .length - chunkIndex ;
71+ Arrays .fill (chunk , chunkIndex , chunk .length , (byte )0 );
72+ int value = byteToInt (chunk );
73+ char [] encodedChunk = encodeChunk (value );
74+ for (int i = 0 ; i < encodedChunk .length - numPadded ; i ++) {
75+ stringBuff .append (encodedChunk [i ]);
76+ }
77+ }
78+
79+ return stringBuff .toString ();
80+ }
81+
82+ private static char [] encodeChunk (int value ) {
83+ //transform value to unsigned long
84+ long longValue = value & 0x00000000ffffffffL ;
85+ char [] encodedChunk = new char [5 ];
86+ for (int i = 0 ; i < encodedChunk .length ; i ++) {
87+ encodedChunk [i ] = (char ) ((longValue / BASE85_POW [4 - i ]) + ASCII_SHIFT );
88+ longValue = longValue % BASE85_POW [4 - i ];
89+ }
90+ return encodedChunk ;
91+ }
92+
93+ /**
94+ * decode
95+ * @param ascii85String ASCII encoded String
96+ * @return {@link String}
97+ */
98+ public String decode (String ascii85String ) {
99+ byte [] decodedBytes = decodeStringToBytes (ascii85String );
100+ String decodedString = new String (decodedBytes , StandardCharsets .UTF_8 );
101+ return decodedString ;
102+ }
103+
104+
105+
106+ /**
107+ * This is a very simple base85 decoder. It respects the 'z' optimization for empty chunks, and
108+ * strips whitespace between characters to respect line limits.
109+ * @see <a href="https://en.wikipedia.org/wiki/Ascii85">Ascii85</a>
110+ * @param chars The input characters that are base85 encoded.
111+ * @return The binary data decoded from the input
112+ */
113+ public byte [] decodeStringToBytes (String chars ) {
114+ if (chars == null ) {
115+ throw new IllegalArgumentException ("You must provide a non-null input" );
116+ }
117+ // Because we perform compression when encoding four bytes of zeros to a single 'z', we need
118+ // to scan through the input to compute the target length, instead of just subtracting 20% of
119+ // the encoded text length.
120+ final int inputLength = chars .length ();
121+
122+ // lets first count the occurrences of 'z'
123+ long zCount = chars .chars ().filter (c -> c == 'z' ).count ();
124+
125+ // Typically by using five ASCII characters to represent four bytes of binary data
126+ // the encoded size ¹⁄₄ is larger than the original.
127+ // We however have to account for the 'z' which were compressed
128+ BigDecimal uncompressedZLength = BigDecimal .valueOf (zCount ).multiply (BigDecimal .valueOf (4 ));
129+
130+ BigDecimal uncompressedNonZLength = BigDecimal .valueOf (inputLength - zCount )
131+ .multiply (BigDecimal .valueOf (4 ))
132+ .divide (BigDecimal .valueOf (5 ));
133+
134+ BigDecimal uncompressedLength = uncompressedZLength .add (uncompressedNonZLength );
135+
136+ ByteBuffer bytebuff = ByteBuffer .allocate (uncompressedLength .intValue ());
137+ //1. Whitespace characters may occur anywhere to accommodate line length limitations. So lets strip it.
138+ chars = REMOVE_WHITESPACE .matcher (chars ).replaceAll ("" );
139+ //Since Base85 is an ascii encoder, we don't need to get the bytes as UTF-8.
140+ byte [] payload = chars .getBytes (StandardCharsets .US_ASCII );
141+ byte [] chunk = new byte [5 ];
142+ int chunkIndex = 0 ;
143+ for (int i = 0 ; i < payload .length ; i ++) {
144+ byte currByte = payload [i ];
145+ //Because all-zero data is quite common, an exception is made for the sake of data compression,
146+ //and an all-zero group is encoded as a single character "z" instead of "!!!!!".
147+ if (currByte == 'z' ) {
148+ if (chunkIndex > 0 ) {
149+ throw new IllegalArgumentException ("The payload is not base 85 encoded." );
150+ }
151+ chunk [chunkIndex ++] = '!' ;
152+ chunk [chunkIndex ++] = '!' ;
153+ chunk [chunkIndex ++] = '!' ;
154+ chunk [chunkIndex ++] = '!' ;
155+ chunk [chunkIndex ++] = '!' ;
156+ } else {
157+ chunk [chunkIndex ++] = currByte ;
158+ }
159+
160+ if (chunkIndex == 5 ) {
161+ bytebuff .put (decodeChunk (chunk ));
162+ Arrays .fill (chunk , (byte ) 0 );
163+ chunkIndex = 0 ;
164+ }
165+ }
166+
167+ //If we didn't end on 0, then we need some padding
168+ if (chunkIndex > 0 ) {
169+ int numPadded = chunk .length - chunkIndex ;
170+ Arrays .fill (chunk , chunkIndex , chunk .length , (byte )'u' );
171+ byte [] paddedDecode = decodeChunk (chunk );
172+ for (int i = 0 ; i < paddedDecode .length - numPadded ; i ++) {
173+ bytebuff .put (paddedDecode [i ]);
174+ }
175+ }
176+
177+ bytebuff .flip ();
178+ return Arrays .copyOf (bytebuff .array (),bytebuff .limit ());
179+ }
180+
181+ private static byte [] decodeChunk (byte [] chunk ) {
182+ if (chunk .length != 5 ) {
183+ throw new IllegalArgumentException ("You can only decode chunks of size 5." );
184+ }
185+ int value = 0 ;
186+ value += (chunk [0 ] - ASCII_SHIFT ) * BASE85_POW [4 ];
187+ value += (chunk [1 ] - ASCII_SHIFT ) * BASE85_POW [3 ];
188+ value += (chunk [2 ] - ASCII_SHIFT ) * BASE85_POW [2 ];
189+ value += (chunk [3 ] - ASCII_SHIFT ) * BASE85_POW [1 ];
190+ value += (chunk [4 ] - ASCII_SHIFT ) * BASE85_POW [0 ];
191+
192+ return intToByte (value );
193+ }
194+
195+ private static int byteToInt (byte [] value ) {
196+ if (value == null || value .length != 4 ) {
197+ throw new IllegalArgumentException ("You cannot create an int without exactly 4 bytes." );
198+ }
199+ return ByteBuffer .wrap (value ).getInt ();
200+ }
201+
202+ private static byte [] intToByte (int value ) {
203+ return new byte [] {
204+ (byte ) (value >>> 24 ),
205+ (byte ) (value >>> 16 ),
206+ (byte ) (value >>> 8 ),
207+ (byte ) (value )
208+ };
209+ }
210+
211+
212+
213+ }
0 commit comments