1+ /**
2+ * Licensed to the Apache Software Foundation (ASF) under one or more
3+ * contributor license agreements. See the NOTICE file distributed with
4+ * this work for additional information regarding copyright ownership.
5+ * The ASF licenses this file to You under the Apache License, Version 2.0
6+ * (the "License"); you may not use this file except in compliance with
7+ * the License. You may obtain a copy of the License at
8+ *
9+ * http://www.apache.org/licenses/LICENSE-2.0
10+ *
11+ * Unless required by applicable law or agreed to in writing, software
12+ * distributed under the License is distributed on an "AS IS" BASIS,
13+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ * See the License for the specific language governing permissions and
15+ * limitations under the License.
16+ */
17+
18+ /**
19+ * Taken from org.apache.mahout.math
20+ * https://github.com/apache/mahout
21+ */
22+
23+ package com .twitter .chill .hadoop ;
24+
25+ import java .io .DataInputStream ;
26+ import java .io .DataOutputStream ;
27+ import java .io .IOException ;
28+
29+ /**
30+ * <p>Encodes signed and unsigned values using a common variable-length
31+ * scheme, found for example in
32+ * <a href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
33+ * Google's Protocol Buffers</a>. It uses fewer bytes to encode smaller values,
34+ * but will use slightly more bytes to encode large values.</p>
35+ *
36+ * <p>Signed values are further encoded using so-called zig-zag encoding
37+ * in order to make them "compatible" with variable-length encoding.</p>
38+ */
final class Varint {

  /** Shift applied to the fifth (last permitted) byte of a 32-bit varint: 4 * 7 bits. */
  private static final int MAX_INT_SHIFT = 28;

  /** Shift applied to the tenth (last permitted) byte of a 64-bit varint: 9 * 7 bits. */
  private static final int MAX_LONG_SHIFT = 63;

  /** Static utility holder; never instantiated. */
  private Varint() {
  }

  /**
   * Writes a signed {@code long} as a zig-zag encoded, variable-length value.
   * Zig-zag maps values of small magnitude (positive or negative) to small
   * unsigned values so that they occupy few bytes. If values are known to be
   * nonnegative, {@link #writeUnsignedVarLong(long, java.io.DataOutputStream)}
   * should be used instead.
   *
   * @param value value to encode
   * @param out stream to write the encoded bytes to
   * @throws java.io.IOException if {@code out} throws {@link java.io.IOException}
   */
  public static void writeSignedVarLong(long value, DataOutputStream out) throws IOException {
    // Zig-zag: the sign bit moves to bit 0, the magnitude to the bits above it.
    writeUnsignedVarLong((value << 1) ^ (value >> 63), out);
  }

  /**
   * Writes a {@code long} as a variable-length value, 7 payload bits per byte,
   * least-significant group first. Every byte except the last has its high bit
   * set. Zig-zag is not applied: a negative input is treated as a large
   * unsigned value and is written as 10 bytes. If values can be negative, use
   * {@link #writeSignedVarLong(long, java.io.DataOutputStream)} instead.
   *
   * @param value value to encode
   * @param out stream to write the encoded bytes to
   * @throws java.io.IOException if {@code out} throws {@link java.io.IOException}
   */
  public static void writeUnsignedVarLong(long value, DataOutputStream out) throws IOException {
    long remaining = value;
    // Emit continuation bytes while anything remains above the low 7 bits.
    while ((remaining & ~0x7FL) != 0L) {
      out.writeByte(((int) remaining & 0x7F) | 0x80);
      remaining >>>= 7;
    }
    out.writeByte((int) remaining & 0x7F);
  }

  /**
   * Writes a signed {@code int} as a zig-zag encoded, variable-length value.
   *
   * @param value value to encode
   * @param out stream to write the encoded bytes to
   * @throws java.io.IOException if {@code out} throws {@link java.io.IOException}
   * @see #writeSignedVarLong(long, java.io.DataOutputStream)
   */
  public static void writeSignedVarInt(int value, DataOutputStream out) throws IOException {
    // Zig-zag: the sign bit moves to bit 0, the magnitude to the bits above it.
    writeUnsignedVarInt((value << 1) ^ (value >> 31), out);
  }

  /**
   * Writes an {@code int} as a variable-length value, 7 payload bits per byte,
   * least-significant group first. A negative input is treated as a large
   * unsigned value and is written as 5 bytes.
   *
   * @param value value to encode
   * @param out stream to write the encoded bytes to
   * @throws java.io.IOException if {@code out} throws {@link java.io.IOException}
   * @see #writeUnsignedVarLong(long, java.io.DataOutputStream)
   */
  public static void writeUnsignedVarInt(int value, DataOutputStream out) throws IOException {
    int remaining = value;
    // Emit continuation bytes while anything remains above the low 7 bits.
    while ((remaining & ~0x7F) != 0) {
      out.writeByte((remaining & 0x7F) | 0x80);
      remaining >>>= 7;
    }
    out.writeByte(remaining & 0x7F);
  }

  /**
   * Reads a value written by {@link #writeSignedVarLong(long, java.io.DataOutputStream)}.
   *
   * @param in stream to read bytes from
   * @return the decoded value
   * @throws java.io.IOException if {@code in} throws {@link java.io.IOException}
   * @throws IllegalArgumentException if the variable-length value does not
   *         terminate within 10 bytes
   */
  public static long readSignedVarLong(DataInputStream in) throws IOException {
    long raw = readUnsignedVarLong(in);
    // Undo zig-zag: the unsigned shift restores the magnitude bits, and the
    // low bit (the original sign) flips every bit when it is set.
    return (raw >>> 1) ^ -(raw & 1L);
  }

  /**
   * Reads a value written by {@link #writeUnsignedVarLong(long, java.io.DataOutputStream)}.
   * A result with the top bit set is returned as a negative {@code long}.
   *
   * @param in stream to read bytes from
   * @return the decoded value
   * @throws java.io.IOException if {@code in} throws {@link java.io.IOException}
   * @throws IllegalArgumentException if the variable-length value does not
   *         terminate within 10 bytes
   */
  public static long readUnsignedVarLong(DataInputStream in) throws IOException {
    long value = 0L;
    int shift = 0;
    long b;
    while (((b = in.readByte()) & 0x80L) != 0) {
      // A continuation flag on the tenth byte means an eleventh would be
      // needed. Java masks long shift distances to 6 bits, so decoding
      // further would silently corrupt the result instead of failing.
      if (shift >= MAX_LONG_SHIFT) {
        throw new IllegalArgumentException(
            "Read more than 10 bytes of data, must be invalid Var long");
      }
      value |= (b & 0x7F) << shift;
      shift += 7;
    }
    // The terminating byte has its high bit clear, so b is in [0, 127] here.
    return value | (b << shift);
  }

  /**
   * Reads a value written by {@link #writeSignedVarInt(int, java.io.DataOutputStream)}.
   *
   * @param in stream to read bytes from
   * @return the decoded value
   * @throws IllegalArgumentException if the variable-length value does not
   *         terminate within 5 bytes
   * @throws java.io.IOException if {@code in} throws {@link java.io.IOException}
   * @see #readSignedVarLong(java.io.DataInputStream)
   */
  public static int readSignedVarInt(DataInputStream in) throws IOException {
    int raw = readUnsignedVarInt(in);
    // Undo zig-zag: the unsigned shift restores the magnitude bits, and the
    // low bit (the original sign) flips every bit when it is set.
    return (raw >>> 1) ^ -(raw & 1);
  }

  /**
   * Reads a value written by {@link #writeUnsignedVarInt(int, java.io.DataOutputStream)}.
   * A result with the top bit set is returned as a negative {@code int}.
   *
   * @param in stream to read bytes from
   * @return the decoded value
   * @throws IllegalArgumentException if the variable-length value does not
   *         terminate within 5 bytes
   * @throws java.io.IOException if {@code in} throws {@link java.io.IOException}
   * @see #readUnsignedVarLong(java.io.DataInputStream)
   */
  public static int readUnsignedVarInt(DataInputStream in) throws IOException {
    int value = 0;
    int shift = 0;
    int b;
    while (((b = in.readByte()) & 0x80) != 0) {
      // A continuation flag on the fifth byte means a sixth would be needed.
      // Java masks int shift distances to 5 bits, so decoding further would
      // silently corrupt the result instead of failing.
      if (shift >= MAX_INT_SHIFT) {
        throw new IllegalArgumentException("Read more than 5 bytes of data, must be invalid Var int");
      }
      value |= (b & 0x7F) << shift;
      shift += 7;
    }
    // The terminating byte has its high bit clear, so b is in [0, 127] here.
    return value | (b << shift);
  }

}
0 commit comments