Skip to content

Commit 652d4b9

Browse files
authored
[VECTOR_FLOAT16] Implement serialization and deserialization logic (#2853)
* [VECTOR_FLOAT16] Implement serialization and deserialization logic * Added test scenarios
1 parent 80ffa81 commit 652d4b9

File tree

2 files changed

+391
-0
lines changed

2 files changed

+391
-0
lines changed

src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,172 @@ static String getTypeDefinition(Vector vector, int scale, boolean isOutput, int
249249
return "VECTOR(" + precision + ")";
250250
}
251251

252+
/**
253+
* Serializes a 4-byte float to 2-byte float16 (IEEE 754 half-precision format).
254+
* This method converts a 32-bit IEEE 754 float to a 16-bit IEEE 754 half-precision float.
255+
*
256+
* float16 bit layout : S (1) | E (5) | M (10) and exponent bias 15
257+
* float32 bit layout : S (1) | E (8) | M (23) and exponent bias 127
258+
*
259+
* @param value The 4-byte float value to serialize
260+
* @return The 2-byte representation as a short
261+
*/
262+
private static Short floatToFloat16(Float value) {
263+
int bits = Float.floatToIntBits(value);
264+
265+
int sign = (bits >>> 31) & 0x1;
266+
int exponent = (bits >>> 23) & 0xFF;
267+
int mantissa = bits & 0x7FFFFF;
268+
269+
// NaN or Infinity
270+
if (exponent == 0xFF) {
271+
if (mantissa != 0) {
272+
return (short) ((sign << 15) | 0x7E00); // NaN
273+
}
274+
return (short) ((sign << 15) | 0x7C00); // Infinity
275+
}
276+
277+
// Zero (preserve signed zero)
278+
if ((bits & 0x7FFFFFFF) == 0) {
279+
return (short) (sign << 15);
280+
}
281+
282+
// Convert exponent
283+
int halfExponent = exponent - 127 + 15;
284+
285+
// Overflow → Infinity
286+
if (halfExponent >= 31) {
287+
return (short) ((sign << 15) | 0x7C00);
288+
}
289+
290+
// Underflow → Subnormal or Zero
291+
if (halfExponent <= 0) {
292+
if (halfExponent < -10) {
293+
return (short) (sign << 15); // Too small → zero
294+
}
295+
296+
// Convert to subnormal
297+
mantissa |= 0x800000;
298+
int shift = 1 - halfExponent;
299+
300+
int mant = mantissa >> (shift + 13);
301+
302+
// Round to nearest-even
303+
int roundBit = (mantissa >> (shift + 12)) & 1;
304+
int lostBits = mantissa & ((1 << (shift + 12)) - 1);
305+
306+
if (roundBit == 1 && (lostBits != 0 || (mant & 1) == 1)) {
307+
mant++;
308+
}
309+
310+
return (short) ((sign << 15) | mant);
311+
}
312+
313+
// Normal number
314+
int mant = mantissa >> 13;
315+
316+
// Rounding
317+
int roundBit = (mantissa >> 12) & 1;
318+
int lostBits = mantissa & 0xFFF;
319+
320+
if (roundBit == 1 && (lostBits != 0 || (mant & 1) == 1)) {
321+
mant++;
322+
if (mant == 0x400) { // Mantissa overflow
323+
mant = 0;
324+
halfExponent++;
325+
if (halfExponent >= 31) {
326+
return (short) ((sign << 15) | 0x7C00);
327+
}
328+
}
329+
}
330+
331+
return (short) ((sign << 15) | (halfExponent << 10) | mant);
332+
}
333+
334+
/**
335+
* Deserializes a 2-byte float16 to a 4-byte float (IEEE 754 single-precision format).
336+
* This method converts a 16-bit IEEE 754 half-precision float to a 32-bit IEEE 754 float.
337+
*
338+
* float16 bit layout : S (1) | E (5) | M (10) and exponent bias 15
339+
* float32 bit layout : S (1) | E (8) | M (23) and exponent bias 127
340+
*
341+
* @param value The 2-byte float16 value as a short
342+
* @return The 4-byte float representation
343+
*/
344+
private static Float float16ToFloat(Short value) {
345+
int bits = value & 0xFFFF;
346+
347+
int sign = (bits >>> 15) & 1;
348+
int exponent = (bits >>> 10) & 0x1F;
349+
int mantissa = bits & 0x3FF;
350+
351+
// NaN or Infinity
352+
if (exponent == 0x1F) {
353+
if (mantissa == 0) {
354+
return Float.intBitsToFloat((sign << 31) | 0x7F800000);
355+
}
356+
return Float.NaN;
357+
}
358+
359+
// Zero
360+
if (exponent == 0 && mantissa == 0) {
361+
return Float.intBitsToFloat(sign << 31);
362+
}
363+
364+
// Subnormal
365+
if (exponent == 0) {
366+
while ((mantissa & 0x400) == 0) {
367+
mantissa <<= 1;
368+
exponent--;
369+
}
370+
mantissa &= 0x3FF;
371+
exponent++;
372+
}
373+
374+
// Convert exponent bias
375+
exponent = exponent + (127 - 15);
376+
377+
int result = (sign << 31) | (exponent << 23) | (mantissa << 13);
378+
return Float.intBitsToFloat(result);
379+
}
380+
381+
382+
/**
383+
* Converts an array of 4-byte floats to an array of 2-byte float16 values.
384+
*
385+
* @param floats Array of 4-byte float values
386+
* @return Array of 2-byte values representing float16 format
387+
*/
388+
static Short[] serializeFloat16Array(Float[] float32) {
389+
if (float32 == null) {
390+
return null;
391+
}
392+
393+
Short[] result = new Short[float32.length];
394+
for (int i = 0; i < float32.length; i++) {
395+
result[i] = floatToFloat16(float32[i]);
396+
}
397+
return result;
398+
}
399+
400+
/**
401+
* Converts an array of 2-byte float16 values to an array of 4-byte floats.
402+
*
403+
* @param float16Values Array of 2-byte values in float16 format
404+
* @return Array of 4-byte float values
405+
*/
406+
static Float[] deserializeFloat16Array(Short[] float16Values) {
407+
if (float16Values == null) {
408+
return null;
409+
}
410+
411+
Float[] result = new Float[float16Values.length];
412+
for (int i = 0; i < float16Values.length; i++) {
413+
result[i] = float16ToFloat(float16Values[i]);
414+
}
415+
return result;
416+
}
417+
252418
private static IllegalArgumentException vectorException(String resourceKey, Object... args) {
253419
try {
254420
MessageFormat form = new MessageFormat(
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
/*
2+
* Microsoft JDBC Driver for SQL Server Copyright(c) Microsoft Corporation All rights reserved. This program is made
3+
* available under the terms of the MIT License. See the LICENSE file in the project root for more information.
4+
*/
5+
6+
package com.microsoft.sqlserver.jdbc;
7+
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
10+
import static org.junit.jupiter.api.Assertions.assertNotNull;
11+
import static org.junit.jupiter.api.Assertions.assertTrue;
12+
13+
import org.junit.jupiter.api.BeforeAll;
14+
import org.junit.jupiter.api.DisplayName;
15+
import org.junit.jupiter.api.Tag;
16+
import org.junit.jupiter.api.Test;
17+
18+
import com.microsoft.sqlserver.testframework.AbstractTest;
19+
import com.microsoft.sqlserver.testframework.Constants;
20+
21+
@DisplayName("Test Vector Float16 Data Type")
22+
@Tag(Constants.vectorTest)
23+
public class VectorFloat16Test extends AbstractTest {
24+
25+
@BeforeAll
26+
private static void setupTest() throws Exception {
27+
setConnection();
28+
}
29+
30+
@Test
31+
@DisplayName("Test serializeFloat16Array: Float[] → Short[]")
32+
public void testSerializeFloat16Array() {
33+
34+
Float[] input = new Float[] {
35+
1.0f, // 0x3C00
36+
-2.0f, // 0xC000
37+
0.5f, // 0x3800
38+
0.0f, // 0x0000
39+
-0.0f, // 0x8000
40+
Float.POSITIVE_INFINITY, // 0x7C00
41+
Float.NEGATIVE_INFINITY, // 0xFC00
42+
Float.NaN // 0x7E00
43+
};
44+
45+
Short[] result = VectorUtils.serializeFloat16Array(input);
46+
47+
Short[] expected = new Short[] {
48+
(short) 0x3C00,
49+
(short) 0xC000,
50+
(short) 0x3800,
51+
(short) 0x0000,
52+
(short) 0x8000,
53+
(short) 0x7C00,
54+
(short) 0xFC00,
55+
(short) 0x7E00
56+
};
57+
58+
assertNotNull(result);
59+
assertEquals(expected.length, result.length);
60+
61+
for (int i = 0; i < expected.length; i++) {
62+
assertEquals(expected[i], result[i], "Mismatch at index " + i);
63+
}
64+
}
65+
66+
@Test
67+
@DisplayName("Test deserializeFloat16Array: Short[] → Float[]")
68+
public void testDeserializeFloat16Array() {
69+
70+
Short[] input = new Short[] {
71+
(short) 0x3C00, // 1.0
72+
(short) 0xC000, // -2.0
73+
(short) 0x3800, // 0.5
74+
(short) 0x0000, // +0
75+
(short) 0x8000, // -0
76+
(short) 0x7C00, // +Inf
77+
(short) 0xFC00, // -Inf
78+
(short) 0x7E00 // NaN
79+
};
80+
81+
Float[] result = VectorUtils.deserializeFloat16Array(input);
82+
83+
assertNotNull(result);
84+
assertEquals(input.length, result.length);
85+
86+
assertEquals(1.0f, result[0]);
87+
assertEquals(-2.0f, result[1]);
88+
assertEquals(0.5f, result[2]);
89+
assertEquals(0.0f, result[3]);
90+
assertEquals(-0.0f, result[4]);
91+
assertEquals(Float.POSITIVE_INFINITY, result[5]);
92+
assertEquals(Float.NEGATIVE_INFINITY, result[6]);
93+
assertTrue(Float.isNaN(result[7]));
94+
}
95+
96+
@Test
97+
@DisplayName("Float -> Float16 Serialization: All Scenarios")
98+
void testFloatToFloat16Serialization() {
99+
100+
Float[] input = new Float[] {
101+
102+
// Normal number: well within float16 representable range
103+
// Should convert to a normal float16 value
104+
1.5f,
105+
106+
// Very small number: representable as subnormal in float16
107+
// Should convert to subnormal, not zero
108+
5.96e-8f,
109+
110+
// Extremely small number: below float16 subnormal range
111+
// Should underflow to signed zero
112+
1.0e-10f,
113+
114+
// Large number beyond float16 max (65504)
115+
// Should overflow to +Infinity
116+
70000.0f,
117+
118+
// Negative overflow
119+
// Should overflow to -Infinity
120+
-100000.0f,
121+
122+
// Exactly representable boundary value
123+
// Should serialize without rounding error
124+
0.5f,
125+
126+
// Value needing rounding (tie-to-even scenario)
127+
// Should round correctly using nearest-even rule
128+
1.0009766f,
129+
130+
// Special value: +Infinity
131+
// Should map to float16 Infinity
132+
Float.POSITIVE_INFINITY,
133+
134+
// Special value: -Infinity
135+
// Should map to float16 -Infinity
136+
Float.NEGATIVE_INFINITY,
137+
138+
// Special value: NaN
139+
// Should convert to canonical float16 NaN (0x7E00)
140+
Float.NaN,
141+
142+
// Positive zero
143+
// Should preserve sign bit
144+
+0.0f,
145+
146+
// Negative zero
147+
// Must preserve negative zero sign
148+
-0.0f
149+
};
150+
151+
Short[] result = VectorUtils.serializeFloat16Array(input);
152+
153+
// Assertions
154+
assertEquals((short) 0x3E00, result[0]); // 1.5
155+
assertNotEquals((short) 0x0000, result[1]); // Subnormal not zero
156+
assertEquals((short) 0x0000, result[2]); // Underflow to zero
157+
assertEquals((short) 0x7C00, result[3]); // +Infinity
158+
assertEquals((short) 0xFC00, result[4]); // -Infinity
159+
assertEquals((short) 0x3800, result[5]); // 0.5
160+
assertEquals((short) 0x3C01, result[6]); // rounded value
161+
assertEquals((short) 0x7C00, result[7]); // +Infinity
162+
assertEquals((short) 0xFC00, result[8]); // -Infinity
163+
assertEquals((short) 0x7E00, result[9]); // NaN
164+
assertEquals((short) 0x0000, result[10]); // +0
165+
assertEquals((short) 0x8000, result[11]); // -0 preserved
166+
}
167+
168+
@Test
169+
@DisplayName("Float16 -> Float Deserialization: All Scenarios")
170+
void testFloat16ToFloatDeserialization() {
171+
172+
Short[] input = new Short[] {
173+
174+
// Normal float16 number → normal float
175+
// 1.5 in float16 representation
176+
(short) 0x3E00,
177+
178+
// Smallest positive subnormal float16
179+
// Should convert to tiny non-zero float
180+
(short) 0x0001,
181+
182+
// Zero
183+
// Must become +0.0
184+
(short) 0x0000,
185+
186+
// Negative zero
187+
// Must preserve -0.0 sign
188+
(short) 0x8000,
189+
190+
// Largest normal float16 value (65504)
191+
// Should deserialize to approx 65504f
192+
(short) 0x7BFF,
193+
194+
// Positive Infinity
195+
// Must deserialize to Float.POSITIVE_INFINITY
196+
(short) 0x7C00,
197+
198+
// Negative Infinity
199+
// Must deserialize to Float.NEGATIVE_INFINITY
200+
(short) 0xFC00,
201+
202+
// Canonical NaN
203+
// Must deserialize to Float.NaN
204+
(short) 0x7E00,
205+
206+
// A random normal float16
207+
// Validates general path
208+
(short) 0x3555
209+
};
210+
211+
Float[] result = VectorUtils.deserializeFloat16Array(input);
212+
213+
// Assertions
214+
assertEquals(1.5f, result[0]);
215+
assertTrue(result[1] > 0 && result[1] < 1e-6); // subnormal tiny positive
216+
assertEquals(0.0f, result[2]);
217+
assertEquals(Float.floatToRawIntBits(-0.0f), Float.floatToRawIntBits(result[3])); // sign preserved
218+
assertEquals(65504.0f, result[4]);
219+
assertEquals(Float.POSITIVE_INFINITY, result[5]);
220+
assertEquals(Float.NEGATIVE_INFINITY, result[6]);
221+
assertTrue(Float.isNaN(result[7]));
222+
assertNotNull(result[8]); // general valid float
223+
}
224+
225+
}

0 commit comments

Comments
 (0)