From 24a6440a4a20ee0198b6c6205a9a3184b61909dc Mon Sep 17 00:00:00 2001 From: Muskan Gupta Date: Wed, 26 Nov 2025 13:50:33 +0530 Subject: [PATCH 1/2] [VECTOR_FLOAT16] Implement serialization and deserialization logic --- .../microsoft/sqlserver/jdbc/VectorUtils.java | 166 ++++++++++++++++++ .../sqlserver/jdbc/VectorFloat16Test.java | 93 ++++++++++ 2 files changed, 259 insertions(+) create mode 100644 src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java diff --git a/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java b/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java index 31ae6e460..a91af00d2 100644 --- a/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java +++ b/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java @@ -249,6 +249,172 @@ static String getTypeDefinition(Vector vector, int scale, boolean isOutput, int return "VECTOR(" + precision + ")"; } + /** + * Serializes a 4-byte float to 2-byte float16 (IEEE 754 half-precision format). + * This method converts a 32-bit IEEE 754 float to a 16-bit IEEE 754 half-precision float. 
+ * + * float16 bit layout : S (1) | E (5) | M (10) and exponent bias 15 + * float32 bit layout : S (1) | E (8) | M (23) and exponent bias 127 + * + * @param value The 4-byte float value to serialize + * @return The 2-byte representation as a short + */ + static Short floatToFloat16(Float value) { + int bits = Float.floatToIntBits(value); + + int sign = (bits >>> 31) & 0x1; + int exponent = (bits >>> 23) & 0xFF; + int mantissa = bits & 0x7FFFFF; + + // NaN or Infinity + if (exponent == 0xFF) { + if (mantissa != 0) { + return (short) ((sign << 15) | 0x7E00); // NaN + } + return (short) ((sign << 15) | 0x7C00); // Infinity + } + + // Zero (preserve signed zero) + if ((bits & 0x7FFFFFFF) == 0) { + return (short) (sign << 15); + } + + // Convert exponent + int halfExponent = exponent - 127 + 15; + + // Overflow → Infinity + if (halfExponent >= 31) { + return (short) ((sign << 15) | 0x7C00); + } + + // Underflow → Subnormal or Zero + if (halfExponent <= 0) { + if (halfExponent < -10) { + return (short) (sign << 15); // Too small → zero + } + + // Convert to subnormal + mantissa |= 0x800000; + int shift = 1 - halfExponent; + + int mant = mantissa >> (shift + 13); + + // Round to nearest-even + int roundBit = (mantissa >> (shift + 12)) & 1; + int lostBits = mantissa & ((1 << (shift + 12)) - 1); + + if (roundBit == 1 && (lostBits != 0 || (mant & 1) == 1)) { + mant++; + } + + return (short) ((sign << 15) | mant); + } + + // Normal number + int mant = mantissa >> 13; + + // Rounding + int roundBit = (mantissa >> 12) & 1; + int lostBits = mantissa & 0xFFF; + + if (roundBit == 1 && (lostBits != 0 || (mant & 1) == 1)) { + mant++; + if (mant == 0x400) { // Mantissa overflow + mant = 0; + halfExponent++; + if (halfExponent >= 31) { + return (short) ((sign << 15) | 0x7C00); + } + } + } + + return (short) ((sign << 15) | (halfExponent << 10) | mant); + } + + /** + * Deserializes a 2-byte float16 to a 4-byte float (IEEE 754 single-precision format). 
+ * This method converts a 16-bit IEEE 754 half-precision float to a 32-bit IEEE 754 float. + * + * float16 bit layout : S (1) | E (5) | M (10) and exponent bias 15 + * float32 bit layout : S (1) | E (8) | M (23) and exponent bias 127 + * + * @param value The 2-byte float16 value as a short + * @return The 4-byte float representation + */ + static Float float16ToFloat(Short value) { + int bits = value & 0xFFFF; + + int sign = (bits >>> 15) & 1; + int exponent = (bits >>> 10) & 0x1F; + int mantissa = bits & 0x3FF; + + // NaN or Infinity + if (exponent == 0x1F) { + if (mantissa == 0) { + return Float.intBitsToFloat((sign << 31) | 0x7F800000); + } + return Float.NaN; + } + + // Zero + if (exponent == 0 && mantissa == 0) { + return Float.intBitsToFloat(sign << 31); + } + + // Subnormal + if (exponent == 0) { + while ((mantissa & 0x400) == 0) { + mantissa <<= 1; + exponent--; + } + mantissa &= 0x3FF; + exponent++; + } + + // Convert exponent bias + exponent = exponent + (127 - 15); + + int result = (sign << 31) | (exponent << 23) | (mantissa << 13); + return Float.intBitsToFloat(result); + } + + + /** + * Converts an array of 4-byte floats to an array of 2-byte float16 values. + * + * @param float32 Array of 4-byte float values + * @return Array of 2-byte values representing float16 format + */ + static Short[] serializeFloat16Array(Float[] float32) { + if (float32 == null) { + return null; + } + + Short[] result = new Short[float32.length]; + for (int i = 0; i < float32.length; i++) { + result[i] = floatToFloat16(float32[i]); + } + return result; + } + + /** + * Converts an array of 2-byte float16 values to an array of 4-byte floats. 
+ * + * @param float16Values Array of 2-byte values in float16 format + * @return Array of 4-byte float values + */ + static Float[] deserializeFloat16Array(Short[] float16Values) { + if (float16Values == null) { + return null; + } + + Float[] result = new Float[float16Values.length]; + for (int i = 0; i < float16Values.length; i++) { + result[i] = float16ToFloat(float16Values[i]); + } + return result; + } + private static IllegalArgumentException vectorException(String resourceKey, Object... args) { try { MessageFormat form = new MessageFormat( diff --git a/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java b/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java new file mode 100644 index 000000000..5e5c47392 --- /dev/null +++ b/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java @@ -0,0 +1,93 @@ +/* + * Microsoft JDBC Driver for SQL Server Copyright(c) Microsoft Corporation All rights reserved. This program is made + * available under the terms of the MIT License. See the LICENSE file in the project root for more information. 
+ */ + +package com.microsoft.sqlserver.jdbc; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +import com.microsoft.sqlserver.testframework.AbstractTest; +import com.microsoft.sqlserver.testframework.Constants; + +@DisplayName("Test Vector Float16 Data Type") +@Tag(Constants.vectorTest) +public class VectorFloat16Test extends AbstractTest { + + @BeforeAll + private static void setupTest() throws Exception { + setConnection(); + } + + @Test + @DisplayName("Test serializeFloat16Array: Float[] → Short[]") + public void testSerializeFloat16Array() { + + Float[] input = new Float[] { + 1.0f, // 0x3C00 + -2.0f, // 0xC000 + 0.5f, // 0x3800 + 0.0f, // 0x0000 + -0.0f, // 0x8000 + Float.POSITIVE_INFINITY, // 0x7C00 + Float.NEGATIVE_INFINITY, // 0xFC00 + Float.NaN // 0x7E00 + }; + + Short[] result = VectorUtils.serializeFloat16Array(input); + + Short[] expected = new Short[] { + (short) 0x3C00, + (short) 0xC000, + (short) 0x3800, + (short) 0x0000, + (short) 0x8000, + (short) 0x7C00, + (short) 0xFC00, + (short) 0x7E00 + }; + + assertNotNull(result); + assertEquals(expected.length, result.length); + + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], result[i], "Mismatch at index " + i); + } + } + + @Test + @DisplayName("Test deserializeFloat16Array: Short[] → Float[]") + public void testDeserializeFloat16Array() { + + Short[] input = new Short[] { + (short) 0x3C00, // 1.0 + (short) 0xC000, // -2.0 + (short) 0x3800, // 0.5 + (short) 0x0000, // +0 + (short) 0x8000, // -0 + (short) 0x7C00, // +Inf + (short) 0xFC00, // -Inf + (short) 0x7E00 // NaN + }; + + Float[] result = VectorUtils.deserializeFloat16Array(input); + + assertNotNull(result); + assertEquals(input.length, result.length); + + assertEquals(1.0f, result[0]); + assertEquals(-2.0f, result[1]); + assertEquals(0.5f, result[2]); + 
assertEquals(0.0f, result[3]); + assertEquals(-0.0f, result[4]); + assertEquals(Float.POSITIVE_INFINITY, result[5]); + assertEquals(Float.NEGATIVE_INFINITY, result[6]); + assertTrue(Float.isNaN(result[7])); + } + +} \ No newline at end of file From 93fc891a99201e2cf0f4d3ab0dd036aa6f9b576b Mon Sep 17 00:00:00 2001 From: Muskan Gupta Date: Wed, 26 Nov 2025 14:32:28 +0530 Subject: [PATCH 2/2] Added test scenarios --- .../microsoft/sqlserver/jdbc/VectorUtils.java | 4 +- .../sqlserver/jdbc/VectorFloat16Test.java | 136 +++++++++++++++++- 2 files changed, 136 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java b/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java index a91af00d2..cced96504 100644 --- a/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java +++ b/src/main/java/com/microsoft/sqlserver/jdbc/VectorUtils.java @@ -259,7 +259,7 @@ static String getTypeDefinition(Vector vector, int scale, boolean isOutput, int * @param value The 4-byte float value to serialize * @return The 2-byte representation as a short */ - static Short floatToFloat16(Float value) { + private static Short floatToFloat16(Float value) { int bits = Float.floatToIntBits(value); int sign = (bits >>> 31) & 0x1; @@ -341,7 +341,7 @@ static Short floatToFloat16(Float value) { * @param value The 2-byte float16 value as a short * @return The 4-byte float representation */ - static Float float16ToFloat(Short value) { + private static Float float16ToFloat(Short value) { int bits = value & 0xFFFF; int sign = (bits >>> 15) & 1; diff --git a/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java b/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java index 5e5c47392..814220e74 100644 --- a/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java +++ b/src/test/java/com/microsoft/sqlserver/jdbc/VectorFloat16Test.java @@ -5,13 +5,16 @@ package com.microsoft.sqlserver.jdbc; +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.*; - import com.microsoft.sqlserver.testframework.AbstractTest; import com.microsoft.sqlserver.testframework.Constants; @@ -90,4 +93,133 @@ public void testDeserializeFloat16Array() { assertTrue(Float.isNaN(result[7])); } + @Test + @DisplayName("Float -> Float16 Serialization: All Scenarios") + void testFloatToFloat16Serialization() { + + Float[] input = new Float[] { + + // Normal number: well within float16 representable range + // Should convert to a normal float16 value + 1.5f, + + // Very small number: representable as subnormal in float16 + // Should convert to subnormal, not zero + 5.96e-8f, + + // Extremely small number: below float16 subnormal range + // Should underflow to signed zero + 1.0e-10f, + + // Large number beyond float16 max (65504) + // Should overflow to +Infinity + 70000.0f, + + // Negative overflow + // Should overflow to -Infinity + -100000.0f, + + // Exactly representable boundary value + // Should serialize without rounding error + 0.5f, + + // One float16 step above 1.0 (1 + 2^-10): exactly representable + // Should serialize to 0x3C01 with no rounding needed + 1.0009766f, + + // Special value: +Infinity + // Should map to float16 Infinity + Float.POSITIVE_INFINITY, + + // Special value: -Infinity + // Should map to float16 -Infinity + Float.NEGATIVE_INFINITY, + + // Special value: NaN + // Should convert to canonical float16 NaN (0x7E00) + Float.NaN, + + // Positive zero + // Should preserve sign bit + +0.0f, + + // Negative zero + // Must preserve negative zero sign + -0.0f + }; + + Short[] result = 
VectorUtils.serializeFloat16Array(input); + + // Assertions + assertEquals((short) 0x3E00, result[0]); // 1.5 + assertNotEquals((short) 0x0000, result[1]); // Subnormal not zero + assertEquals((short) 0x0000, result[2]); // Underflow to zero + assertEquals((short) 0x7C00, result[3]); // +Infinity + assertEquals((short) 0xFC00, result[4]); // -Infinity + assertEquals((short) 0x3800, result[5]); // 0.5 + assertEquals((short) 0x3C01, result[6]); // 1 + 2^-10, exact + assertEquals((short) 0x7C00, result[7]); // +Infinity + assertEquals((short) 0xFC00, result[8]); // -Infinity + assertEquals((short) 0x7E00, result[9]); // NaN + assertEquals((short) 0x0000, result[10]); // +0 + assertEquals((short) 0x8000, result[11]); // -0 preserved + } + + @Test + @DisplayName("Float16 -> Float Deserialization: All Scenarios") + void testFloat16ToFloatDeserialization() { + + Short[] input = new Short[] { + + // Normal float16 number → normal float + // 1.5 in float16 representation + (short) 0x3E00, + + // Smallest positive subnormal float16 + // Should convert to tiny non-zero float + (short) 0x0001, + + // Zero + // Must become +0.0 + (short) 0x0000, + + // Negative zero + // Must preserve -0.0 sign + (short) 0x8000, + + // Largest normal float16 value (65504) + // Should deserialize to exactly 65504f + (short) 0x7BFF, + + // Positive Infinity + // Must deserialize to Float.POSITIVE_INFINITY + (short) 0x7C00, + + // Negative Infinity + // Must deserialize to Float.NEGATIVE_INFINITY + (short) 0xFC00, + + // Canonical NaN + // Must deserialize to Float.NaN + (short) 0x7E00, + + // A random normal float16 + // Validates general path + (short) 0x3555 + }; + + Float[] result = VectorUtils.deserializeFloat16Array(input); + + // Assertions + assertEquals(1.5f, result[0]); + assertTrue(result[1] > 0 && result[1] < 1e-6); // subnormal tiny positive + assertEquals(0.0f, result[2]); + assertEquals(Float.floatToRawIntBits(-0.0f), Float.floatToRawIntBits(result[3])); // sign preserved + 
assertEquals(65504.0f, result[4]); + assertEquals(Float.POSITIVE_INFINITY, result[5]); + assertEquals(Float.NEGATIVE_INFINITY, result[6]); + assertTrue(Float.isNaN(result[7])); + assertNotNull(result[8]); // general valid float + } + } \ No newline at end of file