@@ -454,7 +454,7 @@ import numpy as np
454454from simsimd import fma, wsum
455455
456456# Let's take two FullHD video frames
457- first_frame = np.random.randn(1920 * 1024 ).astype(np.uint8)
457+ first_frame = np.random.randn(1920 * 1024 ).astype(np.uint8)
458458second_frame = np.random.randn(1920 * 1024 ).astype(np.uint8)
459459average_frame = np.empty_like(first_frame)
460460wsum(first_frame, second_frame, alpha = 0.5 , beta = 0.5 , out = average_frame)
@@ -479,7 +479,7 @@ alpha = 0.7 # Weight for the diffuse component
479479beta = 0.3 # Weight for the specular component
480480
481481# Formula: color = alpha * light_intensity * diffuse_component + beta * specular_component
482- fma(light_intensity, diffuse_component, specular_component,
482+ fma(light_intensity, diffuse_component, specular_component,
483483 dtype = " float16" , # Optional, unless it can't be inferred from the input
484484 alpha = alpha, beta = beta, out = output_color)
485485
@@ -499,7 +499,7 @@ ndim = 1536 # OpenAI Ada embeddings
499499matrix1 = np.packbits(np.random.randint(2 , size = (10_000 , ndim)).astype(np.uint8))
500500matrix2 = np.packbits(np.random.randint(2 , size = (1_000 , ndim)).astype(np.uint8))
501501
502- distances = simsimd.cdist(matrix1, matrix2,
502+ distances = simsimd.cdist(matrix1, matrix2,
503503 metric = " hamming" , # Unlike SciPy, SimSIMD doesn't divide by the number of dimensions
504504 out_dtype = " uint8" , # so we can use `uint8` instead of `float64` to save memory.
505505 threads = 0 , # Use all CPU cores with OpenMP.
@@ -541,8 +541,38 @@ with ThreadPoolExecutor(max_workers=num_threads) as executor:
541541 futures.append(executor.submit(compute_batch, start_idx, end_idx))
542542
543543 # Collect results from all threads
544- results = [future.result() for future in futures]
545- ```
544+ results = [future.result() for future in futures]
545+ ```
546+
547+ ### Half-Precision Brain-Float Numbers
548+
549+ The "brain-float-16" is a popular machine learning format.
550+ It's broadly supported in hardware and is very machine-friendly, but software support is still lagging behind.
551+ [ Unlike NumPy] ( https://github.com/numpy/numpy/issues/19808 ) , you can already use ` bf16 ` datatype in SimSIMD.
552+ Luckily, to downcast ` f32 ` to ` bf16 ` you only have to drop the last 16 bits:
553+
554+ ``` py
555+ import numpy as np
556+ import simsimd as simd
557+
558+ a = np.random.randn(ndim).astype(np.float32)
559+ b = np.random.randn(ndim).astype(np.float32)
560+
561+ # NumPy doesn't natively support brain-float, so we need a trick!
562+ # Luckily, it's very easy to reduce the representation accuracy
563+ # by simply masking the low 16-bits of our 32-bit single-precision
564+ # numbers. We can also add `0x8000` to round the numbers.
565+ a_f32rounded = ((a.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
566+ b_f32rounded = ((b.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
567+
568+ # To represent them as brain-floats, we need to drop the second half
569+ a_bf16 = np.right_shift(a_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
570+ b_bf16 = np.right_shift(b_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
571+
572+ # Now we can compare the results
573+ expected = np.inner(a_f32rounded, b_f32rounded)
574+ result = simd.inner(a_bf16, b_bf16, " bf16" )
575+ ```
546576
547577### Helper Functions
548578
@@ -693,23 +723,48 @@ Binary similarity functions are available only for `u8` types.
693723
694724### Half-Precision Floating-Point Numbers
695725
696- Rust has no native support for half-precision floating-point numbers, but SimSIMD provides a ` f16 ` type.
697- It has no functionality - it is a ` transparent ` wrapper around ` u16 ` and can be used with ` half ` or any other half-precision library .
726+ Rust has no native support for half-precision floating-point numbers, but SimSIMD provides a ` f16 ` type with built-in conversion methods .
727+ The underlying ` u16 ` representation is publicly accessible for direct bit manipulation .
698728
699729``` rust
700- use simsimd :: SpatialSimilarity ;
701- use simsimd :: f16 as SimF16 ;
730+ use simsimd :: {SpatialSimilarity , f16};
731+
732+ fn main () {
733+ // Create f16 vectors using built-in conversion methods
734+ let vector_a : Vec <f16 > = vec! [1.0 , 2.0 , 3.0 ]. iter (). map (| & x | f16 :: from_f32 (x )). collect ();
735+ let vector_b : Vec <f16 > = vec! [4.0 , 5.0 , 6.0 ]. iter (). map (| & x | f16 :: from_f32 (x )). collect ();
736+
737+ // Compute the cosine similarity
738+ let cosine_similarity = f16 :: cosine (& vector_a , & vector_b )
739+ . expect (" Vectors must be of the same length" );
740+
741+ println! (" Cosine Similarity: {}" , cosine_similarity );
742+
743+ // Direct bit manipulation
744+ let half = f16 :: from_f32 (3.14159 );
745+ let bits = half . 0 ; // Access raw u16 representation
746+ let reconstructed = f16 (bits );
747+
748+ // Convert back to f32
749+ let float_value = half . to_f32 ();
750+ }
751+ ```
752+
753+ For interoperability with the ` half ` crate:
754+
755+ ``` rust
756+ use simsimd :: {SpatialSimilarity , f16 as SimF16 };
702757use half :: f16 as HalfF16 ;
703758
704759fn main () {
705- let vector_a : Vec <HalfF16 > = ...
706- let vector_b : Vec <HalfF16 > = ...
760+ let vector_a : Vec <HalfF16 > = vec! [ 1.0 , 2.0 , 3.0 ] . iter () . map ( | & x | HalfF16 :: from_f32 ( x )) . collect ();
761+ let vector_b : Vec <HalfF16 > = vec! [ 4.0 , 5.0 , 6.0 ] . iter () . map ( | & x | HalfF16 :: from_f32 ( x )) . collect ();
707762
708- let buffer_a : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (a_half . as_ptr () as * const SimF16 , a_half . len ()) };
709- let buffer_b : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (b_half . as_ptr () as * const SimF16 , b_half . len ()) };
763+ // Safe reinterpret cast due to identical memory layout
764+ let buffer_a : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (vector_a . as_ptr () as * const SimF16 , vector_a . len ()) };
765+ let buffer_b : & [SimF16 ] = unsafe { std :: slice :: from_raw_parts (vector_b . as_ptr () as * const SimF16 , vector_b . len ()) };
710766
711- // Compute the cosine similarity between vector_a and vector_b
712- let cosine_similarity = SimF16 :: cosine (& vector_a , & vector_b )
767+ let cosine_similarity = SimF16 :: cosine (buffer_a , buffer_b )
713768 . expect (" Vectors must be of the same length" );
714769
715770 println! (" Cosine Similarity: {}" , cosine_similarity );
@@ -719,31 +774,41 @@ fn main() {
719774### Half-Precision Brain-Float Numbers
720775
721776The "brain-float-16" is a popular machine learning format.
722- It's broadly supported in hardware and is very machine-friendly, but software support is still lagging behind.
777+ It's broadly supported in hardware and is very machine-friendly, but software support is still lagging behind.
723778[ Unlike NumPy] ( https://github.com/numpy/numpy/issues/19808 ) , you can already use ` bf16 ` datatype in SimSIMD.
724- Luckily, to downcast ` f32 ` to ` bf16 ` you only have to drop the last 16 bits:
779+ SimSIMD provides a ` bf16 ` type with built-in conversion methods and direct bit access.
725780
726- ``` py
727- import numpy as np
728- import simsimd as simd
781+ ``` rust
782+ use simsimd :: {SpatialSimilarity , bf16 , f16};
729783
730- a = np.random.randn(ndim).astype(np.float32)
731- b = np.random.randn(ndim).astype(np.float32)
784+ fn main () {
785+ // Create bf16 vectors using built-in conversion methods
786+ let vector_a : Vec <bf16 > = vec! [1.0 , 2.0 , 3.0 ]. iter (). map (| & x | bf16 :: from_f32 (x )). collect ();
787+ let vector_b : Vec <bf16 > = vec! [4.0 , 5.0 , 6.0 ]. iter (). map (| & x | bf16 :: from_f32 (x )). collect ();
732788
733- # NumPy doesn't natively support brain-float, so we need a trick!
734- # Luckily, it's very easy to reduce the representation accuracy
735- # by simply masking the low 16-bits of our 32-bit single-precision
736- # numbers. We can also add `0x8000` to round the numbers.
737- a_f32rounded = ((a.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
738- b_f32rounded = ((b.view(np.uint32) + 0x 8000 ) & 0x FFFF0000 ).view(np.float32)
789+ // Compute the cosine similarity
790+ let cosine_similarity = bf16 :: cosine (& vector_a , & vector_b )
791+ . expect (" Vectors must be of the same length" );
792+
793+ println! (" Cosine Similarity: {}" , cosine_similarity );
739794
740- # To represent them as brain-floats, we need to drop the second half
741- a_bf16 = np.right_shift(a_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
742- b_bf16 = np.right_shift(b_f32rounded.view(np.uint32), 16 ).astype(np.uint16)
795+ // Direct bit manipulation
796+ let brain_half = bf16 :: from_f32 (3.14159 );
797+ let bits = brain_half . 0 ; // Access raw u16 representation
798+ let reconstructed = bf16 (bits );
799+
800+ // Convert back to f32
801+ let float_value = brain_half . to_f32 ();
743802
744- # Now we can compare the results
745- expected = np.inner(a_f32rounded, b_f32rounded)
746- result = simd.inner(a_bf16, b_bf16, " bf16" )
803+ // Compare precision differences
804+ let original = 3.14159_f32 ;
805+ let f16_roundtrip = f16 :: from_f32 (original ). to_f32 ();
806+ let bf16_roundtrip = bf16 :: from_f32 (original ). to_f32 ();
807+
808+ println! (" Original: {}" , original );
809+ println! (" f16 roundtrip: {}" , f16_roundtrip );
810+ println! (" bf16 roundtrip: {}" , bf16_roundtrip );
811+ }
747812```
748813
749814### Dynamic Dispatch in Rust
@@ -760,6 +825,7 @@ println!("uses ice: {}", capabilities::uses_ice());
760825println! (" uses genoa: {}" , capabilities :: uses_genoa ());
761826println! (" uses sapphire: {}" , capabilities :: uses_sapphire ());
762827println! (" uses turin: {}" , capabilities :: uses_turin ());
828+ println! (" uses sierra: {}" , capabilities :: uses_sierra ());
763829```
764830
765831## Using SimSIMD in JavaScript
@@ -776,13 +842,13 @@ This will automatically happen unless you install the package with the `--ignore
776842After you install it, you will be able to call the SimSIMD functions on various ` TypedArray ` variants:
777843
778844``` js
779- const { sqeuclidean , cosine , inner , hamming , jaccard } = require (' simsimd' );
845+ const { sqeuclidean , cosine , inner , hamming , jaccard } = require (" simsimd" );
780846
781847const vectorA = new Float32Array ([1.0 , 2.0 , 3.0 ]);
782848const vectorB = new Float32Array ([4.0 , 5.0 , 6.0 ]);
783849
784850const distance = sqeuclidean (vectorA, vectorB);
785- console .log (' Squared Euclidean Distance:' , distance);
851+ console .log (" Squared Euclidean Distance:" , distance);
786852```
787853
788854Other numeric types and precision levels are supported as well.
@@ -798,8 +864,8 @@ When doing machine learning and vector search with high-dimensional vectors you
798864You may want to project values from the $[ -1, 1] $ range to the $[ -127, 127] $ range and then cast them to ` Int8Array ` :
799865
800866``` js
801- const quantizedVectorA = new Int8Array (vectorA .map (v => ( v * 127 ) ));
802- const quantizedVectorB = new Int8Array (vectorB .map (v => ( v * 127 ) ));
867+ const quantizedVectorA = new Int8Array (vectorA .map (( v ) => v * 127 ));
868+ const quantizedVectorB = new Int8Array (vectorB .map (( v ) => v * 127 ));
803869const distance = cosine (quantizedVectorA, quantizedVectorB);
804870```
805871
@@ -808,7 +874,7 @@ You can map all positive values to `1` and all negative values and zero to `0`,
808874After that, Hamming and Jaccard distances can be computed.
809875
810876``` js
811- const { toBinary , hamming } = require (' simsimd' );
877+ const { toBinary , hamming } = require (" simsimd" );
812878
813879const binaryVectorA = toBinary (vectorA);
814880const binaryVectorB = toBinary (vectorB);
@@ -919,7 +985,7 @@ int main() {
919985 simsimd_cos_f32(f32s, f32s, 1536, &distance);
920986 simsimd_cos_f64(f64s, f64s, 1536, &distance);
921987 simsimd_cos_bf16(bf16s, bf16s, 1536, &distance);
922-
988+
923989 // Euclidean distance between two vectors
924990 simsimd_l2sq_i8(i8s, i8s, 1536, &distance);
925991 simsimd_l2sq_u8(u8s, u8s, 1536, &distance);
@@ -1036,7 +1102,7 @@ To explicitly disable half-precision support, define the following macro before
10361102> This flag does just that and is used to produce the `simsimd.so` shared library, as well as the Python and other bindings.
10371103
10381104For Arm: `SIMSIMD_TARGET_NEON`, `SIMSIMD_TARGET_SVE`, `SIMSIMD_TARGET_SVE2`, `SIMSIMD_TARGET_NEON_F16`, `SIMSIMD_TARGET_SVE_F16`, `SIMSIMD_TARGET_NEON_BF16`, `SIMSIMD_TARGET_SVE_BF16`.
1039- For x86: (`SIMSIMD_TARGET_HASWELL`, `SIMSIMD_TARGET_SKYLAKE`, `SIMSIMD_TARGET_ICE`, `SIMSIMD_TARGET_GENOA`, `SIMSIMD_TARGET_SAPPHIRE`, `SIMSIMD_TARGET_TURIN`, `SIMSIMD_TARGET_SIERRA`.
1105+ For x86: `SIMSIMD_TARGET_HASWELL`, `SIMSIMD_TARGET_SKYLAKE`, `SIMSIMD_TARGET_ICE`, `SIMSIMD_TARGET_GENOA`, `SIMSIMD_TARGET_SAPPHIRE`, `SIMSIMD_TARGET_TURIN`, `SIMSIMD_TARGET_SIERRA`.
10401106
10411107> By default, SimSIMD automatically infers the target architecture and pre-compiles as many kernels as possible.
10421108> In some cases, you may want to explicitly disable some of the kernels.
@@ -1064,7 +1130,7 @@ In general there are a few principles that SimSIMD follows:
10641130
10651131Possibly, in the future:
10661132
1067- - Best effort computation silencing `NaN` components in low-precision inputs.
1133+ - Best effort computation silencing `NaN` components in low-precision inputs.
10681134- Detect overflows and report the distance with a "signaling" `NaN`.
10691135
10701136Last but not least - don't build unless there is a demand for it.
@@ -1199,7 +1265,7 @@ SimSIMD defines `dot` and `vdot` kernels as:
11991265
12001266Where $\bar{b_i}$ is the complex conjugate of $b_i$.
12011267Putting that into Python code for scalar arrays:
1202-
1268+
12031269``` python
12041270def dot (a : List[number], b : List[number]) -> number:
12051271 a_real, a_imaginary = a[0 ::2 ], a[1 ::2 ]
0 commit comments