@@ -103,121 +103,181 @@ You can learn more about the technical implementation details in the following b
103
103
<th align="center">C 99</th>
104
104
<th align="center">SimSIMD</th>
105
105
</tr >
106
- <!-- Cosine distance with different precision levels -->
106
+ <!-- Cosine distances with different precision levels -->
107
107
<tr >
108
- <td colspan="4" align="center">cosine distances between 1536d vectors in <code>float16 </code></td>
108
+ <td colspan="4" align="center">cosine distances between 1536d vectors in <code>int8 </code></td>
109
109
</tr >
110
110
<tr >
111
111
<td align="center"> <!-- scipy.spatial.distance.cosine -->
112
- <code>int8</code><br/>
113
112
🚧 overflows<br/>
114
- <code>bfloat16</code><br/>
115
- 🚧 not supported<br/>
116
- <code>float16</code><br/>
117
- <span style="color:#ABABAB;">x86:</span> <b>40,481</b> ·
118
- <span style="color:#ABABAB;">arm:</span> <b>21,451</b> ops/s
119
- <code>float32</code><br/>
120
- <span style="color:#ABABAB;">x86:</span> <b>253,902</b> ·
121
- <span style="color:#ABABAB;">arm:</span> <b>46,394</b> ops/s
122
- <code>float64</code><br/>
123
- <span style="color:#ABABAB;">x86:</span> <b>212,421</b> ·
124
- <span style="color:#ABABAB;">arm:</span> <b>52,904</b> ops/s
125
113
</td>
126
114
<td align="center"> <!-- serial -->
127
- <code>int8</code><br/>
128
- <span style="color:#ABABAB;">x86:</span> <b>10,548,600</b> ·
115
+ <span style="color:#ABABAB;">x86:</span> <b>10,548,600</b> ops/s<br/>
129
116
<span style="color:#ABABAB;">arm:</span> <b>11,379,300</b> ops/s
130
- <code>bfloat16</code><br/>
131
- <span style="color:#ABABAB;">x86:</span> <b>119,835</b> ·
132
- <span style="color:#ABABAB;">arm:</span> <b>403,909</b> ops/s
133
- <code>float16</code><br/>
134
- <span style="color:#ABABAB;">x86:</span> <b>501,310</b> ·
135
- <span style="color:#ABABAB;">arm:</span> <b>871,963</b> ops/s
136
- <code>float32</code><br/>
137
- <span style="color:#ABABAB;">x86:</span> <b>882,484</b> ·
138
- <span style="color:#ABABAB;">arm:</span> <b>399,661</b> ops/s
139
- <code>float64</code><br/>
140
- <span style="color:#ABABAB;">x86:</span> <b>839,301</b> ·
141
- <span style="color:#ABABAB;">arm:</span> <b>837,126</b> ops/s
142
117
</td>
143
118
<td align="center"> <!-- simsimd -->
144
- <code>int8</code><br/>
145
- <span style="color:#ABABAB;">x86:</span> <b>16,151,800</b> ·
119
+ <span style="color:#ABABAB;">x86:</span> <b>16,151,800</b> ops/s<br/>
146
120
<span style="color:#ABABAB;">arm:</span> <b>13,524,000</b> ops/s
147
- <code>bfloat16</code><br/>
148
- <span style="color:#ABABAB;">x86:</span> <b>9,738,540</b> ·
121
+ </td>
122
+ </tr >
123
+ <tr >
124
+ <td colspan="4" align="center">cosine distances between 1536d vectors in <code>bfloat16</code></td>
125
+ </tr >
126
+ <tr >
127
+ <td align="center"> <!-- scipy.spatial.distance.cosine -->
128
+ 🚧 not supported<br/>
129
+ </td>
130
+ <td align="center"> <!-- serial -->
131
+ <span style="color:#ABABAB;">x86:</span> <b>119,835</b> ops/s<br/>
132
+ <span style="color:#ABABAB;">arm:</span> <b>403,909</b> ops/s
133
+ </td>
134
+ <td align="center"> <!-- simsimd -->
135
+ <span style="color:#ABABAB;">x86:</span> <b>9,738,540</b> ops/s<br/>
149
136
<span style="color:#ABABAB;">arm:</span> <b>4,881,900</b> ops/s
150
- <code>float16</code><br/>
151
- <span style="color:#ABABAB;">x86:</span> <b>7,627,600</b> ·
137
+ </td>
138
+ </tr >
139
+ <tr >
140
+ <td colspan="4" align="center">cosine distances between 1536d vectors in <code>float16</code></td>
141
+ </tr >
142
+ <tr >
143
+ <td align="center"> <!-- scipy.spatial.distance.cosine -->
144
+ <span style="color:#ABABAB;">x86:</span> <b>40,481</b> ops/s<br/>
145
+ <span style="color:#ABABAB;">arm:</span> <b>21,451</b> ops/s
146
+ </td>
147
+ <td align="center"> <!-- serial -->
148
+ <span style="color:#ABABAB;">x86:</span> <b>501,310</b> ops/s<br/>
149
+ <span style="color:#ABABAB;">arm:</span> <b>871,963</b> ops/s
150
+ </td>
151
+ <td align="center"> <!-- simsimd -->
152
+ <span style="color:#ABABAB;">x86:</span> <b>7,627,600</b> ops/s<br/>
152
153
<span style="color:#ABABAB;">arm:</span> <b>3,316,810</b> ops/s
153
- <code>float32</code><br/>
154
- <span style="color:#ABABAB;">x86:</span> <b>8,202,910</b> ·
154
+ </td>
155
+ </tr >
156
+ <tr >
157
+ <td colspan="4" align="center">cosine distances between 1536d vectors in <code>float32</code></td>
158
+ </tr >
159
+ <tr >
160
+ <td align="center"> <!-- scipy.spatial.distance.cosine -->
161
+ <span style="color:#ABABAB;">x86:</span> <b>253,902</b> ops/s<br/>
162
+ <span style="color:#ABABAB;">arm:</span> <b>46,394</b> ops/s
163
+ </td>
164
+ <td align="center"> <!-- serial -->
165
+ <span style="color:#ABABAB;">x86:</span> <b>882,484</b> ops/s<br/>
166
+ <span style="color:#ABABAB;">arm:</span> <b>399,661</b> ops/s
167
+ </td>
168
+ <td align="center"> <!-- simsimd -->
169
+ <span style="color:#ABABAB;">x86:</span> <b>8,202,910</b> ops/s<br/>
155
170
<span style="color:#ABABAB;">arm:</span> <b>3,400,620</b> ops/s
156
- <code>float64</code><br/>
157
- <span style="color:#ABABAB;">x86:</span> <b>1,538,530</b> ·
171
+ </td>
172
+ </tr >
173
+ <tr >
174
+ <td colspan="4" align="center">cosine distances between 1536d vectors in <code>float64</code></td>
175
+ </tr >
176
+ <tr >
177
+ <td align="center"> <!-- scipy.spatial.distance.cosine -->
178
+ <span style="color:#ABABAB;">x86:</span> <b>212,421</b> ops/s<br/>
179
+ <span style="color:#ABABAB;">arm:</span> <b>52,904</b> ops/s
180
+ </td>
181
+ <td align="center"> <!-- serial -->
182
+ <span style="color:#ABABAB;">x86:</span> <b>839,301</b> ops/s<br/>
183
+ <span style="color:#ABABAB;">arm:</span> <b>837,126</b> ops/s
184
+ </td>
185
+ <td align="center"> <!-- simsimd -->
186
+ <span style="color:#ABABAB;">x86:</span> <b>1,538,530</b> ops/s<br/>
158
187
<span style="color:#ABABAB;">arm:</span> <b>1,678,920</b> ops/s
159
188
</td>
160
189
</tr >
190
+
161
191
<!-- Euclidean distances with different precision levels -->
162
192
<tr >
163
- <td colspan="4" align="center">eculidean distance between 1536d vectors in <code>float16 </code></td>
193
+ <td colspan="4" align="center">euclidean distance between 1536d vectors in <code>int8 </code></td>
164
194
</tr >
165
195
<tr >
166
196
<td align="center"> <!-- scipy.spatial.distance.sqeuclidean -->
167
- <code>int8</code><br/>
168
- <span style="color:#ABABAB;">x86:</span> <b>252,113</b> ·
197
+ <span style="color:#ABABAB;">x86:</span> <b>252,113</b> ops/s<br/>
169
198
<span style="color:#ABABAB;">arm:</span> <b>177,443</b> ops/s
170
- <code>bfloat16</code><br/>
171
- 🚧 not supported<br/>
172
- <code>float16</code><br/>
173
- <span style="color:#ABABAB;">x86:</span> <b>54,621</b> ·
174
- <span style="color:#ABABAB;">arm:</span> <b>71,793</b> ops/s
175
- <code>float32</code><br/>
176
- <span style="color:#ABABAB;">x86:</span> <b>424,944</b> ·
177
- <span style="color:#ABABAB;">arm:</span> <b>292,629</b> ops/s
178
- <code>float64</code><br/>
179
- <span style="color:#ABABAB;">x86:</span> <b>334,929</b> ·
180
- <span style="color:#ABABAB;">arm:</span> <b>237,505</b> ops/s
181
199
</td>
182
200
<td align="center"> <!-- serial -->
183
- <code>int8</code><br/>
184
- <span style="color:#ABABAB;">x86:</span> <b>6,690,110</b> ·
201
+ <span style="color:#ABABAB;">x86:</span> <b>6,690,110</b> ops/s<br/>
185
202
<span style="color:#ABABAB;">arm:</span> <b>4,114,160</b> ops/s
186
- <code>bfloat16</code><br/>
187
- <span style="color:#ABABAB;">x86:</span> <b>119,842</b> ·
188
- <span style="color:#ABABAB;">arm:</span> <b>1,049,230</b> ops/s
189
- <code>float16</code><br/>
190
- <span style="color:#ABABAB;">x86:</span> <b>196,413</b> ·
191
- <span style="color:#ABABAB;">arm:</span> <b>911,370</b> ops/s
192
- <code>float32</code><br/>
193
- <span style="color:#ABABAB;">x86:</span> <b>1,295,210</b> ·
194
- <span style="color:#ABABAB;">arm:</span> <b>1,055,940</b> ops/s
195
- <code>float64</code><br/>
196
- <span style="color:#ABABAB;">x86:</span> <b>1,215,190</b> ·
197
- <span style="color:#ABABAB;">arm:</span> <b>905,782</b> ops/s
198
203
</td>
199
204
<td align="center"> <!-- simsimd -->
200
- <code>int8</code><br/>
201
- <span style="color:#ABABAB;">x86:</span> <b>18,989,000</b> ·
205
+ <span style="color:#ABABAB;">x86:</span> <b>18,989,000</b> ops/s<br/>
202
206
<span style="color:#ABABAB;">arm:</span> <b>18,878,200</b> ops/s
203
- <code>bfloat16</code><br/>
204
- <span style="color:#ABABAB;">x86:</span> <b>9,727,210</b> ·
207
+ </td>
208
+ </tr >
209
+ <tr >
210
+ <td colspan="4" align="center">euclidean distance between 1536d vectors in <code>bfloat16</code></td>
211
+ </tr >
212
+ <tr >
213
+ <td align="center"> <!-- scipy.spatial.distance.sqeuclidean -->
214
+ 🚧 not supported<br/>
215
+ </td>
216
+ <td align="center"> <!-- serial -->
217
+ <span style="color:#ABABAB;">x86:</span> <b>119,842</b> ops/s<br/>
218
+ <span style="color:#ABABAB;">arm:</span> <b>1,049,230</b> ops/s
219
+ </td>
220
+ <td align="center"> <!-- simsimd -->
221
+ <span style="color:#ABABAB;">x86:</span> <b>9,727,210</b> ops/s<br/>
205
222
<span style="color:#ABABAB;">arm:</span> <b>4,233,420</b> ops/s
206
- <code>float16</code><br/>
207
- <span style="color:#ABABAB;">x86:</span> <b>19,466,800</b> ·
223
+ </td>
224
+ </tr >
225
+ <tr >
226
+ <td colspan="4" align="center">euclidean distance between 1536d vectors in <code>float16</code></td>
227
+ </tr >
228
+ <tr >
229
+ <td align="center"> <!-- scipy.spatial.distance.sqeuclidean -->
230
+ <span style="color:#ABABAB;">x86:</span> <b>54,621</b> ops/s<br/>
231
+ <span style="color:#ABABAB;">arm:</span> <b>71,793</b> ops/s
232
+ </td>
233
+ <td align="center"> <!-- serial -->
234
+ <span style="color:#ABABAB;">x86:</span> <b>196,413</b> ops/s<br/>
235
+ <span style="color:#ABABAB;">arm:</span> <b>911,370</b> ops/s
236
+ </td>
237
+ <td align="center"> <!-- simsimd -->
238
+ <span style="color:#ABABAB;">x86:</span> <b>19,466,800</b> ops/s<br/>
208
239
<span style="color:#ABABAB;">arm:</span> <b>3,522,760</b> ops/s
209
- <code>float32</code><br/>
210
- <span style="color:#ABABAB;">x86:</span> <b>8,924,100</b> ·
240
+ </td>
241
+ </tr >
242
+ <tr >
243
+ <td colspan="4" align="center">euclidean distance between 1536d vectors in <code>float32</code></td>
244
+ </tr >
245
+ <tr >
246
+ <td align="center"> <!-- scipy.spatial.distance.sqeuclidean -->
247
+ <span style="color:#ABABAB;">x86:</span> <b>424,944</b> ops/s<br/>
248
+ <span style="color:#ABABAB;">arm:</span> <b>292,629</b> ops/s
249
+ </td>
250
+ <td align="center"> <!-- serial -->
251
+ <span style="color:#ABABAB;">x86:</span> <b>1,295,210</b> ops/s<br/>
252
+ <span style="color:#ABABAB;">arm:</span> <b>1,055,940</b> ops/s
253
+ </td>
254
+ <td align="center"> <!-- simsimd -->
255
+ <span style="color:#ABABAB;">x86:</span> <b>8,924,100</b> ops/s<br/>
211
256
<span style="color:#ABABAB;">arm:</span> <b>3,602,650</b> ops/s
212
- <code>float64</code><br/>
213
- <span style="color:#ABABAB;">x86:</span> <b>1,701,740</b> ·
257
+ </td>
258
+ </tr >
259
+ <tr >
260
+ <td colspan="4" align="center">euclidean distance between 1536d vectors in <code>float64</code></td>
261
+ </tr >
262
+ <tr >
263
+ <td align="center"> <!-- scipy.spatial.distance.sqeuclidean -->
264
+ <span style="color:#ABABAB;">x86:</span> <b>334,929</b> ops/s<br/>
265
+ <span style="color:#ABABAB;">arm:</span> <b>237,505</b> ops/s
266
+ </td>
267
+ <td align="center"> <!-- serial -->
268
+ <span style="color:#ABABAB;">x86:</span> <b>1,215,190</b> ops/s<br/>
269
+ <span style="color:#ABABAB;">arm:</span> <b>905,782</b> ops/s
270
+ </td>
271
+ <td align="center"> <!-- simsimd -->
272
+ <span style="color:#ABABAB;">x86:</span> <b>1,701,740</b> ops/s<br/>
214
273
<span style="color:#ABABAB;">arm:</span> <b>1,735,840</b> ops/s
215
274
</td>
216
275
</tr >
217
276
<!-- Bilinear forms -->
218
277
<!-- Sparse set intersections -->
219
278
</table >
220
279
280
+ > For benchmarks we mostly use 1536-dimensional vectors, like the embeddings produced by the OpenAI Ada API.
221
281
> The code was compiled with GCC 12, using glibc v2.35.
222
282
> The benchmarks were performed on Arm-based Graviton3 AWS `c7g` instances and Intel Sapphire Rapids `r7iz` instances.
223
283
> Most modern Arm-based 64-bit CPUs will have similar relative speedups.
0 commit comments