Skip to content

Commit 9c399e9

Browse files
authored
Merge pull request #231 from ashvardanian/main-dev
Better Profiling
2 parents a55df33 + 000866d commit 9c399e9

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

scripts/bench.cxx

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -819,10 +819,6 @@ int main(int argc, char **argv) {
819819
dense_<f16_k>("l2sq_f16_sve", simsimd_l2sq_f16_sve, simsimd_l2sq_f16_accurate);
820820
dense_<f16_k>("l2_f16_sve", simsimd_l2_f16_sve, simsimd_l2_f16_accurate);
821821

822-
dense_<bf16_k>("cos_bf16_sve", simsimd_cos_bf16_sve, simsimd_cos_bf16_accurate);
823-
dense_<bf16_k>("l2sq_bf16_sve", simsimd_l2sq_bf16_sve, simsimd_l2sq_bf16_accurate);
824-
dense_<bf16_k>("l2_bf16_sve", simsimd_l2_bf16_sve, simsimd_l2_bf16_accurate);
825-
826822
dense_<f32_k>("dot_f32_sve", simsimd_dot_f32_sve, simsimd_dot_f32_accurate);
827823
dense_<f32_k>("cos_f32_sve", simsimd_cos_f32_sve, simsimd_cos_f32_accurate);
828824
dense_<f32_k>("l2sq_f32_sve", simsimd_l2sq_f32_sve, simsimd_l2sq_f32_accurate);

scripts/bench_vectors.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,21 @@ class Kernel:
7272
tensor_type: callable = np.array
7373

7474

75+
def serial_cosine(a: List[float], b: List[float]) -> float:
76+
dot_product = sum(ai * bi for ai, bi in zip(a, b))
77+
norm_a = sum(ai * ai for ai in a) ** 0.5
78+
norm_b = sum(bi * bi for bi in b) ** 0.5
79+
if norm_a == 0 and norm_b == 0:
80+
return 1
81+
if dot_product == 0:
82+
return 0
83+
return dot_product / (norm_a * norm_b)
84+
85+
86+
def serial_sqeuclidean(a: List[float], b: List[float]) -> float:
87+
return sum((ai - bi) ** 2 for ai, bi in zip(a, b))
88+
89+
7590
def yield_kernels(
7691
metric_families: List[str],
7792
dtype_names: List[str],
@@ -197,6 +212,25 @@ def for_dtypes(
197212
simd.vdot,
198213
lambda A, B: simd.cdist(A, B, metric="vdot"),
199214
)
215+
if "spatial" in metric_families:
216+
yield from for_dtypes(
217+
"serial.cosine",
218+
["float64", "float32", "float16", "int8"],
219+
serial_cosine,
220+
wrap_rows_batch_calls(serial_cosine),
221+
lambda A, B: spd.cdist(A, B, "cosine"),
222+
simd.cosine,
223+
lambda A, B: simd.cdist(A, B, metric="cosine"),
224+
)
225+
yield from for_dtypes(
226+
"serial.sqeuclidean",
227+
["float64", "float32", "float16", "int8"],
228+
serial_sqeuclidean,
229+
wrap_rows_batch_calls(serial_sqeuclidean),
230+
lambda A, B: spd.cdist(A, B, "sqeuclidean"),
231+
simd.sqeuclidean,
232+
lambda A, B: simd.cdist(A, B, metric="sqeuclidean"),
233+
)
200234
if "spatial" in metric_families and include_scipy:
201235
yield from for_dtypes(
202236
"scipy.cosine",
@@ -528,11 +562,11 @@ def result_to_row(result: Result) -> List[str]:
528562
if isinstance(result.baseline_seconds, float):
529563
ops_per_second = result.distance_calculations / result.baseline_seconds
530564
gbs_per_second = result.bytes_per_vector * ops_per_second / 1e9
531-
baseline_cell = f"{ops_per_second:,.0f} ops/s, {gbs_per_second:,.2f} GB/s"
565+
baseline_cell = f"{ops_per_second:,.0f} ops/s, {gbs_per_second:,.3f} GB/s"
532566
if isinstance(result.simsimd_seconds, float):
533567
ops_per_second = result.distance_calculations / result.simsimd_seconds
534568
gbs_per_second = result.bytes_per_vector * ops_per_second / 1e9
535-
simsimd_cell = f"{ops_per_second:,.0f} ops/s, {gbs_per_second:,.2f} GB/s"
569+
simsimd_cell = f"{ops_per_second:,.0f} ops/s, {gbs_per_second:,.3f} GB/s"
536570
if isinstance(result.baseline_seconds, float) and isinstance(result.simsimd_seconds, float):
537571
improvement_cell = f"{result.baseline_seconds / result.simsimd_seconds:,.2f} x"
538572

0 commit comments

Comments
 (0)