|
41 | 41 | "BAAI/bge-base-en-v1.5", |
42 | 42 | "intfloat/e5-small-v2", |
43 | 43 | ) |
| 44 | +MODEL_NAME_TO_ATOL: dict[str, float] = { |
| 45 | + "lightonai/modernbert-embed-large": 6e-3, |
| 46 | +} |
44 | 47 |
|
45 | 48 | STEP_BETWEEN_INPUT_TOKENS = 8 |
46 | 49 | MIN_TOKENS = 8 |
@@ -244,12 +247,12 @@ def _benchmark_model(model_name: str, versions: tuple[Version, ...] = ("base", " |
244 | 247 | records.extend(benchmark_model_result.records) |
245 | 248 | version_to_embeddings[version] = benchmark_model_result.embeddings |
246 | 249 |
|
247 | | - # Sanity check correctness by comparing cos sim. PyTorch's huggingface dynamo bench does |
248 | | - # allclose(eager_bf16, compiled_bf16) at tol=1e-3 (bumped to 4e-3 for known-noisy models) for its bf16+compile |
249 | | - # AMP inference suite, with an fp64 reference as a second-chance fallback when allclose fails — see |
250 | | - # https://github.com/pytorch/pytorch/blob/19ecfe58b45fe56afcd9155ad721dcf9a7569339/benchmarks/dynamo/huggingface.py#L529. |
251 | | - # We do the same allclose on the cos_sim diagonal. A stricter test would also check against an fp64 reference. |
252 | | - atol = 1e-3 if torch.cuda.is_bf16_supported() else 1e-4 |
| 250 | + # Sanity check correctness by comparing cos sim. PyTorch's huggingface dynamo bench does allclose(eager_bf16, |
| 251 | + # compiled_bf16) at tol=1e-3 (bumped to 4e-3 for known-noisy models) for bf16+compile AMP inference checking, with |
| 252 | + # an fp64 reference as a second-chance fallback when allclose fails. |
| 253 | + # https://github.com/pytorch/pytorch/blob/19ecfe58b45fe56afcd9155ad721dcf9a7569339/benchmarks/dynamo/huggingface.py#L529 |
| 254 | + default_atol = 1e-3 if torch.cuda.is_bf16_supported() else 1e-4 |
| 255 | + atol = MODEL_NAME_TO_ATOL.get(model_name, default_atol) |
253 | 256 | for version1, version2 in combinations(versions, 2): |
254 | 257 | cos_sim = _cos_sim(version_to_embeddings[version1], version_to_embeddings[version2]) |
255 | 258 | diag = np.diag(cos_sim) |
|
0 commit comments