Skip to content

Commit 5c07c83

Browse files
committed
tests run
1 parent 28d873b commit 5c07c83

4 files changed

Lines changed: 86 additions & 36 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ authors = [{ name = "Sergey Feldman, Daniel King, Shivashankar Subramanian" }]
1313

1414
# --- Runtime dependencies ---
1515
dependencies = [
16-
"awscli",
16+
"awscli>=1.44.38",
1717
"fasttext-wheel>=0.9.2",
18+
"pillow>=12.1.1",
1819
"pycld2>=0.41",
1920
"scikit-learn==1.7.1",
2021
"text-unidecode==1.3",

s2and/feature_port.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,9 +1064,7 @@ def get_constraints_matrix_rust(
10641064

10651065
get_constraints_matrix = getattr(featurizer, "get_constraints_matrix", None)
10661066
if not callable(get_constraints_matrix):
1067-
raise RuntimeError(
1068-
"RustFeaturizer.get_constraints_matrix is unavailable; rebuild/install s2and-rust>=0.40.0."
1069-
)
1067+
raise RuntimeError("RustFeaturizer.get_constraints_matrix is unavailable; rebuild/install s2and-rust>=0.40.0.")
10701068
return list(
10711069
get_constraints_matrix(
10721070
pairs,

scripts/eval_prod_models.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,55 @@
44
"""
55
Evaluate production S2AND models (SPECTER1 vs SPECTER2) on various datasets.
66
7+
8+
In this script we try to answer the question: if we deploy SPECTER2, will S2AND care?
9+
We check this both with and without retraining.
10+
11+
This is done with s2and-mini. Ai2 employees can find it at s3://ai2-s2-research/s2and/s2and-mini/
12+
13+
With retraining (random seed 42):
14+
15+
Performance with SPECTERv1 data, on arnetminer (B3): (0.922, 0.985, 0.952)
16+
Performance with SPECTERv2 data, on arnetminer (B3): (0.93, 0.988, 0.958)
17+
18+
Performance with SPECTERv1 data, on inspire (B3): (0.958, 0.974, 0.966)
19+
Performance with SPECTERv2 data, on inspire (B3): (0.995, 0.959, 0.977)
20+
21+
Performance with SPECTERv1 data, on kisti (B3): (0.951, 0.971, 0.961)
22+
Performance with SPECTERv2 data, on kisti (B3): (0.946, 0.98, 0.963)
23+
24+
Performance with SPECTERv1 data, on pubmed (B3): (0.849, 0.988, 0.913)
25+
Performance with SPECTERv2 data, on pubmed (B3): (0.86, 0.988, 0.92)
26+
27+
Performance with SPECTERv1 data, on qian (B3): (0.936, 0.943, 0.94)
28+
Performance with SPECTERv2 data, on qian (B3): (0.95, 0.964, 0.957)
29+
30+
Performance with SPECTERv1 data, on zbmath (B3): (0.966, 0.984, 0.975)
31+
Performance with SPECTERv2 data, on zbmath (B3): (0.975, 0.991, 0.983)
32+
33+
---
34+
35+
Without retraining:
36+
37+
Performance with SPECTERv1 data, on arnetminer (B3): (0.977, 0.982, 0.979)
38+
Performance with SPECTERv2 data, on arnetminer (B3):
39+
40+
Performance with SPECTERv1 data, on inspire (B3): (0.993, 0.964, 0.978)
41+
Performance with SPECTERv2 data, on inspire (B3):
42+
43+
Performance with SPECTERv1 data, on kisti (B3): (0.96, 0.957, 0.959)
44+
Performance with SPECTERv2 data, on kisti (B3):
45+
46+
Performance with SPECTERv1 data, on pubmed (B3): (1.0, 0.968, 0.984)
47+
Performance with SPECTERv2 data, on pubmed (B3):
48+
49+
Performance with SPECTERv1 data, on qian (B3): (0.985, 0.955, 0.969)
50+
Performance with SPECTERv2 data, on qian (B3):
51+
52+
Performance with SPECTERv1 data, on zbmath (B3): (0.967, 0.955, 0.961)
53+
Performance with SPECTERv2 data, on zbmath (B3):
54+
55+
756
Usage:
857
# Evaluate on inventors_s2and (default)
958
python scripts/eval_prod_models.py

0 commit comments

Comments
 (0)