Skip to content

Commit 57e29ea

Browse files
committed
Add SciNet API
1 parent ea8d5f9 commit 57e29ea

8 files changed

Lines changed: 286 additions & 21 deletions

File tree

.env.example

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
SCINET_API_BASE_URL=http://127.0.0.1:8000
22
SCINET_API_KEY=replace-me
3-
SCINET_API_TIMEOUT=120
3+
SCINET_API_TIMEOUT_DEFAULT=120
4+
SCINET_API_TIMEOUT_SEARCH=1800
5+
SCINET_API_TIMEOUT_AUTHORS_RELATED=120
6+
SCINET_API_TIMEOUT_AUTHORS_PAPERS=300
7+
SCINET_API_TIMEOUT_SUPPORT_PAPERS=600
8+
SCINET_API_CONNECT_TIMEOUT=10
9+
SCINET_API_WRITE_TIMEOUT=60
10+
SCINET_API_POOL_TIMEOUT=10
411

512
# Required for all tasks.
613
LLM_PROVIDER=openai_compatible
@@ -18,5 +25,9 @@ GROBID_BASE_URL=http://127.0.0.1:8070
1825
OA_API_KEY=
1926
OPENALEX_MAILTO=
2027

21-
# Grounding model settings for `grounded_review` are passed via --params-file or --params-json.
22-
# See examples/grounded_review_params.example.json.
28+
# Optional local model paths for `Idea Grounding and Evaluation`.
29+
# Leave empty to use the public Hugging Face model ids below.
30+
SCINET_EMBEDDING_MODEL_PATH=
31+
SCINET_RERANKER_MODEL_PATH=
32+
33+
# Grounding model settings can also be passed via --params-file or --params-json.

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,11 @@ pip install -r requirements.txt
119119
cp .env.example .env
120120
```
121121

122-
Fill in the required variables:
122+
We currently provide a public `SciNet API` endpoint for community use. Fill in the required variables:
123123

124124
```env
125-
SCINET_API_BASE_URL=https://your-scinet-api.example.com
126-
SCINET_API_KEY=replace-me
127-
SCINET_API_TIMEOUT=120
125+
SCINET_API_BASE_URL=http://scinet.openkg.cn
126+
SCINET_API_KEY=scinet-public-key
128127
129128
LLM_PROVIDER=openai_compatible
130129
LLM_API_KEY=replace-me
@@ -248,6 +247,12 @@ python3 run_scinet.py \
248247
```
249248

250249
For `Idea Grounding and Evaluation`, model-related overrides can be supplied through `--params-file` or `--params-json`.
250+
You can also set local model paths once in `.env`:
251+
252+
```bash
253+
SCINET_EMBEDDING_MODEL_PATH=/absolute/path/to/BAAI--bge-large-en-v1.5
254+
SCINET_RERANKER_MODEL_PATH=/absolute/path/to/BAAI--bge-reranker-large
255+
```
251256

252257
`grounded_review` also accepts `query_provider`, `query_model`, and `query_api_url` overrides in `params`.
253258
If omitted, it resolves them from `LLM_PROVIDER`, `LLM_MODEL`, and `LLM_BASE_URL`.

scinet/cli.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,15 @@
88
from pathlib import Path
99
from typing import Any
1010

11-
from .core.common import DEFAULT_ENV_PATH, DEFAULT_RUN_ROOT, normalize_whitespace, resolve_run_dir, write_json, write_text
11+
from .core.common import (
12+
DEFAULT_ENV_PATH,
13+
DEFAULT_RUN_ROOT,
14+
normalize_whitespace,
15+
read_json,
16+
resolve_run_dir,
17+
write_json,
18+
write_text,
19+
)
1220
from .core.schemas import (
1321
SUPPORTED_TASK_TYPES,
1422
TASK_AUTHOR_PROFILE,
@@ -43,6 +51,26 @@ def build_parser() -> argparse.ArgumentParser:
4351
parser.add_argument("--author-name", default=None, help=f"Author name input for {TASK_AUTHOR_PROFILE}.")
4452
parser.add_argument("--params-file", default=None, help="Path to a JSON file with task params overrides.")
4553
parser.add_argument("--params-json", default=None, help="Inline JSON object for task params overrides.")
54+
parser.add_argument("--api-timeout-default", type=float, default=None, help="Default SciNet API read timeout in seconds.")
55+
parser.add_argument("--api-timeout-search", type=float, default=None, help="Read timeout in seconds for /v1/search.")
56+
parser.add_argument(
57+
"--api-timeout-authors-related",
58+
type=float,
59+
default=None,
60+
help="Read timeout in seconds for /v1/authors/related.",
61+
)
62+
parser.add_argument(
63+
"--api-timeout-authors-papers",
64+
type=float,
65+
default=None,
66+
help="Read timeout in seconds for /v1/authors/papers.",
67+
)
68+
parser.add_argument(
69+
"--api-timeout-support-papers",
70+
type=float,
71+
default=None,
72+
help="Read timeout in seconds for /v1/authors/support-papers.",
73+
)
4674
parser.add_argument("--output-root", default=str(DEFAULT_RUN_ROOT), help="Root folder for SciNet runs.")
4775
parser.add_argument("--run-id", default=None, help="Optional run id.")
4876
parser.add_argument("--env", default=str(DEFAULT_ENV_PATH), help="Path to the SciNet .env file.")
@@ -94,6 +122,17 @@ def _build_request_from_args(args: argparse.Namespace) -> SciNetRequest:
94122
params = {}
95123
params.update(_load_optional_json(args.params_file))
96124
params.update(_parse_inline_json(args.params_json))
125+
cli_timeout_params = {
126+
"api_timeout_default": "scinet_api_timeout_default",
127+
"api_timeout_search": "scinet_api_timeout_search",
128+
"api_timeout_authors_related": "scinet_api_timeout_authors_related",
129+
"api_timeout_authors_papers": "scinet_api_timeout_authors_papers",
130+
"api_timeout_support_papers": "scinet_api_timeout_support_papers",
131+
}
132+
for arg_name, param_name in cli_timeout_params.items():
133+
value = getattr(args, arg_name, None)
134+
if value is not None:
135+
params[param_name] = value
97136
return SciNetRequest(
98137
task_type=args.task_type,
99138
input_payload=input_payload,

scinet/core/api_client.py

Lines changed: 187 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,22 @@
22

33
from dataclasses import dataclass
44
from pathlib import Path
5+
from time import monotonic
56
from typing import Any
67

7-
from .common import DEFAULT_SCINET_API_TIMEOUT, get_env_value, load_env_values, normalize_whitespace
8+
from .common import (
9+
DEFAULT_SCINET_API_AUTHORS_PAPERS_TIMEOUT,
10+
DEFAULT_SCINET_API_AUTHORS_RELATED_TIMEOUT,
11+
DEFAULT_SCINET_API_CONNECT_TIMEOUT,
12+
DEFAULT_SCINET_API_POOL_TIMEOUT,
13+
DEFAULT_SCINET_API_SEARCH_TIMEOUT,
14+
DEFAULT_SCINET_API_SUPPORT_PAPERS_TIMEOUT,
15+
DEFAULT_SCINET_API_TIMEOUT,
16+
DEFAULT_SCINET_API_WRITE_TIMEOUT,
17+
get_env_value,
18+
load_env_values,
19+
normalize_whitespace,
20+
)
821

922

1023
class SciNetApiError(RuntimeError):
@@ -18,7 +31,49 @@ def __init__(self, message: str, *, status_code: int | None = None, payload: Any
1831
class SciNetApiSettings:
1932
base_url: str
2033
api_key: str
21-
timeout: float = DEFAULT_SCINET_API_TIMEOUT
34+
timeout: float | None = None
35+
default_timeout: float = DEFAULT_SCINET_API_TIMEOUT
36+
search_timeout: float = DEFAULT_SCINET_API_SEARCH_TIMEOUT
37+
authors_related_timeout: float = DEFAULT_SCINET_API_AUTHORS_RELATED_TIMEOUT
38+
authors_papers_timeout: float = DEFAULT_SCINET_API_AUTHORS_PAPERS_TIMEOUT
39+
authors_support_papers_timeout: float = DEFAULT_SCINET_API_SUPPORT_PAPERS_TIMEOUT
40+
connect_timeout: float = DEFAULT_SCINET_API_CONNECT_TIMEOUT
41+
write_timeout: float = DEFAULT_SCINET_API_WRITE_TIMEOUT
42+
pool_timeout: float = DEFAULT_SCINET_API_POOL_TIMEOUT
43+
44+
def __post_init__(self) -> None:
    """Keep the legacy ``timeout`` field and ``default_timeout`` in agreement.

    When ``timeout`` was supplied it wins and is copied (as a float) into
    ``default_timeout``; otherwise ``timeout`` is filled in from
    ``default_timeout``.
    """
    # NOTE(review): object.__setattr__ is used instead of plain assignment,
    # presumably because the dataclass is frozen -- the decorator is not
    # visible in this hunk, confirm before changing.
    legacy_timeout = self.timeout
    if legacy_timeout is None:
        object.__setattr__(self, "timeout", self.default_timeout)
        return
    object.__setattr__(self, "default_timeout", float(legacy_timeout))
49+
50+
51+
def _first_timeout_value(
    *,
    overrides: dict[str, Any],
    env_values: dict[str, str],
    param_keys: tuple[str, ...],
    env_keys: tuple[str, ...],
    default: float,
) -> float:
    """Resolve a single timeout setting in seconds.

    Lookup order: the first key in ``param_keys`` whose override normalizes to
    a non-empty string, then ``get_env_value(env_values, *env_keys)``, then
    ``default``.

    Args:
        overrides: Per-run parameter overrides.
        env_values: Values loaded from the env file.
        param_keys: Override keys to try, in priority order.
        env_keys: Environment variable names passed to ``get_env_value``.
        default: Value returned (as a float) when nothing is configured.

    Returns:
        The configured timeout as a positive, finite float, or ``default``.

    Raises:
        ValueError: If the configured value is not a number, is NaN or
            infinite, or is not strictly positive.
    """
    import math  # local import: keeps this block self-contained

    raw_value = ""
    for key in param_keys:
        raw_value = normalize_whitespace(overrides.get(key))
        if raw_value:
            break
    if not raw_value:
        raw_value = get_env_value(env_values, *env_keys)
    if not raw_value:
        return float(default)

    keys = ", ".join((*param_keys, *env_keys))
    try:
        value = float(raw_value)
    except ValueError as exc:
        raise ValueError(f"Invalid SciNet API timeout {raw_value!r} for one of: {keys}") from exc
    # float() accepts "nan" and "inf", and ``nan <= 0`` is False, so without
    # this check a non-finite timeout would pass validation and only fail
    # later, deep inside the HTTP client.
    if not math.isfinite(value):
        raise ValueError(f"Invalid SciNet API timeout {raw_value!r} for one of: {keys}")
    if value <= 0:
        raise ValueError(f"SciNet API timeout must be positive for one of: {keys}")
    return value
2277

2378

2479
def load_scinet_api_settings(env_path: Path, params: dict[str, Any] | None = None) -> SciNetApiSettings:
@@ -36,19 +91,102 @@ def load_scinet_api_settings(env_path: Path, params: dict[str, Any] | None = Non
3691
or overrides.get("kg2api_api_key")
3792
or get_env_value(env_values, "SCINET_API_KEY", "SCIMAP_API_KEY", "KG2API_API_KEY")
3893
)
39-
timeout_text = normalize_whitespace(
40-
overrides.get("scinet_api_timeout")
41-
or overrides.get("scimap_api_timeout")
42-
or overrides.get("kg2api_timeout")
43-
or get_env_value(env_values, "SCINET_API_TIMEOUT", "SCIMAP_API_TIMEOUT", "KG2API_TIMEOUT")
94+
default_timeout = _first_timeout_value(
95+
overrides=overrides,
96+
env_values=env_values,
97+
param_keys=(
98+
"scinet_api_timeout_default",
99+
"api_timeout_default",
100+
"scinet_api_timeout",
101+
"scimap_api_timeout",
102+
"kg2api_timeout",
103+
),
104+
env_keys=("SCINET_API_TIMEOUT_DEFAULT", "SCINET_API_TIMEOUT", "SCIMAP_API_TIMEOUT", "KG2API_TIMEOUT"),
105+
default=DEFAULT_SCINET_API_TIMEOUT,
106+
)
107+
search_timeout = _first_timeout_value(
108+
overrides=overrides,
109+
env_values=env_values,
110+
param_keys=("scinet_api_timeout_search", "api_timeout_search", "search_timeout"),
111+
env_keys=("SCINET_API_TIMEOUT_SEARCH", "SCIMAP_API_TIMEOUT_SEARCH", "KG2API_TIMEOUT_SEARCH"),
112+
default=DEFAULT_SCINET_API_SEARCH_TIMEOUT,
113+
)
114+
authors_related_timeout = _first_timeout_value(
115+
overrides=overrides,
116+
env_values=env_values,
117+
param_keys=("scinet_api_timeout_authors_related", "api_timeout_authors_related", "authors_related_timeout"),
118+
env_keys=(
119+
"SCINET_API_TIMEOUT_AUTHORS_RELATED",
120+
"SCIMAP_API_TIMEOUT_AUTHORS_RELATED",
121+
"KG2API_TIMEOUT_AUTHORS_RELATED",
122+
),
123+
default=DEFAULT_SCINET_API_AUTHORS_RELATED_TIMEOUT,
124+
)
125+
authors_papers_timeout = _first_timeout_value(
126+
overrides=overrides,
127+
env_values=env_values,
128+
param_keys=("scinet_api_timeout_authors_papers", "api_timeout_authors_papers", "authors_papers_timeout"),
129+
env_keys=(
130+
"SCINET_API_TIMEOUT_AUTHORS_PAPERS",
131+
"SCIMAP_API_TIMEOUT_AUTHORS_PAPERS",
132+
"KG2API_TIMEOUT_AUTHORS_PAPERS",
133+
),
134+
default=DEFAULT_SCINET_API_AUTHORS_PAPERS_TIMEOUT,
135+
)
136+
authors_support_papers_timeout = _first_timeout_value(
137+
overrides=overrides,
138+
env_values=env_values,
139+
param_keys=(
140+
"scinet_api_timeout_support_papers",
141+
"api_timeout_support_papers",
142+
"support_papers_timeout",
143+
"authors_support_papers_timeout",
144+
),
145+
env_keys=(
146+
"SCINET_API_TIMEOUT_SUPPORT_PAPERS",
147+
"SCIMAP_API_TIMEOUT_SUPPORT_PAPERS",
148+
"KG2API_TIMEOUT_SUPPORT_PAPERS",
149+
),
150+
default=DEFAULT_SCINET_API_SUPPORT_PAPERS_TIMEOUT,
151+
)
152+
connect_timeout = _first_timeout_value(
153+
overrides=overrides,
154+
env_values=env_values,
155+
param_keys=("scinet_api_connect_timeout", "api_connect_timeout"),
156+
env_keys=("SCINET_API_CONNECT_TIMEOUT", "SCIMAP_API_CONNECT_TIMEOUT", "KG2API_CONNECT_TIMEOUT"),
157+
default=DEFAULT_SCINET_API_CONNECT_TIMEOUT,
158+
)
159+
write_timeout = _first_timeout_value(
160+
overrides=overrides,
161+
env_values=env_values,
162+
param_keys=("scinet_api_write_timeout", "api_write_timeout"),
163+
env_keys=("SCINET_API_WRITE_TIMEOUT", "SCIMAP_API_WRITE_TIMEOUT", "KG2API_WRITE_TIMEOUT"),
164+
default=DEFAULT_SCINET_API_WRITE_TIMEOUT,
165+
)
166+
pool_timeout = _first_timeout_value(
167+
overrides=overrides,
168+
env_values=env_values,
169+
param_keys=("scinet_api_pool_timeout", "api_pool_timeout"),
170+
env_keys=("SCINET_API_POOL_TIMEOUT", "SCIMAP_API_POOL_TIMEOUT", "KG2API_POOL_TIMEOUT"),
171+
default=DEFAULT_SCINET_API_POOL_TIMEOUT,
44172
)
45-
timeout = float(timeout_text) if timeout_text else float(DEFAULT_SCINET_API_TIMEOUT)
46173

47174
if not base_url:
48175
raise ValueError(f"Missing SCINET_API_BASE_URL in {env_path}")
49176
if not api_key:
50177
raise ValueError(f"Missing SCINET_API_KEY in {env_path}")
51-
return SciNetApiSettings(base_url=base_url.rstrip("/"), api_key=api_key, timeout=timeout)
178+
return SciNetApiSettings(
179+
base_url=base_url.rstrip("/"),
180+
api_key=api_key,
181+
default_timeout=default_timeout,
182+
search_timeout=search_timeout,
183+
authors_related_timeout=authors_related_timeout,
184+
authors_papers_timeout=authors_papers_timeout,
185+
authors_support_papers_timeout=authors_support_papers_timeout,
186+
connect_timeout=connect_timeout,
187+
write_timeout=write_timeout,
188+
pool_timeout=pool_timeout,
189+
)
52190

53191

54192
class SciNetApiClient:
@@ -59,7 +197,7 @@ def __init__(self, settings: SciNetApiSettings) -> None:
59197
self._httpx = httpx
60198
self._client = httpx.Client(
61199
base_url=settings.base_url,
62-
timeout=settings.timeout,
200+
timeout=self._make_timeout(settings.default_timeout),
63201
trust_env=False,
64202
headers={
65203
"Content-Type": "application/json",
@@ -76,13 +214,45 @@ def __enter__(self) -> "SciNetApiClient":
76214
def __exit__(self, exc_type, exc, tb) -> None:
77215
self.close()
78216

217+
def _make_timeout(self, read_timeout: float) -> Any:
218+
return self._httpx.Timeout(
219+
connect=self.settings.connect_timeout,
220+
write=self.settings.write_timeout,
221+
pool=self.settings.pool_timeout,
222+
read=read_timeout,
223+
)
224+
225+
def _read_timeout_for_path(self, path: str) -> float:
226+
if path == "/v1/search":
227+
return self.settings.search_timeout
228+
if path == "/v1/authors/related":
229+
return self.settings.authors_related_timeout
230+
if path == "/v1/authors/papers":
231+
return self.settings.authors_papers_timeout
232+
if path == "/v1/authors/support-papers":
233+
return self.settings.authors_support_papers_timeout
234+
return self.settings.default_timeout
235+
79236
def _request(self, path: str, payload: dict[str, Any]) -> dict[str, Any]:
237+
read_timeout = self._read_timeout_for_path(path)
238+
started_at = monotonic()
80239
try:
81-
response = self._client.post(path, json=payload)
240+
response = self._client.post(path, json=payload, timeout=self._make_timeout(read_timeout))
82241
except self._httpx.TimeoutException as exc:
83-
raise SciNetApiError(f"SciNet API request timed out for {path}") from exc
242+
elapsed = monotonic() - started_at
243+
raise SciNetApiError(
244+
f"SciNet API {path} timed out after {elapsed:.1f}s "
245+
f"(read_timeout={read_timeout:.1f}s, base_url={self.settings.base_url})"
246+
) from exc
84247
except self._httpx.HTTPError as exc:
85-
raise SciNetApiError(f"SciNet API request failed for {path}: {exc}") from exc
248+
elapsed = monotonic() - started_at
249+
message = f"SciNet API request failed for {path} after {elapsed:.1f}s: {exc}"
250+
if isinstance(exc, self._httpx.RemoteProtocolError):
251+
message += (
252+
" This can happen when the backend interrupts a long-running request; "
253+
"increase the endpoint timeout and check server logs."
254+
)
255+
raise SciNetApiError(message) from exc
86256

87257
raw_body = response.text
88258
try:
@@ -92,9 +262,13 @@ def _request(self, path: str, payload: dict[str, Any]) -> dict[str, Any]:
92262

93263
if response.status_code >= 400:
94264
detail = None
265+
request_id = ""
95266
if isinstance(body, dict):
96267
detail = body.get("detail") or body.get("error") or body.get("message")
268+
request_id = normalize_whitespace(body.get("request_id"))
97269
message = normalize_whitespace(detail) or raw_body or f"HTTP {response.status_code}"
270+
if request_id:
271+
message = f"{message} (request_id={request_id})"
98272
raise SciNetApiError(
99273
f"SciNet API {path} returned {response.status_code}: {message}",
100274
status_code=response.status_code,

scinet/core/common.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@
1414
DEFAULT_LLM_BASE_URL = "https://api.openai.com/v1"
1515
DEFAULT_LLM_MODEL_NAME = "gpt-4.1-mini"
1616
DEFAULT_SCINET_API_TIMEOUT = 120
17+
DEFAULT_SCINET_API_SEARCH_TIMEOUT = 1800
18+
DEFAULT_SCINET_API_AUTHORS_RELATED_TIMEOUT = 120
19+
DEFAULT_SCINET_API_AUTHORS_PAPERS_TIMEOUT = 300
20+
DEFAULT_SCINET_API_SUPPORT_PAPERS_TIMEOUT = 600
21+
DEFAULT_SCINET_API_CONNECT_TIMEOUT = 10
22+
DEFAULT_SCINET_API_WRITE_TIMEOUT = 60
23+
DEFAULT_SCINET_API_POOL_TIMEOUT = 10
1724

1825

1926
def normalize_whitespace(text: Any) -> str:

0 commit comments

Comments
 (0)