Skip to content

Commit 418058e

Browse files
committed
Add a collection visualisation page
1 parent 2d6869a commit 418058e

File tree

9 files changed

+131
-19
lines changed

9 files changed

+131
-19
lines changed

config/autofocuses.yaml

Lines changed: 0 additions & 1 deletion
This file was deleted.

config/basic.yaml

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,21 @@ paperoni:
88
paperoni_v2: _
99
fetch:
1010
$class: RulesFetcher
11+
simultaneous:
12+
"*": 3
1113
rules:
14+
"^https://api.ror.org/": standard
1215
"^https://export.arxiv.org/": standard
1316
"^https://api.crossref.org/": cached
1417
"^https://api.semanticscholar.org/": cached
18+
"^https://api.openalex.org/": cached
19+
"^https://api.datacite.org/": cached
20+
"^https://api.unpaywall.org/": cached
1521
"^https://www.biorxiv.org/": sequence
1622
".*": sequence
1723
fetchers:
1824
standard:
19-
$class: RequestsFetcher
25+
$class: HTTPXFetcher
2026
user_agent: chrome
2127
cached:
2228
$class: CachedFetcher
@@ -28,20 +34,20 @@ paperoni:
2834
sequence:
2935
$class: SequenceFetcher
3036
fetchers:
37+
- $class: HTTPXFetcher
38+
user_agent: chrome
3139
- $class: CloudFlareFetcher
3240
user_agent: chrome
3341
- $class: ScraperAPIFetcher
3442
cache_path: ${paperoni.cache_path}/scraper-cache.sql
3543
expire_after: 100d
3644
api_key: ${paperoni.api_keys.scraperapi}
37-
focuses:
38-
$include:
39-
- focuses.yaml
40-
- autofocuses.yaml
45+
focuses: focuses.yaml
4146
autofocus:
4247
author:
4348
score: 1
4449
institution_score_threshold: 1
50+
threshold: 1
4551
autovalidate:
4652
score_threshold: 10.0
4753
discovery:
@@ -68,6 +74,9 @@ paperoni:
6874
$class: paperoni.collection.filecoll:FileCollection
6975
file: ${paperoni.data_path}/collection.json
7076
server:
77+
host: localhost
78+
port: 8000
79+
protocol: http
7180
max_results: 10000
7281
auth:
7382
server_metadata_url: https://accounts.google.com/.well-known/openid-configuration
@@ -89,4 +98,4 @@ paperoni:
8998
user: []
9099
search: [user]
91100
validate: [search]
92-
dev: []
101+
dev: [validate]

pyproject.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ dev = [
6363
"pytest-asyncio>=1.3.0",
6464
"ruff>=0.11.3",
6565
]
66+
lint = [
67+
"ruff>=0.11.3",
68+
]
6669

6770
[project.entry-points."paperoni.discovery"]
6871
semantic_scholar = "paperoni.discovery.semantic_scholar:SemanticScholar"
@@ -150,6 +153,19 @@ mongo = [
150153
"hatch run mongo:stop",
151154
]
152155

156+
[tool.hatch.envs.lint]
157+
installer = "uv"
158+
detached = true
159+
dependency-groups = ["lint"]
160+
161+
[tool.hatch.envs.lint.scripts]
162+
uv = 'UV_PROJECT_ENVIRONMENT="{env:VIRTUAL_ENV}" "$(which uv)" {args}'
163+
lint = [
164+
"uv sync --no-dev --no-install-workspace --group lint",
165+
"ruff check --fix",
166+
"ruff format",
167+
]
168+
153169
[tool.hatch.envs.pixi]
154170
detached = true
155171
installer = "uv"

src/paperoni/__main__.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ async def run(self, work: "Work"):
553553
# [alias: -f]
554554
focus_file: Path = None
555555

556-
# Collection dir
556+
# Collection file
557557
# [alias: -c]
558558
collection_file: Path = None
559559

@@ -848,17 +848,17 @@ async def run(self, coll: "Coll"):
848848
# Command to execute
849849
command: TaggedUnion[Search, Import, Export, Drop, Validate, Diff, Operate]
850850

851-
# Collection string. Can be a remote collection URL or a path.
# [alias: -c]
collection_path: str = None

@cached_property
def collection(self) -> PaperCollection:
    """Return the collection selected by ``collection_path``.

    ``collection_path`` is deliberately typed as ``str`` and not ``Path``:
    pathlib collapses repeated slashes, so ``"https://host/api"`` would
    become ``"https:/host/api"`` and no longer be a valid remote endpoint.
    The value is only converted to a ``Path`` once it is known to refer to
    a local file.

    Returns:
        A ``RemoteCollection`` when the value starts with ``"http"``, a
        ``FileCollection`` for any other non-empty value, and
        ``config.collection`` when no value was given.
    """
    if not self.collection_path:
        return config.collection

    # str() keeps the method tolerant of callers that still pass a Path.
    target = str(self.collection_path)
    if target.startswith("http"):
        return RemoteCollection(endpoint=target)
    return FileCollection(file=Path(target))
864864

@@ -914,7 +914,7 @@ async def run(self, focus: "Focus"):
914914
# [option: -f]
915915
focuses: Focuses @ FileProxy() = None
916916

917-
# Collection dir
917+
# Collection file
918918
# [alias: -c]
919919
collection_file: Path = None
920920

@@ -947,6 +947,10 @@ class Serve:
947947
# Whether to enable auth
948948
auth: bool = True
949949

950+
# Collection file
951+
# [alias: -c]
952+
collection_file: Path = None
953+
950954
async def run(self):
951955
from .web import create_app
952956

@@ -959,6 +963,11 @@ async def run(self):
959963
"__admin__": ["admin"]
960964
},
961965
}
966+
if self.collection_file:
967+
overrides["paperoni.collection"] = {
968+
"$class": f"{FileCollection.__module__}:{FileCollection.__qualname__}",
969+
"file": str(self.collection_file.resolve()),
970+
}
962971
with gifnoc.overlay(overrides):
963972
app = create_app()
964973
ssl_config = config.server.ssl

src/paperoni/collection/filecoll.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import stat
2+
import warnings
13
from dataclasses import dataclass, field
24
from pathlib import Path
35

@@ -10,10 +12,22 @@
1012
@dataclass(kw_only=True)
class FileCollection(MemCollection):
    """Collection whose paper index is deserialized from ``file``.

    The collection is read-only when ``read_only`` is passed as true, or
    when ``file`` exists and its ``stat.S_IWRITE`` permission bit is
    cleared. In read-only mode ``_commit`` warns and does not save.
    """

    # File the index is deserialized from; excluded from comparison.
    file: Path = field(compare=False)
    # When true, commits are skipped with a warning.
    read_only: bool = False

    def __post_init__(self):
        """Load the index, then work out whether the collection is read-only."""
        proxy = FileProxy(default_factory=PaperIndex, refresh=True)
        self._index = deserialize(PaperIndex @ proxy, str(self.file))

        # An explicit read_only=True always wins; otherwise infer the mode
        # from the write bit of an already existing file.
        if not self.read_only and self.file.exists():
            write_bit = self.file.stat().st_mode & stat.S_IWRITE
            self.read_only = write_bit == 0

    def _commit(self) -> None:
        """Save the index, unless the collection is read-only."""
        if not self.read_only:
            self._index.save()
            return
        warnings.warn(
            f"Collection {self.file} is open in read-only mode, skipping commit."
        )

src/paperoni/web/assets/workset.js

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { html } from './common.js';
22
import {
33
createAuthorsSection,
4-
createReleasesSection,
54
createDetailsSection,
5+
createReleasesSection,
66
getScoreClass
77
} from './paper.js';
88

@@ -30,6 +30,22 @@ async function fetchWorksets(offset = 0, size = 100) {
3030
return await response.json();
3131
}
3232

33+
/**
 * Request one page of results from the search endpoint.
 *
 * @param {number} offset - Value sent as the `offset` query parameter.
 * @param {number} limit - Value sent as the `limit` query parameter.
 * @returns {Promise<any>} The parsed JSON body of the response.
 * @throws {Error} When the response status is not OK.
 */
async function fetchCollection(offset = 0, limit = 100) {
    const params = new URLSearchParams();
    params.set('offset', offset.toString());
    params.set('limit', limit.toString());

    const response = await fetch(`/api/v1/search?${params.toString()}`);
    if (response.ok) {
        return await response.json();
    }

    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
48+
3349
function createInfoValue(value) {
3450
if (value === null || value === undefined) {
3551
return html`<span class="info-null">null</span>`;
@@ -105,11 +121,11 @@ function createWorksetPaperElement(paper) {
105121
`;
106122
}
107123

108-
function createWorksetElement(scoredWorkset, index) {
109-
// scoredWorkset is Scored[PaperWorkingSet]
124+
function createWorksetElement(scoredWorksetOrPaper, index) {
125+
// scoredWorksetOrPaper is either a Scored[PaperWorkingSet] or a bare Paper
110126
// A Scored[PaperWorkingSet] has: { score: float, value: PaperWorkingSet }; a bare Paper has no `value` and is wrapped into a workset below
111-
const score = scoredWorkset.score;
112-
const workset = scoredWorkset.value;
127+
const score = scoredWorksetOrPaper.score;
128+
const workset = scoredWorksetOrPaper.value ?? { current: scoredWorksetOrPaper, collected: [scoredWorksetOrPaper] };
113129
const current = workset.current;
114130
const collected = workset.collected || [];
115131

@@ -340,3 +356,15 @@ export async function displayWorksets() {
340356
displayError(error);
341357
}
342358
}
359+
360+
/**
 * Show the loading state, fetch the collection and render it.
 * Any failure while fetching or rendering is logged and shown as an error.
 */
export async function displayCollection() {
    displayLoading();

    await fetchCollection()
        .then((data) => {
            // Braces keep the render call's return value out of the chain.
            renderWorksets(data);
        })
        .catch((error) => {
            console.error('Failed to load collection:', error);
            displayError(error);
        });
}

src/paperoni/web/search.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,19 @@ async def exclusions_page(request: Request):
6565
validation_buttons=False,
6666
)
6767

68+
@app.get("/visualize")
async def visualize_page(
    request: Request,
    user: str = Depends(hascap("dev", redirect=True)),
):
    """Render the collection visualization page."""
    # The route itself requires the "dev" capability (see the dependency
    # above); the template is additionally told whether the user also
    # holds the "validate" capability.
    capabilities = app.auth.capabilities
    validate_cap = deserialize(capabilities.captype, "validate")
    return render_template(
        "visualize.html",
        request,
        is_validator=capabilities.check(user, validate_cap),
        validation_buttons=False,
    )
82+
6883
return app
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{% extends "main.html" %}
2+
3+
{% block title %}Visualize{% endblock %}
4+
5+
{% block page_title %}Visualize{% endblock %}
6+
7+
{% block content %}
8+
<div class="workset-container">
9+
<div id="worksetContainer"></div>
10+
</div>
11+
{% endblock %}
12+
13+
{% block scripts %}
14+
<script type="module">
15+
import { displayCollection } from '/assets/workset.js';
16+
displayCollection();
17+
</script>
18+
{% endblock %}

uv.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)