Skip to content

Commit a8ccac2

Browse files
authored
Merge pull request #10057 from benbdeitch/third-try-trending
Add Trending Field to Solr
2 parents e6c2f33 + 13c1a24 commit a8ccac2

File tree

23 files changed

+655
-206
lines changed

23 files changed

+655
-206
lines changed

compose.production.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,8 +256,8 @@ services:
256256
environment:
257257
- OL_CONFIG=/olsystem/etc/openlibrary.yml
258258
- OL_URL=https://openlibrary.org/
259-
- EXTRA_OPTS=--solr-url http://ol-solr0:8984/solr/openlibrary
260-
--no-solr-next
259+
- OL_SOLR_BASE_URL=http://ol-solr0:8984/solr/openlibrary
260+
- EXTRA_OPTS=--no-solr-next
261261
volumes:
262262
- ../olsystem:/olsystem
263263
logging:
@@ -276,9 +276,9 @@ services:
276276
environment:
277277
- OL_CONFIG=/olsystem/etc/openlibrary.yml
278278
- OL_URL=https://openlibrary.org/
279+
- OL_SOLR_BASE_URL=http://ol-solr1:8984/solr/openlibrary
279280
- STATE_FILE=solr-next-update.offset
280-
- EXTRA_OPTS=--solr-url http://ol-solr1:8984/solr/openlibrary
281-
--solr-next
281+
- EXTRA_OPTS=--solr-next
282282
volumes:
283283
- solr-updater-data:/solr-updater-data
284284
- ../olsystem:/olsystem

conf/solr/conf/managed-schema.xml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,41 @@
203203
<field name="ratings_count_4" type="pint"/>
204204
<field name="ratings_count_5" type="pint"/>
205205

206+
<!-- Trending related values-->
207+
<field name="trending_score_hourly_0" type="pint" indexed="false" stored="false"/>
208+
<field name="trending_score_hourly_1" type="pint" indexed="false" stored="false"/>
209+
<field name="trending_score_hourly_2" type="pint" indexed="false" stored="false"/>
210+
<field name="trending_score_hourly_3" type="pint" indexed="false" stored="false"/>
211+
<field name="trending_score_hourly_4" type="pint" indexed="false" stored="false"/>
212+
<field name="trending_score_hourly_5" type="pint" indexed="false" stored="false"/>
213+
<field name="trending_score_hourly_6" type="pint" indexed="false" stored="false"/>
214+
<field name="trending_score_hourly_7" type="pint" indexed="false" stored="false"/>
215+
<field name="trending_score_hourly_8" type="pint" indexed="false" stored="false"/>
216+
<field name="trending_score_hourly_9" type="pint" indexed="false" stored="false"/>
217+
<field name="trending_score_hourly_10" type="pint" indexed="false" stored="false"/>
218+
<field name="trending_score_hourly_11" type="pint" indexed="false" stored="false"/>
219+
<field name="trending_score_hourly_12" type="pint" indexed="false" stored="false"/>
220+
<field name="trending_score_hourly_13" type="pint" indexed="false" stored="false"/>
221+
<field name="trending_score_hourly_14" type="pint" indexed="false" stored="false"/>
222+
<field name="trending_score_hourly_15" type="pint" indexed="false" stored="false"/>
223+
<field name="trending_score_hourly_16" type="pint" indexed="false" stored="false"/>
224+
<field name="trending_score_hourly_17" type="pint" indexed="false" stored="false"/>
225+
<field name="trending_score_hourly_18" type="pint" indexed="false" stored="false"/>
226+
<field name="trending_score_hourly_19" type="pint" indexed="false" stored="false"/>
227+
<field name="trending_score_hourly_20" type="pint" indexed="false" stored="false"/>
228+
<field name="trending_score_hourly_21" type="pint" indexed="false" stored="false"/>
229+
<field name="trending_score_hourly_22" type="pint" indexed="false" stored="false"/>
230+
<field name="trending_score_hourly_23" type="pint" indexed="false" stored="false"/>
231+
<field name="trending_score_hourly_sum" type="pint" indexed="false" stored="false" />
232+
<field name="trending_score_daily_0" type="pint" indexed="false" stored="false"/>
233+
<field name="trending_score_daily_1" type="pint" indexed="false" stored="false"/>
234+
<field name="trending_score_daily_2" type="pint" indexed="false" stored="false"/>
235+
<field name="trending_score_daily_3" type="pint" indexed="false" stored="false"/>
236+
<field name="trending_score_daily_4" type="pint" indexed="false" stored="false"/>
237+
<field name="trending_score_daily_5" type="pint" indexed="false" stored="false"/>
238+
<field name="trending_score_daily_6" type="pint" indexed="false" stored="false"/>
239+
<field name="trending_z_score" type="pfloat" indexed="false" stored="false"/>
240+
206241
<!-- Reading Log -->
207242
<field name="readinglog_count" type="pint"/>
208243
<field name="want_to_read_count" type="pint"/>

docker/ol-solr-updater-start.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ python --version
44
OSP_DUMP_LOCATION="/solr-updater-data/osp_totals.db"
55
# Download the osp dump file, resuming any partial download already on disk. Takes ~30s
66
# Keep link in sync with Makefile and Jenkinsfile
7-
wget --output-document=$OSP_DUMP_LOCATION \
8-
--progress=dot:giga --no-clobber \
7+
curl -L --output $OSP_DUMP_LOCATION \
8+
--progress-bar --continue-at - \
99
https://archive.org/download/2023_openlibrary_osp_counts/osp_totals.db
1010

1111
ls -la /solr-updater-data/
12-
python scripts/solr_updater.py $OL_CONFIG \
12+
PYTHONPATH=. python scripts/solr_updater/solr_updater.py $OL_CONFIG \
1313
--state-file /solr-updater-data/$STATE_FILE \
1414
--ol-url "$OL_URL" \
1515
--osp-dump "$OSP_DUMP_LOCATION" \

openlibrary/core/schema.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ CREATE TABLE bookshelves_books (
5252
primary key (username, work_id, bookshelf_id)
5353
);
5454
CREATE INDEX bookshelves_books_work_id_idx ON bookshelves_books (work_id);
55-
55+
CREATE INDEX bookshelves_books_updated_idx ON bookshelves_books (updated);
5656
INSERT INTO bookshelves (name, description) VALUES ('Want to Read', 'A list of books I want to read');
5757
INSERT INTO bookshelves (name, description) VALUES ('Currently Reading', 'A list of books I am currently reading');
5858
INSERT INTO bookshelves (name, description) VALUES ('Already Read', 'A list of books I have finished reading');

openlibrary/plugins/worksearch/code.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,9 @@ def process_facet_counts(
143143

144144

145145
def execute_solr_query(
146-
solr_path: str, params: dict | list[tuple[str, Any]]
146+
solr_path: str,
147+
params: dict | list[tuple[str, Any]],
148+
_timeout: int | None = None,
147149
) -> Response | None:
148150
url = solr_path
149151
if params:
@@ -152,7 +154,11 @@ def execute_solr_query(
152154

153155
stats.begin("solr", url=url)
154156
try:
155-
response = get_solr().raw_request(solr_path, urlencode(params))
157+
response = get_solr().raw_request(
158+
solr_path,
159+
urlencode(params),
160+
_timeout=_timeout,
161+
)
156162
except requests.HTTPError:
157163
logger.exception("Failed solr query")
158164
return None

openlibrary/plugins/worksearch/schemes/works.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class WorkSearchScheme(SearchScheme):
9797
"lcc_sort",
9898
"ddc_sort",
9999
"osp_count",
100+
# Trending
101+
"trending_score_hourly_sum",
102+
"trending_z_score",
100103
}
101104
)
102105
non_solr_fields = frozenset(
@@ -141,6 +144,8 @@ class WorkSearchScheme(SearchScheme):
141144
'editions': 'edition_count desc',
142145
'old': 'def(first_publish_year, 9999) asc',
143146
'new': 'first_publish_year desc',
147+
'daily': 'trending_score_hourly_sum desc',
148+
'trending': 'trending_z_score desc',
144149
'rating': 'ratings_sortable desc',
145150
'rating asc': 'ratings_sortable asc',
146151
'rating desc': 'ratings_sortable desc',

openlibrary/plugins/worksearch/search.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Search utilities."""
22

3+
import os
4+
35
from infogami import config
46
from openlibrary.utils.solr import Solr
57

@@ -9,6 +11,10 @@
911
def get_solr():
1012
global _ACTIVE_SOLR
1113
if not _ACTIVE_SOLR:
12-
base_url = config.plugin_worksearch.get('solr_base_url')
14+
if os.environ.get('OL_SOLR_BASE_URL'):
15+
base_url = os.environ['OL_SOLR_BASE_URL']
16+
else:
17+
base_url = config.plugin_worksearch.get('solr_base_url')
18+
1319
_ACTIVE_SOLR = Solr(base_url)
1420
return _ACTIVE_SOLR

openlibrary/solr/data_provider.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import itertools
1111
import logging
1212
import re
13+
import typing
1314
from collections.abc import Iterable, Sized
1415
from typing import TypedDict, cast
1516

@@ -23,6 +24,7 @@
2324
from openlibrary.core import ia
2425
from openlibrary.core.bookshelves import Bookshelves
2526
from openlibrary.core.ratings import Ratings, WorkRatingsSummary
27+
from openlibrary.solr.utils import get_solr_base_url
2628
from openlibrary.utils import extract_numeric_id_from_olid
2729

2830
logger = logging.getLogger("openlibrary.solr.data_provider")
@@ -288,6 +290,13 @@ def get_editions_of_work(self, work):
288290
"""
289291
raise NotImplementedError
290292

293+
async def get_trending_data(self, work_key: str) -> dict:
294+
"""
295+
Fetches the work's trending data from Solr; this base implementation returns an empty dict.
296+
:param work_key: type-prefixed key, eg /works/OL1W
297+
"""
298+
return {}
299+
291300
def get_work_ratings(self, work_key: str) -> WorkRatingsSummary | None:
292301
raise NotImplementedError
293302

@@ -340,6 +349,39 @@ def clear_cache(self):
340349
# Nothing's cached, so nothing to clear!
341350
return
342351

352+
@typing.override
353+
async def get_trending_data(self, work_key: str) -> dict:
354+
async with httpx.AsyncClient() as client:
355+
response = await client.get(
356+
get_solr_base_url() + '/get',
357+
params={
358+
'id': work_key,
359+
"fl": ','.join( # noqa: FLY002
360+
(
361+
"trending_score_hourly_sum",
362+
'trending_score_hourly_*',
363+
"trending_score_daily_*",
364+
"trending_z_score",
365+
)
366+
),
367+
},
368+
)
369+
response.raise_for_status()
370+
solr_doc = response.json()['doc'] or {}
371+
372+
return {
373+
field: solr_doc.get(field, 0) for field in get_all_trending_fields()
374+
}
375+
376+
377+
def get_all_trending_fields():
378+
for index in range(24):
379+
yield f'trending_score_hourly_{index}'
380+
for index in range(7):
381+
yield f'trending_score_daily_{index}'
382+
yield 'trending_score_hourly_sum'
383+
yield 'trending_z_score'
384+
343385

344386
class ExternalDataProvider(DataProvider):
345387
"""

openlibrary/solr/solr_types.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,39 @@ class SolrDocument(TypedDict):
6969
ratings_count_3: Optional[int]
7070
ratings_count_4: Optional[int]
7171
ratings_count_5: Optional[int]
72+
trending_score_hourly_0: Optional[int]
73+
trending_score_hourly_1: Optional[int]
74+
trending_score_hourly_2: Optional[int]
75+
trending_score_hourly_3: Optional[int]
76+
trending_score_hourly_4: Optional[int]
77+
trending_score_hourly_5: Optional[int]
78+
trending_score_hourly_6: Optional[int]
79+
trending_score_hourly_7: Optional[int]
80+
trending_score_hourly_8: Optional[int]
81+
trending_score_hourly_9: Optional[int]
82+
trending_score_hourly_10: Optional[int]
83+
trending_score_hourly_11: Optional[int]
84+
trending_score_hourly_12: Optional[int]
85+
trending_score_hourly_13: Optional[int]
86+
trending_score_hourly_14: Optional[int]
87+
trending_score_hourly_15: Optional[int]
88+
trending_score_hourly_16: Optional[int]
89+
trending_score_hourly_17: Optional[int]
90+
trending_score_hourly_18: Optional[int]
91+
trending_score_hourly_19: Optional[int]
92+
trending_score_hourly_20: Optional[int]
93+
trending_score_hourly_21: Optional[int]
94+
trending_score_hourly_22: Optional[int]
95+
trending_score_hourly_23: Optional[int]
96+
trending_score_hourly_sum: Optional[int]
97+
trending_score_daily_0: Optional[int]
98+
trending_score_daily_1: Optional[int]
99+
trending_score_daily_2: Optional[int]
100+
trending_score_daily_3: Optional[int]
101+
trending_score_daily_4: Optional[int]
102+
trending_score_daily_5: Optional[int]
103+
trending_score_daily_6: Optional[int]
104+
trending_z_score: Optional[float]
72105
readinglog_count: Optional[int]
73106
want_to_read_count: Optional[int]
74107
currently_reading_count: Optional[int]

openlibrary/solr/updater/work.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from openlibrary.solr.solr_types import SolrDocument
2020
from openlibrary.solr.updater.abstract import AbstractSolrBuilder, AbstractSolrUpdater
2121
from openlibrary.solr.updater.edition import EditionSolrBuilder
22-
from openlibrary.solr.utils import SolrUpdateRequest, str_to_key
22+
from openlibrary.solr.utils import SolrUpdateRequest, get_solr_next, str_to_key
2323
from openlibrary.utils import uniq
2424
from openlibrary.utils.ddc import choose_sorting_ddc, normalize_ddc
2525
from openlibrary.utils.lcc import choose_sorting_lcc, short_lcc_to_sortable_lcc
@@ -107,8 +107,19 @@ async def update_key(self, work: dict) -> tuple[SolrUpdateRequest, list[str]]:
107107
for iaid in iaids
108108
}
109109

110+
trending_data = {}
111+
if get_solr_next():
112+
trending_data = await self.data_provider.get_trending_data(
113+
work['key']
114+
)
115+
110116
solr_doc = WorkSolrBuilder(
111-
work, editions, authors, self.data_provider, ia_metadata
117+
work,
118+
editions,
119+
authors,
120+
self.data_provider,
121+
ia_metadata,
122+
trending_data,
112123
).build()
113124
except: # noqa: E722
114125
logger.error("failed to update work %s", work['key'], exc_info=True)
@@ -256,12 +267,14 @@ def __init__(
256267
authors: list[dict],
257268
data_provider: DataProvider,
258269
ia_metadata: dict[str, Optional['bp.IALiteMetadata']],
270+
trending_data: dict,
259271
):
260272
self._work = work
261273
self._editions = editions
262274
self._authors = authors
263275
self._ia_metadata = ia_metadata
264276
self._data_provider = data_provider
277+
self._trending_data = trending_data
265278
self._solr_editions = [
266279
EditionSolrBuilder(
267280
e, self, self._ia_metadata.get(e.get('ocaid', '').strip())
@@ -276,7 +289,7 @@ def build(self) -> SolrDocument:
276289
doc |= self.build_legacy_ia_fields()
277290
doc |= self.build_ratings() or {}
278291
doc |= self.build_reading_log() or {}
279-
292+
doc |= self._trending_data
280293
return cast(SolrDocument, doc)
281294

282295
@property

0 commit comments

Comments
 (0)