Skip to content

Commit ae57158

Browse files
Merge pull request #140 from NatLibFi/EKIR-306-Handle-titles-appearing-in-feed-multiple-times
Ekir 306 handle titles appearing in feed multiple times
2 parents 25dae27 + d17d12c commit ae57158

File tree

5 files changed

+262
-26
lines changed

5 files changed

+262
-26
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""is_missing and last_checked to License
2+
3+
Revision ID: 4a14dc789118
4+
Revises: d3aaeb6a9e6b
5+
Create Date: 2025-03-14 07:21:18.964916+00:00
6+
7+
"""
8+
import sqlalchemy as sa
9+
10+
from alembic import op
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "4a14dc789118"
14+
down_revision = "d3aaeb6a9e6b"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade() -> None:
20+
# ### commands auto generated by Alembic - please adjust! ###
21+
op.add_column(
22+
"licenses", sa.Column("last_checked", sa.DateTime(timezone=True), nullable=True)
23+
)
24+
op.add_column("licenses", sa.Column("is_missing", sa.Boolean(), nullable=False))
25+
# ### end Alembic commands ###
26+
27+
28+
def downgrade() -> None:
29+
# ### commands auto generated by Alembic - please adjust! ###
30+
op.drop_column("licenses", "is_missing")
31+
op.drop_column("licenses", "last_checked")
32+
# ### end Alembic commands ###

core/metadata_layer.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,9 @@ def add_to_pool(self, db: Session, pool: LicensePool):
553553
for key, value in vars(self).items():
554554
if key != "content_types":
555555
setattr(license_obj, key, value)
556+
# Update the license details
557+
license_obj.is_missing = False
558+
license_obj.last_checked = utc_now()
556559
return license_obj
557560

558561

@@ -1032,13 +1035,27 @@ def apply(
10321035
new_licenses = [
10331036
license.add_to_pool(_db, pool) for license in self.licenses
10341037
]
1035-
for license in old_licenses:
1036-
if license not in new_licenses:
1037-
# In case a license is removed from the feed we need to set it to be unavailable so that it's not used for loans or statistics.
1038-
license.status = LicenseStatus.unavailable
1039-
self.log.warning(
1040-
f"License {license.identifier} has been removed from feed and set to be unavailable"
1041-
)
1038+
# Exaggerate import duration a bit
1039+
import_duration = utc_now() - datetime.timedelta(hours=2)
1040+
for old_license in old_licenses:
1041+
if old_license not in new_licenses:
1042+
# A work can appear several times in the feed with a different license each time. If a license
1043+
# is seen at any time, it's details are kept as they were at that time.
1044+
if (
1045+
old_license.is_missing == False
1046+
and old_license.last_checked
1047+
and old_license.last_checked >= import_duration
1048+
):
1049+
pass
1050+
# In case a license is removed from the feed we need to set it to be unavailable so that
1051+
# it's not used for loans or statistics.
1052+
else:
1053+
old_license.is_missing = True
1054+
old_license.last_checked = utc_now()
1055+
old_license.status = LicenseStatus.unavailable
1056+
self.log.warning(
1057+
f"License {old_license.identifier} is missing from feed so set to be unavailable!"
1058+
)
10421059
changed_availability = pool.update_availability_from_licenses(
10431060
as_of=self.last_checked,
10441061
)

core/model/licensing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ class License(Base, LicenseFunctions):
145145
# License info document terms.concurrency field
146146
terms_concurrency = Column(Integer)
147147

148+
# For tracking missing licenses in import
149+
last_checked = Column(DateTime(timezone=True), default=None)
150+
is_missing = Column(Boolean, default=False, nullable=False)
151+
148152
# A License belongs to one LicensePool.
149153
license_pool_id = Column(Integer, ForeignKey("licensepools.id"), index=True)
150154
license_pool: Mapped[LicensePool] = relationship(

tests/core/test_circulation_data.py

Lines changed: 200 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from copy import deepcopy
33

44
import pytest
5+
from freezegun import freeze_time
56

67
from core.metadata_layer import (
78
CirculationData,
@@ -298,50 +299,230 @@ def test_apply_updates_existing_licenses(self, db: DatabaseTransactionFixture):
298299
assert new_license.id == old_license.id
299300
assert old_license.status == LicenseStatus.unavailable
300301

301-
def test_apply_updates_existing_license_when_removed_from_feed(
302+
@freeze_time("2025-03-13T07:00:00+00:00")
303+
def test_apply_updates_multiple_licenses_with_partial_imports(
302304
self, db: DatabaseTransactionFixture, caplog
303305
):
306+
"""This test covers:
307+
- Licenses that used to be in the pool have disappeared from the feed in which case they should be made
308+
unavailable
309+
- The same work can appear several times in the feed and have a different license each time.
310+
Make sure that all existing licenses are not incorrectly set as unavailable - unless they have really
311+
expired."""
304312
edition, pool = db.edition(with_license_pool=True)
305313

306-
# Start with one license for this pool.
307-
existing_license = db.license(
314+
hour_ago = utc_now() - datetime.timedelta(hours=1)
315+
316+
# Our licensepool in the database has four licenses:
317+
# Valid license A
318+
license_a = db.license(
308319
pool,
320+
identifier="A",
309321
expires=None,
310322
checkouts_left=2,
311323
checkouts_available=3,
312324
status=LicenseStatus.available,
325+
is_missing=None,
326+
last_checked=None,
327+
)
328+
# Valid license B
329+
license_b = db.license(
330+
pool,
331+
identifier="B",
332+
expires=None,
333+
checkouts_left=1,
334+
checkouts_available=2,
335+
status=LicenseStatus.available,
336+
is_missing=None,
337+
last_checked=None,
338+
)
339+
# Valid license C
340+
missing_license = db.license(
341+
pool,
342+
identifier="C",
343+
expires=None,
344+
checkouts_left=5,
345+
checkouts_available=5,
346+
status=LicenseStatus.available,
347+
is_missing=None,
348+
last_checked=None,
349+
)
350+
# And license D that shows no more checkouts and an already unavailable status.
351+
expired_license = db.license(
352+
pool,
353+
identifier="D",
354+
expires=None,
355+
checkouts_left=0,
356+
checkouts_available=0,
357+
status=LicenseStatus.unavailable,
358+
is_missing=None,
359+
last_checked=None,
313360
)
314361

315-
assert isinstance(existing_license.identifier, str)
316-
assert existing_license.checkouts_left == 2
317-
assert existing_license.checkouts_available == 3
318-
assert existing_license.status == LicenseStatus.available
319-
320-
# The feed does not include the license
362+
# First time we see this work (and pool) during import: Only license A and D appear.
321363
circulation_data = CirculationData(
322-
licenses=[], # No licenses in the updated feed
364+
licenses=[
365+
LicenseData(
366+
identifier="A",
367+
status=LicenseStatus.available,
368+
checkouts_available=3,
369+
checkout_url="whatever",
370+
status_url="whatever",
371+
),
372+
LicenseData(
373+
identifier="D",
374+
status=LicenseStatus.unavailable,
375+
checkouts_available=0,
376+
checkout_url="whatever",
377+
status_url="whatever",
378+
),
379+
],
323380
data_source=edition.data_source,
324381
primary_identifier=edition.primary_identifier,
325382
)
326383
with caplog.at_level("WARNING"):
327384
circulation_data.apply(db.session, pool.collection)
328385
db.session.commit()
329386

330-
updated_license = pool.licenses[0]
331-
assert updated_license.id == existing_license.id
387+
# License A is available
388+
assert license_a.status == LicenseStatus.available
389+
assert license_a.is_missing is False
390+
assert license_a.last_checked is not None and license_a.last_checked >= hour_ago
391+
# License B and missing_license are missing (plus they haven't been checked before)
392+
assert license_b.status == LicenseStatus.unavailable
393+
assert license_b.is_missing is True
394+
assert license_b.last_checked is not None and license_b.last_checked >= hour_ago
395+
assert (
396+
f"License {license_b.identifier} is missing from feed so set to be unavailable!"
397+
in caplog.text
398+
)
399+
assert missing_license.status == LicenseStatus.unavailable
332400
assert (
333-
updated_license.status == LicenseStatus.unavailable
334-
) # Status should have changed
401+
missing_license.last_checked is not None
402+
and missing_license.last_checked >= hour_ago
403+
)
404+
assert missing_license.is_missing is True
335405
assert (
336-
updated_license.checkouts_left == 2
337-
) # Checkouts left should remain unchanged.
406+
f"License {missing_license.identifier} is missing from feed so set to be unavailable!"
407+
in caplog.text
408+
)
409+
# License D is in the feed but has run out of checkouts and it's status remains unavailable
410+
assert expired_license.status == LicenseStatus.unavailable
411+
assert expired_license.is_missing is False
412+
assert (
413+
expired_license.last_checked is not None
414+
and expired_license.last_checked >= hour_ago
415+
)
416+
417+
# Second time we see this work (and pool) during the same import: Only license B is in the feed
418+
circulation_data = CirculationData(
419+
licenses=[
420+
LicenseData(
421+
identifier="B",
422+
status=LicenseStatus.available,
423+
checkouts_available=2,
424+
checkout_url="whatever",
425+
status_url="whatever",
426+
)
427+
],
428+
data_source=edition.data_source,
429+
primary_identifier=edition.primary_identifier,
430+
)
431+
432+
with caplog.at_level("WARNING"):
433+
circulation_data.apply(db.session, pool.collection)
434+
db.session.commit()
435+
436+
# License A should remain available
437+
assert license_a.status == LicenseStatus.available
438+
assert license_a.is_missing is False
439+
assert license_a.last_checked is not None and license_a.last_checked >= hour_ago
440+
# License B should now be available and not missing
441+
assert license_b.status == LicenseStatus.available
442+
assert license_b.is_missing is False # type: ignore
443+
assert license_b.last_checked is not None and license_b.last_checked >= hour_ago
444+
# License C is still missing and unavailable
445+
assert missing_license.status == LicenseStatus.unavailable
446+
assert missing_license.is_missing is True
338447
assert (
339-
updated_license.checkouts_available == 3
340-
) # Checkouts available should remain unchanged.
448+
missing_license.last_checked is not None
449+
and missing_license.last_checked >= hour_ago
450+
)
341451
assert (
342-
f"License {existing_license.identifier} has been removed from feed"
452+
f"License {missing_license.identifier} is missing from feed so set to be unavailable!"
343453
in caplog.text
344454
)
455+
# The expired license is also missing this time - it remains unavailable but since we saw it
456+
# when importing the work the first time, it remains not missing.
457+
assert expired_license.status == LicenseStatus.unavailable
458+
assert expired_license.is_missing is False
459+
assert (
460+
expired_license.last_checked is not None
461+
and expired_license.last_checked >= hour_ago
462+
)
463+
464+
# Next day during import we see the title with only license A and the expired license D - again.
465+
with freeze_time("2025-03-14T07:00:00+00:00"):
466+
circulation_data = CirculationData(
467+
licenses=[
468+
LicenseData(
469+
identifier="A",
470+
status=LicenseStatus.available,
471+
checkouts_available=3,
472+
checkout_url="whatever",
473+
status_url="whatever",
474+
),
475+
LicenseData(
476+
identifier="D",
477+
status=LicenseStatus.unavailable,
478+
checkouts_available=0,
479+
checkout_url="whatever",
480+
status_url="whatever",
481+
),
482+
],
483+
data_source=edition.data_source,
484+
primary_identifier=edition.primary_identifier,
485+
)
486+
circulation_data.apply(db.session, pool.collection)
487+
db.session.commit()
488+
489+
next_day = hour_ago + datetime.timedelta(days=1)
490+
491+
# License A is available
492+
assert license_a.status == LicenseStatus.available
493+
assert license_a.is_missing is False
494+
assert (
495+
license_a.last_checked is not None
496+
and license_a.last_checked >= next_day
497+
)
498+
# License B and missing_license are missing again
499+
assert license_b.status == LicenseStatus.unavailable
500+
assert license_b.is_missing is True
501+
assert (
502+
license_b.last_checked is not None
503+
and license_b.last_checked >= next_day
504+
)
505+
assert (
506+
f"License {license_b.identifier} is missing from feed so set to be unavailable!"
507+
in caplog.text
508+
)
509+
assert missing_license.status == LicenseStatus.unavailable
510+
assert (
511+
missing_license.last_checked is not None
512+
and missing_license.last_checked >= next_day
513+
)
514+
assert missing_license.is_missing is True
515+
assert (
516+
f"License {missing_license.identifier} is missing from feed so set to be unavailable!"
517+
in caplog.text
518+
)
519+
# The expired license is still unavailable
520+
assert expired_license.status == LicenseStatus.unavailable
521+
assert expired_license.is_missing is False
522+
assert (
523+
expired_license.last_checked is not None
524+
and expired_license.last_checked >= next_day
525+
)
345526

346527
def test_apply_with_licenses_overrides_availability(
347528
self, db: DatabaseTransactionFixture

tests/fixtures/database.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,8 @@ def license(
652652
checkouts_available=None,
653653
status=LicenseStatus.available,
654654
terms_concurrency=None,
655+
is_missing=False,
656+
last_checked=None,
655657
) -> License:
656658
identifier = identifier or self.fresh_str()
657659
checkout_url = checkout_url or self.fresh_str()

0 commit comments

Comments
 (0)