Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""is_missing and last_checked to License

Revision ID: 4a14dc789118
Revises: d3aaeb6a9e6b
Create Date: 2025-03-14 07:21:18.964916+00:00

"""
import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision = "4a14dc789118"
down_revision = "d3aaeb6a9e6b"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the ``last_checked`` and ``is_missing`` columns to ``licenses``.

    ``last_checked`` is a nullable, timezone-aware timestamp.
    ``is_missing`` is a NOT NULL boolean; rows that already exist when the
    migration runs are backfilled with ``false``.
    """
    # ### commands auto generated by Alembic - adjusted ###
    op.add_column(
        "licenses", sa.Column("last_checked", sa.DateTime(timezone=True), nullable=True)
    )
    # A NOT NULL column cannot be added to a table that already contains rows
    # unless the database is told which value to store in those rows, so the
    # column is created with a temporary server-side default of false.
    op.add_column(
        "licenses",
        sa.Column(
            "is_missing",
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
    )
    # Drop the temporary default again so the resulting schema is the same as
    # the one the original statement described (NOT NULL, no server default).
    op.alter_column("licenses", "is_missing", server_default=None)
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the ``is_missing`` and ``last_checked`` columns from ``licenses``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Columns are dropped in the reverse of the order upgrade() adds them.
    for column_name in ("is_missing", "last_checked"):
        op.drop_column("licenses", column_name)
    # ### end Alembic commands ###
31 changes: 24 additions & 7 deletions core/metadata_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,9 @@ def add_to_pool(self, db: Session, pool: LicensePool):
for key, value in vars(self).items():
if key != "content_types":
setattr(license_obj, key, value)
# Update the license details
license_obj.is_missing = False
license_obj.last_checked = utc_now()
return license_obj


Expand Down Expand Up @@ -1032,13 +1035,27 @@ def apply(
new_licenses = [
license.add_to_pool(_db, pool) for license in self.licenses
]
for license in old_licenses:
if license not in new_licenses:
# In case a license is removed from the feed we need to set it to be unavailable so that it's not used for loans or statistics.
license.status = LicenseStatus.unavailable
self.log.warning(
f"License {license.identifier} has been removed from feed and set to be unavailable"
)
# Exaggerate import duration a bit
import_duration = utc_now() - datetime.timedelta(hours=2)
Comment thread
natlibfi-kaisa marked this conversation as resolved.
for old_license in old_licenses:
if old_license not in new_licenses:
# A work can appear several times in the feed with a different license each time. If a license
# is seen at any time, its details are kept as they were at that time.
if (
old_license.is_missing == False
and old_license.last_checked
and old_license.last_checked >= import_duration
):
pass
# In case a license is removed from the feed we need to set it to be unavailable so that
# it's not used for loans or statistics.
else:
old_license.is_missing = True
old_license.last_checked = utc_now()
old_license.status = LicenseStatus.unavailable
self.log.warning(
f"License {old_license.identifier} is missing from feed so set to be unavailable!"
)
changed_availability = pool.update_availability_from_licenses(
as_of=self.last_checked,
)
Expand Down
4 changes: 4 additions & 0 deletions core/model/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ class License(Base, LicenseFunctions):
# License info document terms.concurrency field
terms_concurrency = Column(Integer)

# For tracking missing licenses in import
last_checked = Column(DateTime(timezone=True), default=None)
is_missing = Column(Boolean, default=False, nullable=False)

# A License belongs to one LicensePool.
license_pool_id = Column(Integer, ForeignKey("licensepools.id"), index=True)
license_pool: Mapped[LicensePool] = relationship(
Expand Down
219 changes: 200 additions & 19 deletions tests/core/test_circulation_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from copy import deepcopy

import pytest
from freezegun import freeze_time

from core.metadata_layer import (
CirculationData,
Expand Down Expand Up @@ -298,50 +299,230 @@ def test_apply_updates_existing_licenses(self, db: DatabaseTransactionFixture):
assert new_license.id == old_license.id
assert old_license.status == LicenseStatus.unavailable

def test_apply_updates_existing_license_when_removed_from_feed(
@freeze_time("2025-03-13T07:00:00+00:00")
def test_apply_updates_multiple_licenses_with_partial_imports(
self, db: DatabaseTransactionFixture, caplog
):
"""This test covers:
- Licenses that used to be in the pool have disappeared from the feed in which case they should be made
unavailable
- The same work can appear several times in the feed and have a different license each time.
Make sure that all existing licenses are not incorrectly set as unavailable - unless they have really
expired."""
edition, pool = db.edition(with_license_pool=True)

# Start with one license for this pool.
existing_license = db.license(
hour_ago = utc_now() - datetime.timedelta(hours=1)

# Our licensepool in the database has four licenses:
# Valid license A
license_a = db.license(
pool,
identifier="A",
expires=None,
checkouts_left=2,
checkouts_available=3,
status=LicenseStatus.available,
is_missing=None,
last_checked=None,
)
# Valid license B
license_b = db.license(
pool,
identifier="B",
expires=None,
checkouts_left=1,
checkouts_available=2,
status=LicenseStatus.available,
is_missing=None,
last_checked=None,
)
# Valid license C
missing_license = db.license(
pool,
identifier="C",
expires=None,
checkouts_left=5,
checkouts_available=5,
status=LicenseStatus.available,
is_missing=None,
last_checked=None,
)
# And license D that shows no more checkouts and an already unavailable status.
expired_license = db.license(
pool,
identifier="D",
expires=None,
checkouts_left=0,
checkouts_available=0,
status=LicenseStatus.unavailable,
is_missing=None,
last_checked=None,
)

assert isinstance(existing_license.identifier, str)
assert existing_license.checkouts_left == 2
assert existing_license.checkouts_available == 3
assert existing_license.status == LicenseStatus.available

# The feed does not include the license
# First time we see this work (and pool) during import: Only license A and D appear.
circulation_data = CirculationData(
licenses=[], # No licenses in the updated feed
licenses=[
LicenseData(
identifier="A",
status=LicenseStatus.available,
checkouts_available=3,
checkout_url="whatever",
status_url="whatever",
),
LicenseData(
identifier="D",
status=LicenseStatus.unavailable,
checkouts_available=0,
checkout_url="whatever",
status_url="whatever",
),
],
data_source=edition.data_source,
primary_identifier=edition.primary_identifier,
)
with caplog.at_level("WARNING"):
circulation_data.apply(db.session, pool.collection)
db.session.commit()

updated_license = pool.licenses[0]
assert updated_license.id == existing_license.id
# License A is available
assert license_a.status == LicenseStatus.available
assert license_a.is_missing is False
assert license_a.last_checked is not None and license_a.last_checked >= hour_ago
# License B and missing_license are missing (plus they haven't been checked before)
assert license_b.status == LicenseStatus.unavailable
assert license_b.is_missing is True
assert license_b.last_checked is not None and license_b.last_checked >= hour_ago
assert (
f"License {license_b.identifier} is missing from feed so set to be unavailable!"
in caplog.text
)
assert missing_license.status == LicenseStatus.unavailable
assert (
updated_license.status == LicenseStatus.unavailable
) # Status should have changed
missing_license.last_checked is not None
and missing_license.last_checked >= hour_ago
)
assert missing_license.is_missing is True
assert (
updated_license.checkouts_left == 2
) # Checkouts left should remain unchanged.
f"License {missing_license.identifier} is missing from feed so set to be unavailable!"
in caplog.text
)
# License D is in the feed but has run out of checkouts and its status remains unavailable
assert expired_license.status == LicenseStatus.unavailable
assert expired_license.is_missing is False
assert (
expired_license.last_checked is not None
and expired_license.last_checked >= hour_ago
)

# Second time we see this work (and pool) during the same import: Only license B is in the feed
circulation_data = CirculationData(
licenses=[
LicenseData(
identifier="B",
status=LicenseStatus.available,
checkouts_available=2,
checkout_url="whatever",
status_url="whatever",
)
],
data_source=edition.data_source,
primary_identifier=edition.primary_identifier,
)

with caplog.at_level("WARNING"):
circulation_data.apply(db.session, pool.collection)
db.session.commit()

# License A should remain available
assert license_a.status == LicenseStatus.available
assert license_a.is_missing is False
assert license_a.last_checked is not None and license_a.last_checked >= hour_ago
# License B should now be available and not missing
assert license_b.status == LicenseStatus.available
assert license_b.is_missing is False # type: ignore
assert license_b.last_checked is not None and license_b.last_checked >= hour_ago
# License C is still missing and unavailable
assert missing_license.status == LicenseStatus.unavailable
assert missing_license.is_missing is True
assert (
updated_license.checkouts_available == 3
) # Checkouts available should remain unchanged.
missing_license.last_checked is not None
and missing_license.last_checked >= hour_ago
)
assert (
f"License {existing_license.identifier} has been removed from feed"
f"License {missing_license.identifier} is missing from feed so set to be unavailable!"
in caplog.text
)
# The expired license is also missing this time - it remains unavailable but since we saw it
# when importing the work the first time, it remains not missing.
assert expired_license.status == LicenseStatus.unavailable
assert expired_license.is_missing is False
assert (
expired_license.last_checked is not None
and expired_license.last_checked >= hour_ago
)

# Next day during import we see the title with only license A and the expired license D - again.
with freeze_time("2025-03-14T07:00:00+00:00"):
circulation_data = CirculationData(
licenses=[
LicenseData(
identifier="A",
status=LicenseStatus.available,
checkouts_available=3,
checkout_url="whatever",
status_url="whatever",
),
LicenseData(
identifier="D",
status=LicenseStatus.unavailable,
checkouts_available=0,
checkout_url="whatever",
status_url="whatever",
),
],
data_source=edition.data_source,
primary_identifier=edition.primary_identifier,
)
circulation_data.apply(db.session, pool.collection)
db.session.commit()

next_day = hour_ago + datetime.timedelta(days=1)

# License A is available
assert license_a.status == LicenseStatus.available
assert license_a.is_missing is False
assert (
license_a.last_checked is not None
and license_a.last_checked >= next_day
)
# License B and missing_license are missing again
assert license_b.status == LicenseStatus.unavailable
assert license_b.is_missing is True
assert (
license_b.last_checked is not None
and license_b.last_checked >= next_day
)
assert (
f"License {license_b.identifier} is missing from feed so set to be unavailable!"
in caplog.text
)
assert missing_license.status == LicenseStatus.unavailable
assert (
missing_license.last_checked is not None
and missing_license.last_checked >= next_day
)
assert missing_license.is_missing is True
assert (
f"License {missing_license.identifier} is missing from feed so set to be unavailable!"
in caplog.text
)
# The expired license is still unavailable
assert expired_license.status == LicenseStatus.unavailable
assert expired_license.is_missing is False
assert (
expired_license.last_checked is not None
and expired_license.last_checked >= next_day
)

def test_apply_with_licenses_overrides_availability(
self, db: DatabaseTransactionFixture
Expand Down
2 changes: 2 additions & 0 deletions tests/fixtures/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,8 @@ def license(
checkouts_available=None,
status=LicenseStatus.available,
terms_concurrency=None,
is_missing=False,
last_checked=None,
) -> License:
identifier = identifier or self.fresh_str()
checkout_url = checkout_url or self.fresh_str()
Expand Down