def index_stats_fields_for_loan(loan_dict):
    """Indexer hook that adds statistics fields to a loan before indexing.

    The computed values are stored under ``loan_dict["extra_data"]["stats"]``:

    * ``loan_duration``: days between ``start_date`` and ``end_date``
      (only when both are set).
    * ``waiting_time``: days between the loan creation and ``start_date``,
      or ``None`` when the start date precedes the creation date
      (only when ``start_date`` is set).
    * ``available_items_during_request``: whether at least one item was
      available when the loan was requested. Only set when exactly one
      "request" transition event is found for the loan.

    :param loan_dict: The loan record dict about to be indexed. It is
        modified in place.
    :raises ValueError: If more than one "request" transition event is found
        for the loan.
    """

    def _date_or_none(key):
        """Return the ISO formatted value stored under ``key`` as a date."""
        value = loan_dict.get(key)
        return datetime.fromisoformat(value).date() if value else None

    creation_date = datetime.fromisoformat(loan_dict["_created"]).date()
    start_date = _date_or_none("start_date")
    end_date = _date_or_none("end_date")

    # Collect extra information relevant for stats
    stats = {}

    # Time ranges in days
    if start_date and end_date:
        stats["loan_duration"] = (end_date - start_date).days

    if start_date:
        waiting_time = (start_date - creation_date).days
        # A negative waiting time is not meaningful, store None instead.
        stats["waiting_time"] = waiting_time if waiting_time >= 0 else None

    # Document availability during loan request
    # NOTE(review): the index name is used without any search index prefix,
    # while the events template registers a "__SEARCH_INDEX_PREFIX__" alias.
    # Confirm this lookup still resolves when a prefix is configured.
    stat_events_index_name = "events-stats-loan-transitions"
    if current_search_client.indices.exists(index=stat_events_index_name):
        loan_pid = loan_dict["pid"]
        search_body = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"trigger": "request"}},
                        {"term": {"pid_value": loan_pid}},
                    ],
                }
            },
        }

        search_result = current_search_client.search(
            index=stat_events_index_name, body=search_body
        )
        hits = search_result["hits"]["hits"]
        if len(hits) > 1:
            raise ValueError(
                f"Multiple request transition events for loan {loan_pid}. "
                "Expected zero or one."
            )
        if hits:
            request_transition_event = hits[0]["_source"]
            available_items_during_request_count = request_transition_event[
                "extra_data"
            ]["available_items_during_request_count"]
            stats["available_items_during_request"] = (
                available_items_during_request_count > 0
            )
    else:
        # The message itself describes this as an expected situation, so it
        # is reported as a warning. The index name is passed as a logging
        # argument so that it is actually interpolated into the message.
        current_app.logger.warning(
            "Stats events index '%s' does not exist. "
            "This is normal during initial setup or if no events have been "
            "processed yet. No data is lost: as soon as the events are "
            "processed, the loan will be reindexed and the stat will be "
            "available.",
            stat_events_index_name,
        )

    loan_dict.setdefault("extra_data", {})["stats"] = stats
def _generate_metric_agg_field_name(metric):
    """Return the aggregation name used for a metric.

    :param metric: Must include 'field' and 'aggregation' keys.
    :returns: The aggregation field name in the form
        '<aggregation>__<field>', e.g. 'avg__loan_duration'.
    """

    return f"{metric['aggregation']}__{metric['field']}"


def get_loan_statistics(date_fields, search, requested_group_by, requested_metrics):
    """Aggregate loan statistics for requested metrics.

    :param date_fields: List of date fields for the record type.
        Date fields require different handling when using them to group by.
    :param search: The base search object to apply aggregations on.
        It is not modified, the aggregation is attached to a copy.
    :param requested_group_by: List of group dictionaries with 'field' and
        optional 'interval' keys. The interval is passed on as the calendar
        interval of the date histogram.
        Example: [{"field": "start_date", "interval": "1M"}, {"field": "state"}]
    :param requested_metrics: List of metric dictionaries with 'field' and
        'aggregation' keys.
        Example: [{"field": "loan_duration", "aggregation": "avg"}]
    :returns: A list with one dict per aggregation bucket, each holding the
        bucket 'key', its 'doc_count' and the computed 'metrics' keyed by
        aggregation name.
    """

    # Build composite aggregation: one source per requested grouping
    sources = []
    for grouping in requested_group_by:
        grouping_field = grouping["field"]

        if grouping_field in date_fields:
            source = {
                "date_histogram": {
                    "field": grouping_field,
                    "calendar_interval": grouping["interval"],
                    "format": "yyyy-MM-dd",
                }
            }
        else:
            source = {"terms": {"field": grouping_field}}
        sources.append({grouping_field: source})

    # NOTE(review): only a single page of at most 1000 buckets is requested,
    # no follow-up request is made for further buckets.
    composite_agg = dsl.A("composite", sources=sources, size=1000)

    for metric in requested_metrics:
        agg_name = _generate_metric_agg_field_name(metric)

        metric_field = metric["field"]
        agg_type = metric["aggregation"]
        if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
            composite_agg = composite_agg.metric(
                agg_name, dsl.A(agg_type, field=metric_field)
            )
        elif agg_type == "median":
            # The median is computed as the 50th percentile
            composite_agg = composite_agg.metric(
                agg_name, dsl.A("percentiles", percents=[50], field=metric_field)
            )

    # Only retrieve aggregation results. Slicing returns a copy of the search,
    # attaching the aggregation to that copy leaves the caller's search as is.
    search = search[:0]
    search.aggs.bucket("loan_aggregations", composite_agg)
    result = search.execute()

    # Parse aggregation results
    buckets = []
    if hasattr(result.aggregations, "loan_aggregations"):
        for bucket in result.aggregations.loan_aggregations.buckets:
            metrics_data = {}
            for metric in requested_metrics:
                agg_name = _generate_metric_agg_field_name(metric)
                if not hasattr(bucket, agg_name):
                    continue

                agg_result = getattr(bucket, agg_name)
                agg_type = metric["aggregation"]

                if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
                    metrics_data[agg_name] = agg_result.value
                elif agg_type == "median":
                    metrics_data[agg_name] = agg_result.values.get("50.0")

            buckets.append(
                {
                    "key": bucket.key.to_dict(),
                    "doc_count": bucket.doc_count,
                    "metrics": metrics_data,
                }
            )

    return buckets
class HistogramParamsSchema(Schema):
    """Schema for validating the query string parameters of the histogram endpoint.

    The ``metrics`` and ``group_by`` parameters arrive as JSON encoded strings
    and are decoded in a ``pre_load`` hook before the field validation runs.
    """

    metrics = fields.List(fields.Nested(MetricItemSchema), required=False)
    group_by = fields.List(
        fields.Nested(GroupByItemSchema), required=True, validate=validate.Length(min=1)
    )
    q = fields.String()

    def __init__(self, date_fields, *args, **kwargs):
        """Create the schema.

        :param date_fields: Names of the fields that are dates. They are
            stored in the schema context as a set under "date_fields".
        """
        super().__init__(*args, **kwargs)
        self.context = {"date_fields": set(date_fields)}

    @pre_load
    def parse_query_string(self, data, **kwargs):
        """Parse the metrics and group_by parameters from JSON strings.

        :param data: Mapping of the raw query string parameters.
        :returns: A copy of ``data`` with "metrics" and "group_by" decoded.
        :raises ValidationError: If one of the parameters is not valid JSON.
        """

        # Work on a copy so the mapping passed in by the caller is untouched
        parsed = dict(data)
        for key in ("metrics", "group_by"):
            try:
                # default value as the field "metrics" is not required
                parsed[key] = json.loads(parsed.get(key, "[]"))
            except (TypeError, ValueError) as e:
                # ValidationError requires a message; raising the bare class
                # would fail with a TypeError instead of a validation error.
                raise ValidationError({key: ["Must be a valid JSON string."]}) from e
        return parsed
def loan_stats_responsify(schema_class, mimetype):
    """Build a response factory for loan stats.

    :param schema_class: Schema class, instantiated on every call to dump
        the response data.
    :param mimetype: MIME type of response.
    :returns: A function creating the response object for some data.
    """

    def view(data, code=200, headers=None):
        """Generate the response object."""
        serialized = json.dumps(schema_class().dump(data))

        resp = current_app.response_class(serialized, mimetype=mimetype)
        resp.status_code = code

        if headers is None:
            return resp

        resp.headers.extend(headers)
        return resp

    return view
class BucketSchema(Schema):
    """Schema for a single histogram bucket."""

    # Number of documents in the bucket, dumped as an integer.
    doc_count = fields.Int(required=True)
    # Bucket key as a dict, both its keys and its values are dumped as strings.
    key = fields.Dict(keys=fields.String(), values=fields.String())

    # Metric results of the bucket: string keys mapped to float values.
    metrics = fields.Dict(
        keys=fields.String(),
        values=fields.Float(),
    )


class HistogramStatsV1(Schema):
    """Schema for a stats histogram response."""

    # List of buckets, each one dumped with BucketSchema.
    # NOTE(review): "description" is passed as a plain keyword argument;
    # confirm the marshmallow version in use still accepts field metadata
    # this way rather than through an explicit "metadata" argument.
    buckets = fields.List(
        fields.Nested(BucketSchema),
        required=True,
        description="Statistics buckets.",
    )
def create_loan_histogram_view(blueprint, app):
    """Register the loan histogram view on the blueprint.

    The default media type is read from the loan entry of the
    RECORDS_REST_ENDPOINTS config and the view is served for GET requests
    under "/circulation/loans/stats".

    :param blueprint: The blueprint the url rule is added to.
    :param app: The application whose config is read.
    """

    rest_endpoints = app.config.get("RECORDS_REST_ENDPOINTS")
    loan_endpoint = rest_endpoints.get(CIRCULATION_LOAN_PID_TYPE)

    view_func = LoanHistogramResource.as_view(
        LoanHistogramResource.view_name,
        serializers={"application/json": loan_stats_response},
        default_media_type=loan_endpoint.get("default_media_type"),
        ctx={},
    )

    blueprint.add_url_rule(
        "/circulation/loans/stats", view_func=view_func, methods=["GET"]
    )
class LoanHistogramResource(IlsCirculationResource):
    """Resource returning aggregated loan statistics."""

    view_name = "loan_histogram"

    @need_permissions("stats-loans")
    def get(self, **kwargs):
        """Validate the query string and return the loan statistics buckets.

        :raises InvalidParameterError: If the query string parameters do not
            pass the schema validation.
        """

        record_cls = current_circulation.loan_record_cls
        date_fields = record_cls.DATE_FIELDS + record_cls.DATETIME_FIELDS + ["_created"]

        params_schema = HistogramParamsSchema(date_fields)
        try:
            params = params_schema.load(request.args.to_dict())
        except ValidationError as e:
            raise InvalidParameterError(description=e.messages) from e

        # The search factory applies the filtering of the q parameter
        base_search = current_circulation.loan_search_cls()
        filtered_search, _ = default_search_factory(self, base_search)

        buckets = get_loan_statistics(
            date_fields,
            filtered_search,
            params["group_by"],
            params["metrics"],
        )

        return self.make_response({"buckets": buckets}, 200)
"ils-record-changes-insertions", "ils-record-changes-deletions", + "loan-transitions", ) ], }, "stats-aggregate-events": { "task": "invenio_stats.tasks.aggregate_events", "schedule": timedelta(hours=3), - "args": [("record-view-agg", "file-download-agg", "ils-record-changes-agg")], + "args": [ + ( + "record-view-agg", + "file-download-agg", + "ils-record-changes-agg", + "loan-transitions-agg", + ) + ], }, "clean_locations_past_closures_exceptions": { "task": ( @@ -983,6 +995,26 @@ def _(x): "suffix": "%Y", }, }, + # The following events are used to track loan state transitions and store additional data. + # Only the "extend" transition will be aggregated and used in the way intended by invenio-stats. + # Other transitions, e.g. "request", are used to store additional information, + # like the number of available items when a loan is requested. + # The loan indexer then later queries those events and adds the information to the loan. + "loan-transitions": { + "signal": "invenio_circulation.signals.loan_state_changed", + "templates": "invenio_app_ils.stats.templates.events.loan_transitions", + "event_builders": [ + "invenio_app_ils.stats.event_builders.loan_transition_event_builder", + ], + "cls": LoansEventsIndexer, + "params": { + "preprocessors": [ + "invenio_app_ils.stats.processors.add_loan_transition_unique_id", + ], + "double_click_window": 0, + "suffix": "%Y", + }, + }, } STATS_AGGREGATIONS = { @@ -1042,6 +1074,20 @@ def _(x): query_modifiers=[], ), ), + "loan-transitions-agg": dict( + templates="invenio_app_ils.stats.templates.aggregations.loan_transitions", + cls=StatAggregator, + params=dict( + event="loan-transitions", + field="trigger", + interval="day", + index_interval="year", + copy_fields=dict(), + metric_fields=dict(), + # We only track extension transitions + query_modifiers=[filter_extend_transitions], + ), + ), } STATS_QUERIES = { @@ -1116,6 +1162,18 @@ def _(x): aggregated_fields=["user_id"], ), ), + "loan-extensions": dict( + 
cls=DateHistogramQuery, + permission_factory=backoffice_read_permission, + params=dict( + index="stats-loan-transitions", + copy_fields=dict(), + required_filters=dict(), + metric_fields=dict( + count=("sum", "count", {}), + ), + ), + ), } # List of available vocabularies diff --git a/invenio_app_ils/ext.py b/invenio_app_ils/ext.py index 58b4bd589..6f5385112 100644 --- a/invenio_app_ils/ext.py +++ b/invenio_app_ils/ext.py @@ -20,10 +20,13 @@ ) from .circulation import config as circulation_config -from .circulation.indexer import index_extra_fields_for_loan +from .circulation.indexer import ( + index_extra_fields_for_loan, + index_stats_fields_for_loan, +) from .circulation.receivers import register_circulation_signals from .document_requests.api import DOCUMENT_REQUEST_PID_TYPE -from .documents.api import DOCUMENT_PID_TYPE, Document +from .documents.api import DOCUMENT_PID_TYPE from .eitems.api import EITEM_PID_TYPE from .files.receivers import register_files_signals from .internal_locations.api import INTERNAL_LOCATION_PID_TYPE @@ -327,3 +330,4 @@ def before_loan_index_hook(sender, json=None, record=None, index=None, **kwargs) :param kwargs: Any other parameters. 
""" index_extra_fields_for_loan(json) + index_stats_fields_for_loan(json) diff --git a/invenio_app_ils/permissions.py b/invenio_app_ils/permissions.py index c0b69f987..8d46004d3 100644 --- a/invenio_app_ils/permissions.py +++ b/invenio_app_ils/permissions.py @@ -205,6 +205,7 @@ def __init__(self, record): ] _is_backoffice_read_permission = [ "stats-most-loaned", + "stats-loans", "get-notifications-sent-to-patron", ] _is_patron_owner_permission = [ diff --git a/invenio_app_ils/stats/event_builders.py b/invenio_app_ils/stats/event_builders.py index 5aebb5170..fc98f58e6 100644 --- a/invenio_app_ils/stats/event_builders.py +++ b/invenio_app_ils/stats/event_builders.py @@ -14,6 +14,7 @@ from flask_login import current_user from invenio_app_ils.permissions import backoffice_permission +from invenio_app_ils.proxies import current_app_ils from invenio_app_ils.records.api import IlsRecord @@ -71,3 +72,51 @@ def add_record_pid_to_event(event, sender_app, record=None, **kwargs): event.update({"pid_value": record.get("pid")}) return event + + +def loan_transition_event_builder( + event, + sender_app, + transition=None, + initial_loan=None, + loan=None, + trigger=None, + **kwargs +): + """Build an event for a loan state transition.""" + event.update( + { + "timestamp": datetime.datetime.now(datetime.timezone.utc) + .replace(tzinfo=None) + .isoformat(), + "trigger": trigger, + "pid_value": loan["pid"], + } + ) + + if trigger == "request": + # Store how many items were available during request. + # This information is used by the loan indexer and added to the loan. 
class LoansEventsIndexer(EventsIndexer):
    """Events indexer for events related to loans.

    Triggers a reindex on affected loans
    """

    def run(self):
        """Index the queued loan events, then reindex the affected loans.

        The events are bulk indexed first and the touched event indices are
        refreshed, so the new entries can be read right away. Afterwards every
        loan that had an event is indexed again, which lets the loan indexer
        pick up the information stored in the events index.

        Example:
            When a loan is requested, an event is placed in the queue that stores
            the loan PID and how many items were available at the time of the
            request. Running this indexer moves the event from the queue into the
            events index and then reindexes the loan, so the loan indexer can
            read the state of the document from the events index.

        :returns: The result of the bulk indexing of the events.
        """

        # Drain the queue once, the actions are needed several times below
        queued_actions = list(self.actionsiter())
        bulk_result = search.helpers.bulk(
            self.client, queued_actions, stats_only=True, chunk_size=50
        )

        # Make the new events visible before the loans are reindexed
        for index_name in {action["_index"] for action in queued_actions}:
            wait_es_refresh(index_name)

        # Reindex every loan that had at least one event
        indexer = current_circulation.loan_indexer()
        record_cls = current_circulation.loan_record_cls
        affected_pids = {action["_source"]["pid_value"] for action in queued_actions}
        for pid in affected_pids:
            indexer.index(record_cls.get_record_by_pid(pid))

        return bulk_result
+ +"""Loan transitions aggregation templates.""" diff --git a/invenio_app_ils/stats/templates/aggregations/loan_transitions/os-v2/__init__.py b/invenio_app_ils/stats/templates/aggregations/loan_transitions/os-v2/__init__.py new file mode 100644 index 000000000..5a8c3ca87 --- /dev/null +++ b/invenio_app_ils/stats/templates/aggregations/loan_transitions/os-v2/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. +# +# invenio-app-ils is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Loan transitions aggregation templates.""" diff --git a/invenio_app_ils/stats/templates/aggregations/loan_transitions/os-v2/aggr-loan-transitions-v1.json b/invenio_app_ils/stats/templates/aggregations/loan_transitions/os-v2/aggr-loan-transitions-v1.json new file mode 100644 index 000000000..138c541ee --- /dev/null +++ b/invenio_app_ils/stats/templates/aggregations/loan_transitions/os-v2/aggr-loan-transitions-v1.json @@ -0,0 +1,32 @@ +{ + "index_patterns": [ + "__SEARCH_INDEX_PREFIX__stats-loan-transitions-*" + ], + "settings": { + "index": { + "refresh_interval": "5s" + } + }, + "mappings": { + "date_detection": false, + "dynamic": false, + "numeric_detection": false, + "properties": { + "timestamp": { + "type": "date" + }, + "updated_timestamp": { + "type": "date" + }, + "trigger": { + "type": "keyword" + }, + "count": { + "type": "integer" + } + } + }, + "aliases": { + "__SEARCH_INDEX_PREFIX__stats-loan-transitions": {} + } +} diff --git a/invenio_app_ils/stats/templates/events/loan_transitions/__init__.py b/invenio_app_ils/stats/templates/events/loan_transitions/__init__.py new file mode 100644 index 000000000..ab8e740d7 --- /dev/null +++ b/invenio_app_ils/stats/templates/events/loan_transitions/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. 
+# +# invenio-app-ils is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Loan transitions events templates.""" diff --git a/invenio_app_ils/stats/templates/events/loan_transitions/os-v2/__init__.py b/invenio_app_ils/stats/templates/events/loan_transitions/os-v2/__init__.py new file mode 100644 index 000000000..ab8e740d7 --- /dev/null +++ b/invenio_app_ils/stats/templates/events/loan_transitions/os-v2/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. +# +# invenio-app-ils is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Loan transitions events templates.""" diff --git a/invenio_app_ils/stats/templates/events/loan_transitions/os-v2/loan-transitions-v1.json b/invenio_app_ils/stats/templates/events/loan_transitions/os-v2/loan-transitions-v1.json new file mode 100644 index 000000000..7eda6165f --- /dev/null +++ b/invenio_app_ils/stats/templates/events/loan_transitions/os-v2/loan-transitions-v1.json @@ -0,0 +1,40 @@ +{ + "index_patterns": [ + "__SEARCH_INDEX_PREFIX__events-stats-loan-transitions-*" + ], + "settings": { + "index": { + "refresh_interval": "5s" + } + }, + "mappings": { + "date_detection": false, + "dynamic": false, + "numeric_detection": false, + "properties": { + "timestamp": { + "type": "date" + }, + "updated_timestamp": { + "type": "date" + }, + "unique_id": { + "type": "keyword" + }, + "trigger": { + "type": "keyword" + }, + "pid_value": { + "type": "keyword" + }, + "extra_data": { + "type": "object", + "dynamic": true, + "enabled": true + } + } + }, + "aliases": { + "__SEARCH_INDEX_PREFIX__events-stats-loan-transitions": {} + } +} diff --git a/setup.cfg b/setup.cfg index 91f9192a6..0f4d7216c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -53,7 +53,7 @@ install_requires = invenio-files-rest>=3.2.0,<4.0.0 invenio-banners>=5.0.0,<6.0.0 
invenio-pages>=7.1.0,<8.0.0 - invenio-circulation>=3.0.0a1,<4.0.0 + invenio-circulation>=4.0.0,<5.0.0 invenio-opendefinition>=2.0.0a2,<3.0.0 invenio-pidrelations>=1.0.0,<2.0.0 invenio-stats>=5.1.1,<6.0.0 diff --git a/tests/api/circulation/test_loan_extend.py b/tests/api/circulation/test_loan_extend.py index 9e91403ea..146ca7b3d 100644 --- a/tests/api/circulation/test_loan_extend.py +++ b/tests/api/circulation/test_loan_extend.py @@ -12,7 +12,6 @@ from datetime import timedelta import arrow -from flask import url_for from invenio_circulation.api import Loan from invenio_db import db @@ -21,34 +20,24 @@ from tests.helpers import user_login -def _checkout_loan_pid1(client, json_headers, users, loan_params): +def _checkout_loan_pid1(loan_params, checkout_loan): """Create an ongoing loan.""" - def _checkout(loan_pid, params): - """Perform checkout action.""" - user_login(client, "librarian", users) - checkout_url = url_for( - "invenio_circulation_loan_actions.loanid_actions", - pid_value=loan_pid, - action="checkout", - ) - resp = client.post(checkout_url, headers=json_headers, data=json.dumps(params)) - assert resp.status_code == 202 - return resp.get_json() - loan_pid = "loanid-1" params = deepcopy(loan_params) params["document_pid"] = "docid-1" params["item_pid"]["value"] = "itemid-2" - return _checkout(loan_pid, params) + return checkout_loan(loan_pid, params) -def test_loan_extend_permissions(client, json_headers, users, testdata, loan_params): +def test_loan_extend_permissions( + client, json_headers, users, testdata, loan_params, checkout_loan +): """Test loan can be extended.""" params = deepcopy(loan_params) del params["transaction_date"] - loan = _checkout_loan_pid1(client, json_headers, users, params) + loan = _checkout_loan_pid1(params, checkout_loan) tests = [ ("admin", 202), @@ -71,12 +60,12 @@ def test_loan_extend_permissions(client, json_headers, users, testdata, loan_par def test_loan_extension_end_date( - app, client, json_headers, users, testdata, 
loan_params + app, client, json_headers, users, testdata, loan_params, checkout_loan ): """Test loan end date after extension.""" params = deepcopy(loan_params) del params["transaction_date"] - record = _checkout_loan_pid1(client, json_headers, users, params) + record = _checkout_loan_pid1(params, checkout_loan) extend_url = record["links"]["actions"]["extend"] loan = record["metadata"] diff --git a/tests/api/conftest.py b/tests/api/conftest.py index e936a3e4e..731493702 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -7,9 +7,11 @@ """Pytest fixtures and plugins for the API application.""" +import json import tempfile import pytest +from flask import url_for from invenio_app.factory import create_api from invenio_circulation.api import Loan from invenio_circulation.pidstore.pids import CIRCULATION_LOAN_PID_TYPE @@ -37,6 +39,7 @@ internal_location_ref_builder, load_json_from_datadir, mint_record_pid, + user_login, ) @@ -181,6 +184,23 @@ def testdata_most_loaned(db, testdata): } +@pytest.fixture() +def testdata_loan_histogram(db, testdata): + """Create, index and return test data for loans histogram.""" + loans_histogram = load_json_from_datadir("loans_histogram.json") + recs = _create_records(db, loans_histogram, Loan, CIRCULATION_LOAN_PID_TYPE) + + ri = RecordIndexer() + for rec in recs: + ri.index(rec) + + current_search.flush_and_refresh(index="loans") + + testdata["loans_histogram"] = loans_histogram + + return testdata + + @pytest.fixture() def item_record(app): """Fixture to return an Item payload.""" @@ -211,6 +231,24 @@ def loan_params(): ) +@pytest.fixture +def checkout_loan(client, json_headers, users): + """Perform loan checkout action.""" + + def checkout(loan_pid, params): + user_login(client, "librarian", users) + checkout_url = url_for( + "invenio_circulation_loan_actions.loanid_actions", + pid_value=loan_pid, + action="checkout", + ) + resp = client.post(checkout_url, headers=json_headers, data=json.dumps(params)) + assert 
resp.status_code == 202 + return resp.get_json() + + return checkout + + @pytest.fixture() def bucket(bucket_from_dir): """Create temporary bucket fixture.""" diff --git a/tests/api/ils/stats/helpers.py b/tests/api/ils/stats/helpers.py index c6667b1b3..44e078065 100644 --- a/tests/api/ils/stats/helpers.py +++ b/tests/api/ils/stats/helpers.py @@ -24,7 +24,12 @@ def process_and_aggregate_stats(event_types=None, aggregation_types=None): def query_stats(client, stat, params): - """Query stats via the HTTP API.""" + """Query stats via the HTTP API. + + :param client: Flask test client. + :param stat: The stat to query. + :param params: The parameters for the stat query. + """ query = { "queried_stat": { @@ -43,8 +48,46 @@ def query_stats(client, stat, params): def extract_buckets_from_stats_query(response): - """Extract buckets from the stats query response.""" + """Extract buckets from the stats query response. + + :param response: The HTTP response from the query_stats function. + """ data = json.loads(response.data) buckets = data.get("queried_stat").get("buckets", []) return buckets + + +def query_histogram(client, url, group_by, metrics=None, q=None): + """Query a histogram endpoint via the HTTP API. + + :param client: Flask test client. + :param url: The histogram endpoint URL. + :param group_by: List of dicts defining the grouping fields. + :param metrics: List of dicts defining the aggregation metrics. + :param q: The search query. + """ + + params = { + "group_by": json.dumps(group_by), + } + if metrics: + params["metrics"] = json.dumps(metrics) + if q: + params["q"] = q + + response = client.get( + url, + query_string=params, + ) + + return response + + +def extract_buckets_from_histogram(response): + """Extract buckets from the histogram response. + + :param response: The HTTP response from the histogram endpoint. 
+ """ + data = json.loads(response.data) + return data.get("buckets", []) diff --git a/tests/api/ils/stats/test_loan_extension_stats.py b/tests/api/ils/stats/test_loan_extension_stats.py new file mode 100644 index 000000000..88d690384 --- /dev/null +++ b/tests/api/ils/stats/test_loan_extension_stats.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. +# +# invenio-app-ils is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Test loan transitions stats functionality.""" + + +import json +from copy import deepcopy + +from tests.api.ils.stats.helpers import ( + extract_buckets_from_stats_query, + process_and_aggregate_stats, + query_stats, +) +from tests.helpers import user_login, user_logout + + +def _query_loan_extensions_stats(client): + """Query stats via the HTTP API.""" + response = query_stats( + client, + "loan-extensions", + {}, + ) + assert response.status_code == 200 + buckets = extract_buckets_from_stats_query(response) + + total_count = sum(bucket.get("count") for bucket in buckets) + return total_count + + +def test_loan_extensions_histogram( + client, + json_headers, + users, + empty_event_queues, + empty_search, + testdata, + loan_params, + checkout_loan, +): + """Test that loan extensions are tracked correctly.""" + + process_and_aggregate_stats() + user_login(client, "admin", users) + initial_count = _query_loan_extensions_stats(client) + + # checkout and extend loan + loan_pid = "loanid-1" + params = deepcopy(loan_params) + params["document_pid"] = "docid-1" + params["item_pid"]["value"] = "itemid-2" + del params["transaction_date"] + loan = checkout_loan(loan_pid, params) + + extend_url = loan["links"]["actions"]["extend"] + user_login(client, "admin", users) + res = client.post( + extend_url, + headers=json_headers, + data=json.dumps(params), + ) + assert res.status_code == 202 + + process_and_aggregate_stats() + final_count = 
_query_loan_extensions_stats(client) + assert final_count == initial_count + 1 + + +def test_loan_extensions_stats_permissions(client, users): + """Test that only certain users can access the stats.""" + + stat = "loan-extensions" + tests = [ + ("admin", 200), + ("patron1", 403), + ("librarian", 200), + ("readonly", 200), + ("anonymous", 401), + ] + + params = {} + for username, expected_resp_code in tests: + user_login(client, username, users) + response = query_stats( + client, + stat, + params, + ) + assert response.status_code == expected_resp_code, username + user_logout(client) diff --git a/tests/api/ils/stats/test_loan_stats.py b/tests/api/ils/stats/test_loan_stats.py new file mode 100644 index 000000000..8eebea745 --- /dev/null +++ b/tests/api/ils/stats/test_loan_stats.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2025 CERN. +# +# invenio-app-ils is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+ +"""Test loan stats histogram functionality.""" + +import datetime +import json +from copy import deepcopy + +from flask import url_for +from invenio_circulation.proxies import current_circulation +from invenio_db import db +from invenio_search import current_search + +from invenio_app_ils.items.api import Item +from invenio_app_ils.proxies import current_app_ils +from tests.api.ils.stats.helpers import ( + extract_buckets_from_histogram, + process_and_aggregate_stats, + query_histogram, +) +from tests.helpers import user_login, user_logout + +LOAN_HISTOGRAM_ENDPOINT = "invenio_app_ils_circulation_stats.loan_histogram" + + +HISTOGRAM_LOANS_DOCUMENT_PID = "docid-loan-histogram" +HISTOGRAM_LOANS_AVAILABLE_ITEM_PID = "itemid-loan-histogram-2" + + +def _refresh_loans_index(): + search_cls = current_circulation.loan_search_cls + current_search.flush_and_refresh(index=search_cls.Meta.index) + + +def _query_loan_histogram(client, group_by, metrics=[], q=""): + """Query the loan histogram endpoint via the HTTP API.""" + + # We have a certain document in the testdata that is assigned to all loans used for the tests in this file. + # This allows us expect fixed values from the histogram in our tests, + # even after future changes the standard testdata for loans. 
+ if q != "": + q += " AND " + q += "document_pid: " + HISTOGRAM_LOANS_DOCUMENT_PID + + url = url_for(LOAN_HISTOGRAM_ENDPOINT) + response = query_histogram(client, url, group_by, metrics, q) + assert response.status_code == 200 + + buckets = extract_buckets_from_histogram(response) + return buckets + + +def test_loan_stats_histogram_single_group( + client, + users, + empty_event_queues, + empty_search, + testdata_loan_histogram, +): + """Test histogram with single field grouping.""" + user_login(client, "admin", users) + + group_by = [{"field": "state"}] + buckets = _query_loan_histogram(client, group_by) + + # Should have 3 states: ITEM_ON_LOAN, ITEM_RETURNED, PENDING + assert len(buckets) == 3 + + state_counts = {bucket["key"]["state"]: bucket["doc_count"] for bucket in buckets} + assert state_counts["ITEM_ON_LOAN"] == 1 + assert state_counts["ITEM_RETURNED"] == 2 + assert state_counts["PENDING"] == 1 + + +def test_loan_stats_histogram_date_groups( + client, + users, + empty_event_queues, + empty_search, + testdata_loan_histogram, +): + """Test histogram with date field to group by.""" + user_login(client, "admin", users) + + group_by = [{"field": "start_date", "interval": "1M"}] + buckets = _query_loan_histogram(client, group_by) + + # Should have 3 different date groups: 2024-01, 2024-07, 2025-07 + assert len(buckets) == 3 + + date_counts = { + bucket["key"]["start_date"]: bucket["doc_count"] for bucket in buckets + } + assert date_counts["2024-01-01"] == 1 + assert date_counts["2024-07-01"] == 1 + assert date_counts["2025-07-01"] == 2 + + +def test_loan_stats_histogram_multiple_groups( + client, + users, + empty_event_queues, + empty_search, + testdata_loan_histogram, +): + """Test histogram with multiple fields to group by.""" + + user_login(client, "admin", users) + + group_by = [ + {"field": "start_date", "interval": "1M"}, + {"field": "state"}, + ] + + buckets = _query_loan_histogram(client, group_by) + + # Should have 4 different (date,state) groups + 
assert len(buckets) == 4 + + date_counts = { + (bucket["key"]["start_date"], bucket["key"]["state"]): bucket["doc_count"] + for bucket in buckets + } + + assert date_counts[("2024-01-01", "ITEM_RETURNED")] == 1 + assert date_counts[("2024-07-01", "ITEM_RETURNED")] == 1 + assert date_counts[("2025-07-01", "ITEM_ON_LOAN")] == 1 + assert date_counts[("2025-07-01", "PENDING")] == 1 + + +def test_loan_stats_histogram_metrics_aggregation( + client, users, empty_event_queues, empty_search, testdata_loan_histogram +): + """Test histogram with various aggregation metrics.""" + + user_login(client, "admin", users) + + group_by = [{"field": "state"}] + field = "extension_count" + + tests = { + "ITEM_ON_LOAN": {"avg": 1.0, "sum": 1, "min": 1, "max": 1, "median": 1}, + "PENDING": {"avg": 2.0, "sum": 2, "min": 2, "max": 2, "median": 2}, + "ITEM_RETURNED": {"avg": 2.0, "sum": 4, "min": 1, "max": 3, "median": 2}, + } + + metrics = [ + {"field": field, "aggregation": agg} + for agg in ["avg", "sum", "min", "max", "median"] + ] + buckets = _query_loan_histogram(client, group_by, metrics) + # Place the buckets in a dict for easier access + histogram_metrics = {bucket["key"]["state"]: bucket for bucket in buckets} + + for group_key, expected_metrics in tests.items(): + for aggregation_type, expected_value in expected_metrics.items(): + assert ( + histogram_metrics[group_key]["metrics"][f"{aggregation_type}__{field}"] + == expected_value + ) + + +def test_loan_stats_histogram_search_query( + client, + users, + empty_event_queues, + empty_search, + testdata_loan_histogram, +): + """Test that the q search query works in loan stats histogram.""" + + user_login(client, "admin", users) + + group_by = [{"field": "state"}] + metrics = [] + q = "start_date:[2025-01-01 TO 2026-01-01]" + + buckets = _query_loan_histogram(client, group_by, metrics, q) + + # Should have 2 states: ITEM_ON_LOAN, PENDING + assert len(buckets) == 2 + + state_counts = {bucket["key"]["state"]: bucket["doc_count"] for 
bucket in buckets} + assert state_counts["ITEM_ON_LOAN"] == 1 + assert state_counts["PENDING"] == 1 + + +def test_loan_stats_histogram_group_by_document_availability( + client, + users, + empty_event_queues, + empty_search, + json_headers, + testdata_loan_histogram, + loan_params, +): + """Test that the availability of an item during loan request can be used for grouping loans in the histogram.""" + + user_login(client, "admin", users) + + def _request_loan(patron_pid): + url = url_for("invenio_app_ils_circulation.loan_request") + + new_loan = deepcopy(loan_params) + new_loan["patron_pid"] = patron_pid + new_loan["delivery"] = {"method": "PICKUP"} + new_loan["document_pid"] = "docid-loan-histogram" + res = client.post(url, headers=json_headers, data=json.dumps(new_loan)) + assert res.status_code == 202, res.get_json() + loan = res.get_json()["metadata"] + assert loan["state"] == "PENDING" + + group_by = [{"field": "extra_data.stats.available_items_during_request"}] + + # There should be no loans that have the field available_items_during_request indexed on them + process_and_aggregate_stats() + buckets = _query_loan_histogram(client, group_by) + assert len(buckets) == 0 + + # Create loan while one item of the document is available + _request_loan("3") + process_and_aggregate_stats() + _refresh_loans_index() + + buckets = _query_loan_histogram(client, group_by) + assert len(buckets) == 1 + + # Make the documents last available item unavailable + item = Item.get_record_by_pid(HISTOGRAM_LOANS_AVAILABLE_ITEM_PID) + item.update(dict(status="IN_BINDING")) + item.commit() + db.session.commit() + current_app_ils.item_indexer.index(item) + item_search = current_app_ils.item_search_cls + current_search.flush_and_refresh(index=item_search.Meta.index) + + # Now request another loan for the same document + # We need to request this loan with another patron, as it will fail otherwise + _request_loan("4") + process_and_aggregate_stats() + _refresh_loans_index() + + buckets = 
_query_loan_histogram(client, group_by) + assert len(buckets) == 2 + + # There should be one loan that was requested when no item was available + # and one that was requested when an item was available + availability_counts = { + bucket["key"]["extra_data.stats.available_items_during_request"]: bucket[ + "doc_count" + ] + for bucket in buckets + } + assert availability_counts["True"] == 1 + assert availability_counts["False"] == 1 + + +def test_loan_stats_indexed_fields( + client, + users, + empty_event_queues, + empty_search, + testdata_loan_histogram, +): + """Test loan time ranges being indexed onto loans + + The following time ranges are added to a loan during indexing + * loan_duration + * waiting_time + """ + expected_waiting_time_days = 3 + expected_loan_duration_days = 6 + + # Update the loan to have the expecting waiting_time and loan_duration + loan_pid = "loan-hist-3" + + loan_cls = current_circulation.loan_record_cls + loan = loan_cls.get_record_by_pid(loan_pid) + + now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) + loan["start_date"] = ( + (now + datetime.timedelta(days=expected_waiting_time_days)).date().isoformat() + ) + loan["end_date"] = ( + ( + now + + datetime.timedelta(days=expected_waiting_time_days) + + datetime.timedelta(days=expected_loan_duration_days) + ) + .date() + .isoformat() + ) + + loan.commit() + db.session.commit() + current_circulation.loan_indexer().index(loan) + _refresh_loans_index() + + # test if the information is indexed to the loan + loan_search_cls = current_circulation.loan_search_cls + hits = [hit for hit in loan_search_cls().filter("term", pid=loan_pid).scan()] + assert len(hits) == 1 + + stats = hits[0]["extra_data"]["stats"] + assert stats["waiting_time"] == expected_waiting_time_days + assert stats["loan_duration"] == expected_loan_duration_days + + # test if the information is available to be aggregated through the histogram endpoint + user_login(client, "admin", users) + group_by = [{"field": 
"state"}] + metrics = [ + {"field": "extra_data.stats.waiting_time", "aggregation": "sum"}, + {"field": "extra_data.stats.loan_duration", "aggregation": "sum"}, + ] + q = f"pid:{loan_pid}" + + buckets = _query_loan_histogram(client, group_by, metrics, q) + assert len(buckets) == 1 + metrics_bucket = buckets[0]["metrics"] + + assert ( + metrics_bucket["sum__extra_data.stats.waiting_time"] + == expected_waiting_time_days + ) + assert ( + metrics_bucket["sum__extra_data.stats.loan_duration"] + == expected_loan_duration_days + ) + + +def test_loan_stats_permissions(client, users): + """Test that only certain users can access the loan histogram endpoint.""" + + tests = [ + ("admin", 200), + ("librarian", 200), + ("readonly", 200), + ("patron1", 403), + ("anonymous", 401), + ] + + for username, expected_resp_code in tests: + user_login(client, username, users) + + url = url_for(LOAN_HISTOGRAM_ENDPOINT) + response = query_histogram( + client, + url, + group_by=[{"field": "state"}], + metrics=[], + q="", + ) + + assert ( + response.status_code == expected_resp_code + ), f"Failed for user: {username}" + + user_logout(client) + + +def test_loan_stats_input_validation(client, users): + user_login(client, "admin", users) + url = url_for(LOAN_HISTOGRAM_ENDPOINT) + + # Attempt to use wrong aggregation type + group_by = [{"field": "state"}] + metrics = [{"field": "loan_duration", "aggregation": "script"}] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 + + # Attempt to pass a field with special characters as the metric field + group_by = [{"field": "state"}] + metrics = [{"field": "doc['loan_duration'].value", "aggregation": "avg"}] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 + + # Attempt to pass a field with special characters as the group by field + group_by = [{"field": "doc['loan_duration'].value"}] + metrics = [] + resp = query_histogram(client, url, group_by, metrics) + assert 
resp.status_code == 400 + + # Attempt to use an invalid date interval + group_by = [{"field": "start_date", "interval": "1z"}] + metrics = [] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 + + # Attempt to use a date field without an interval + group_by = [{"field": "start_date"}] + metrics = [] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 + + # Attempt to use a non date field with an interval + group_by = [{"field": "state", "interval": "1M"}] + metrics = [] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 + + # Missing group_by parameter + group_by = None + metrics = [] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 + + # Empty group_by parameter + group_by = [] + metrics = [] + resp = query_histogram(client, url, group_by, metrics) + assert resp.status_code == 400 diff --git a/tests/api/ils/stats/test_record_changes_stats.py b/tests/api/ils/stats/test_record_changes_stats.py index adc73201b..11343fcb6 100644 --- a/tests/api/ils/stats/test_record_changes_stats.py +++ b/tests/api/ils/stats/test_record_changes_stats.py @@ -125,15 +125,13 @@ def test_record_change_stats_histogram( def test_record_change_stats_per_user( - app, users, empty_event_queues, empty_search, testdata + client, users, empty_event_queues, empty_search, testdata ): """Test that insertions by different users are tracked correctly.""" pid_type = DOCUMENT_PID_TYPE method = "insert" - client = app.test_client() - user_login(client, "admin", users) # Create one record with admin to have a user with existing stats data = testdata["documents"][0].copy() @@ -174,14 +172,13 @@ def test_record_change_stats_per_user( def test_patron_record_changes_not_tracked_per_user( - app, users, empty_event_queues, empty_search, testdata + client, users, empty_event_queues, empty_search, testdata ): """Test that patron users are not tracked in 
per-user stats.""" pid_type = DOCUMENT_PID_TYPE method = "insert" - client = app.test_client() user_login(client, "admin", users) # Create one record with admin to have a user with existing stats data = testdata["documents"][0].copy() diff --git a/tests/data/documents.json b/tests/data/documents.json index 72d98c538..28468c6cb 100644 --- a/tests/data/documents.json +++ b/tests/data/documents.json @@ -234,5 +234,16 @@ "document_type": "BOOK", "restricted": false, "publication_year": "1950" + }, + { + "$schema": "https://127.0.0.1:5000/schemas/documents/document-v2.0.0.json", + "pid": "docid-loan-histogram", + "created_by": {"type": "script", "value": "demo"}, + "title": "Document used to differentiate loans for the loan histogram test", + "authors": [{ "full_name": "Buhmann, Stefan Yoshi" }], + "abstract": "Just an abstract", + "document_type": "BOOK", + "restricted": false, + "publication_year": "1950" } ] diff --git a/tests/data/items.json b/tests/data/items.json index 166b8ab42..1a466afe1 100644 --- a/tests/data/items.json +++ b/tests/data/items.json @@ -300,5 +300,27 @@ "medium": "NOT_SPECIFIED", "status": "IN_BINDING", "document": {} + }, + { + "pid": "itemid-loan-histogram-1", + "created_by": { "type": "script", "value": "demo" }, + "barcode": "123456789-loan-histogram-1", + "document_pid": "docid-loan-histogram", + "internal_location_pid": "ilocid-1", + "circulation_restriction": "NO_RESTRICTION", + "medium": "NOT_SPECIFIED", + "status": "CAN_CIRCULATE", + "document": {} + }, + { + "pid": "itemid-loan-histogram-2", + "created_by": { "type": "script", "value": "demo" }, + "barcode": "123456789-loan-histogram-2", + "document_pid": "docid-loan-histogram", + "internal_location_pid": "ilocid-1", + "circulation_restriction": "NO_RESTRICTION", + "medium": "NOT_SPECIFIED", + "status": "CAN_CIRCULATE", + "document": {} } ] diff --git a/tests/data/loans_histogram.json b/tests/data/loans_histogram.json new file mode 100644 index 000000000..c53ad040e --- /dev/null +++ 
b/tests/data/loans_histogram.json @@ -0,0 +1,54 @@ +[ + { + "pid": "loan-hist-1", + "state": "ITEM_ON_LOAN", + "patron_pid": "1", + "document_pid": "docid-loan-histogram", + "item_pid": { + "type": "pitmid", + "value": "itemid-loan-histogram-1" + }, + "transaction_location_pid": "locid-1", + "start_date": "2025-07-01", + "end_date": "2025-12-01", + "extension_count": 1 + }, + { + "pid": "loan-hist-2", + "state": "PENDING", + "patron_pid": "2", + "document_pid": "docid-loan-histogram", + "transaction_location_pid": "locid-1", + "start_date": "2025-07-15", + "end_date": "2025-07-30", + "extension_count": 2 + }, + { + "pid": "loan-hist-3", + "state": "ITEM_RETURNED", + "patron_pid": "2", + "document_pid": "docid-loan-histogram", + "item_pid": { + "type": "pitmid", + "value": "itemid-loan-histogram-1" + }, + "transaction_location_pid": "locid-1", + "start_date": "2024-01-01", + "end_date": "2024-07-01", + "extension_count": 1 + }, + { + "pid": "loan-hist-4", + "state": "ITEM_RETURNED", + "patron_pid": "2", + "document_pid": "docid-loan-histogram", + "item_pid": { + "type": "pitmid", + "value": "itemid-loan-histogram-1" + }, + "transaction_location_pid": "locid-2", + "start_date": "2024-07-01", + "end_date": "2025-01-01", + "extension_count": 3 + } +]