Skip to content

Commit 5a988e2

Browse files
committed
stats: add loan stats endpoint and extend loans index
* the endpoint returns a histogram for loans where requested metrics are grouped by and aggregated * extend the loans with a new stats object * the stats object contains `waiting_time`, `loan_duration` and `available_items_during_request`
1 parent 715e8ac commit 5a988e2

File tree

15 files changed

+948
-9
lines changed

15 files changed

+948
-9
lines changed

invenio_app_ils/circulation/indexer.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from invenio_circulation.proxies import current_circulation
1616
from invenio_indexer.api import RecordIndexer
1717
from invenio_pidstore.errors import PIDDeletedError
18+
from invenio_search import current_search_client
1819

1920
from invenio_app_ils.circulation.utils import resolve_item_from_loan
2021
from invenio_app_ils.documents.api import DOCUMENT_PID_TYPE
@@ -97,3 +98,64 @@ def index_extra_fields_for_loan(loan_dict):
9798

9899
can_circulate_items_count = document["circulation"]["can_circulate_items_count"]
99100
loan_dict["can_circulate_items_count"] = can_circulate_items_count
101+
102+
103+
def index_stats_fields_for_loan(loan_dict):
104+
"""Indexer hook to modify the loan record dict before indexing"""
105+
106+
creation_date = datetime.fromisoformat(loan_dict["_created"]).date()
107+
start_date = (
108+
datetime.fromisoformat(loan_dict["start_date"]).date()
109+
if loan_dict.get("start_date")
110+
else None
111+
)
112+
end_date = (
113+
datetime.fromisoformat(loan_dict["end_date"]).date()
114+
if loan_dict.get("end_date")
115+
else None
116+
)
117+
118+
# Collect extra information relevant for stats
119+
stats = {}
120+
121+
# Time ranges in days
122+
if start_date and end_date:
123+
loan_duration = (end_date - start_date).days
124+
stats["loan_duration"] = loan_duration
125+
126+
if creation_date and start_date:
127+
waiting_time = (start_date - creation_date).days
128+
stats["waiting_time"] = waiting_time if waiting_time >= 0 else None
129+
130+
# Document availability during loan request
131+
stat_events_index_name = "events-stats-loan-transitions"
132+
if current_search_client.indices.exists(index=stat_events_index_name):
133+
search_body = {}
134+
135+
search_body = {
136+
"query": {
137+
"bool": {
138+
"must": [
139+
{"term": {"field": "available_items_during_request_count"}},
140+
{"term": {"pid_value": loan_dict["pid"]}},
141+
],
142+
}
143+
},
144+
}
145+
146+
search_result = current_search_client.search(
147+
index=stat_events_index_name, body=search_body
148+
)
149+
hits = search_result["hits"]["hits"]
150+
if len(hits) == 1:
151+
request_transition_event = hits[0]["_source"]
152+
available_items_during_request_count = request_transition_event["value"]
153+
stats["available_items_during_request"] = (
154+
available_items_during_request_count > 0
155+
)
156+
elif len(hits) > 1:
157+
raise ValueError("Multiple request transition events.")
158+
159+
if not "extensions" in loan_dict:
160+
loan_dict["extensions"] = {}
161+
loan_dict["extensions"]["stats"] = stats

invenio_app_ils/circulation/stats/api.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# Copyright (C) 2019 CERN.
3+
# Copyright (C) 2019-2025 CERN.
44
#
55
# invenio-app-ils is free software; you can redistribute it and/or modify it
66
# under the terms of the MIT License; see LICENSE file for more details.
77

88
"""APIs for ILS circulation statistics."""
99

10+
from invenio_search.engine import dsl
11+
1012
from invenio_app_ils.circulation.search import get_most_loaned_documents
13+
from invenio_app_ils.circulation.stats.schemas import (
14+
_OS_NATIVE_AGGREGATE_FUNCTION_TYPES,
15+
)
1116
from invenio_app_ils.proxies import current_app_ils
1217

1318

@@ -49,3 +54,94 @@ def fetch_most_loaned_documents(from_date, to_date, bucket_size):
4954
)
5055

5156
return res
57+
58+
59+
def _generate_metric_agg_field_name(metric):
60+
"""Return the aggregation name used for a metric.
61+
62+
:param metric: Must include 'field' and 'aggregation' keys.
63+
:returns: The aggregation field name in the form '<aggregation>_<field>'.
64+
"""
65+
66+
return f"{metric['aggregation']}__{metric['field']}"
67+
68+
69+
def get_loan_statistics(date_fields, search, group_by, metrics):
70+
"""Aggregate loan statistics for requested metrics.
71+
72+
:param search: The base search object to apply aggregations on
73+
:param group_by: List of group dictionaries with 'field' and optional 'interval' keys.
74+
Example: [{"field": "start_date", "interval": "monthly"}, {"field": "state"}]
75+
:param metrics: List of metric dictionaries with 'field' and 'aggregation' keys.
76+
Example: [{"field": "loan_duration", "aggregation": "avg"}]
77+
:returns: OpenSearch aggregation results with multi-terms histogram and optional metrics
78+
"""
79+
80+
# Build composite aggregation
81+
sources = []
82+
for grouping in group_by:
83+
grouping_field = grouping["field"]
84+
85+
if grouping_field in date_fields:
86+
sources.append(
87+
{
88+
grouping_field: {
89+
"date_histogram": {
90+
"field": grouping_field,
91+
"calendar_interval": grouping["interval"],
92+
"format": "yyyy-MM-dd",
93+
}
94+
}
95+
}
96+
)
97+
else:
98+
sources.append({grouping_field: {"terms": {"field": grouping_field}}})
99+
100+
composite_agg = dsl.A("composite", sources=sources, size=1000)
101+
102+
for metric in metrics:
103+
agg_name = _generate_metric_agg_field_name(metric)
104+
105+
grouping_field = metric["field"]
106+
agg_type = metric["aggregation"]
107+
field_config = {"field": grouping_field}
108+
if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
109+
composite_agg = composite_agg.metric(
110+
agg_name, dsl.A(agg_type, **field_config)
111+
)
112+
elif agg_type == "median":
113+
composite_agg = composite_agg.metric(
114+
agg_name, dsl.A("percentiles", percents=[50], **field_config)
115+
)
116+
117+
search.aggs.bucket("loan_aggregations", composite_agg)
118+
119+
# Only retrieve aggregation results
120+
search = search[:0]
121+
result = search.execute()
122+
123+
# Parse aggregation results
124+
buckets = []
125+
if hasattr(result.aggregations, "loan_aggregations"):
126+
for bucket in result.aggregations.loan_aggregations.buckets:
127+
bucket_data = {
128+
"key": bucket.key.to_dict(),
129+
"doc_count": bucket.doc_count,
130+
}
131+
132+
for metric in metrics:
133+
agg_name = _generate_metric_agg_field_name(metric)
134+
135+
if hasattr(bucket, agg_name):
136+
agg_result = getattr(bucket, agg_name)
137+
agg_type = metric["aggregation"]
138+
139+
if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
140+
bucket_data[agg_name] = agg_result.value
141+
elif agg_type == "median":
142+
median_value = agg_result.values.get("50.0")
143+
bucket_data[agg_name] = median_value
144+
145+
buckets.append(bucket_data)
146+
147+
return buckets
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2025 CERN.
4+
#
5+
# invenio-app-ils is free software; you can redistribute it and/or modify it
6+
# under the terms of the MIT License; see LICENSE file for more details.
7+
8+
"""Marshmallow schemas for loan statistics validation."""
9+
10+
import json
11+
import re
12+
13+
from marshmallow import (
14+
Schema,
15+
ValidationError,
16+
fields,
17+
pre_load,
18+
validate,
19+
validates_schema,
20+
)
21+
22+
from invenio_app_ils.errors import InvalidParameterError
23+
24+
_OS_VALID_FIELD_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.]+$")
25+
_OS_NATIVE_AGGREGATE_FUNCTION_TYPES = {"avg", "sum", "min", "max"}
26+
_VALID_AGGREGATE_FUNCTION_TYPES = _OS_NATIVE_AGGREGATE_FUNCTION_TYPES.union({"median"})
27+
_VALID_DATE_INTERVALS = {"1d", "1w", "1M", "1q", "1y"}
28+
29+
30+
def validate_field_name(field_name):
31+
"""Validate a field name for search to prevent injection attacks.
32+
33+
:param field_name: The field name to validate
34+
:raises InvalidParameterError: If field name is invalid or potentially malicious
35+
"""
36+
if not _OS_VALID_FIELD_NAME_PATTERN.match(field_name):
37+
raise InvalidParameterError(
38+
description=(
39+
f"Invalid field name '{field_name}'. "
40+
"Field names may contain only alphanumeric characters, underscores, "
41+
"and dots."
42+
)
43+
)
44+
45+
46+
class SecureFieldNameField(fields.String):
47+
"""Marshmallow field that validates field names to prevent injection attacks."""
48+
49+
def _deserialize(self, value, attr, data, **kwargs):
50+
"""Deserialize and validate field name."""
51+
52+
field_name = super()._deserialize(value, attr, data, **kwargs)
53+
validate_field_name(field_name)
54+
return field_name
55+
56+
57+
class GroupByItemSchema(Schema):
58+
field = SecureFieldNameField(required=True)
59+
interval = fields.String(validate=validate.OneOf(_VALID_DATE_INTERVALS))
60+
61+
@validates_schema
62+
def validate_date_fields(self, data, **kwargs):
63+
"""Validate that date fields have an interval and non-date fields do not."""
64+
65+
date_fields = self.context["date_fields"]
66+
field = data.get("field")
67+
interval = data.get("interval")
68+
if field in date_fields and not interval:
69+
raise ValidationError(
70+
{"interval": ["Interval is required for date fields."]}
71+
)
72+
if field not in date_fields and interval is not None:
73+
raise ValidationError(
74+
{"interval": ["Interval must not be provided for non-date fields."]}
75+
)
76+
77+
78+
class MetricItemSchema(Schema):
79+
"""Schema for validating a single metric item."""
80+
81+
field = SecureFieldNameField(required=True)
82+
aggregation = fields.String(
83+
required=True, validate=validate.OneOf(_VALID_AGGREGATE_FUNCTION_TYPES)
84+
)
85+
86+
87+
class HistogramParamsSchema(Schema):
88+
"""Schema for validating the query string parameters for the histogram endpoint"""
89+
90+
metrics = fields.List(fields.Nested(MetricItemSchema))
91+
group_by = fields.List(
92+
fields.Nested(GroupByItemSchema), required=True, validate=validate.Length(min=1)
93+
)
94+
q = fields.String()
95+
96+
def __init__(self, date_fields, *args, **kwargs):
97+
super().__init__(*args, **kwargs)
98+
self.context = {"date_fields": set(date_fields)}
99+
100+
@pre_load
101+
def parse_query_string(self, data, **kwargs):
102+
"""Parse the metrics and group_by parameters from JSON strings."""
103+
104+
for key in ("metrics", "group_by"):
105+
data[key] = json.loads(data.get(key))
106+
return data

invenio_app_ils/circulation/stats/views.py

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# Copyright (C) 2019 CERN.
3+
# Copyright (C) 2019-2025 CERN.
44
#
55
# invenio-app-ils is free software; you can redistribute it and/or modify it
66
# under the terms of the MIT License; see LICENSE file for more details.
@@ -9,12 +9,20 @@
99

1010
from datetime import datetime
1111

12-
from flask import Blueprint, current_app, request
12+
from flask import Blueprint, current_app, jsonify, request
13+
from invenio_circulation.proxies import current_circulation
1314
from invenio_pidstore import current_pidstore
15+
from invenio_records_rest.query import default_search_factory
1416
from invenio_records_rest.utils import obj_or_import_string
1517
from invenio_rest import ContentNegotiatedMethodView
16-
17-
from invenio_app_ils.circulation.stats.api import fetch_most_loaned_documents
18+
from marshmallow.exceptions import ValidationError
19+
20+
from invenio_app_ils.circulation.stats.api import (
21+
fetch_most_loaned_documents,
22+
get_loan_statistics,
23+
)
24+
from invenio_app_ils.circulation.stats.schemas import HistogramParamsSchema
25+
from invenio_app_ils.circulation.views import IlsCirculationResource
1826
from invenio_app_ils.config import RECORDS_REST_MAX_RESULT_WINDOW
1927
from invenio_app_ils.documents.api import DOCUMENT_PID_FETCHER, DOCUMENT_PID_TYPE
2028
from invenio_app_ils.errors import InvalidParameterError
@@ -46,11 +54,26 @@ def create_most_loaned_documents_view(blueprint, app):
4654
)
4755

4856

57+
def create_loan_histogram_view(blueprint, app):
58+
"""Add url rule for loan histogram view."""
59+
loan_stats = LoanHistogramResource.as_view(
60+
LoanHistogramResource.view_name,
61+
serializers={},
62+
ctx=dict(),
63+
)
64+
blueprint.add_url_rule(
65+
"/circulation/stats/loans",
66+
view_func=loan_stats,
67+
methods=["GET"],
68+
)
69+
70+
4971
def create_circulation_stats_blueprint(app):
5072
"""Add statistics views to the blueprint."""
5173
blueprint = Blueprint("invenio_app_ils_circulation_stats", __name__, url_prefix="")
5274

5375
create_most_loaned_documents_view(blueprint, app)
76+
create_loan_histogram_view(blueprint, app)
5477

5578
return blueprint
5679

@@ -131,3 +154,42 @@ def get(self, *args, **kwargs):
131154
pid_fetcher=current_pidstore.fetchers[DOCUMENT_PID_FETCHER],
132155
search_result=most_loaned_documents,
133156
)
157+
158+
159+
class LoanHistogramResource(IlsCirculationResource):
160+
"""Loan stats resource."""
161+
162+
view_name = "loan_histogram"
163+
164+
@need_permissions("stats-loans")
165+
def get(self, **kwargs):
166+
"""Get loan statistics."""
167+
168+
loan_cls = current_circulation.loan_record_cls
169+
loan_date_fields = (
170+
loan_cls.DATE_FIELDS + loan_cls.DATETIME_FIELDS + ["_created"]
171+
)
172+
173+
schema = HistogramParamsSchema(loan_date_fields)
174+
try:
175+
parsed_args = schema.load(request.args.to_dict())
176+
except ValidationError as e:
177+
raise InvalidParameterError(description=e.messages)
178+
179+
# Construct search to allow for filtering with the q parameter
180+
search_cls = current_circulation.loan_search_cls
181+
search = search_cls()
182+
search, _ = default_search_factory(self, search)
183+
184+
aggregation_buckets = get_loan_statistics(
185+
loan_date_fields,
186+
search,
187+
parsed_args["group_by"],
188+
parsed_args["metrics"],
189+
)
190+
191+
response = {
192+
"buckets": aggregation_buckets,
193+
}
194+
195+
return jsonify(response)

0 commit comments

Comments
 (0)