Skip to content

Commit 939bdcd

Browse files
committed
stats: add histogram endpoint for orders
1 parent c7aa3a8 commit 939bdcd

File tree

11 files changed

+309
-147
lines changed

11 files changed

+309
-147
lines changed

invenio_app_ils/circulation/indexer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ def index_stats_fields_for_loan(loan_dict):
162162
"Expected zero or one."
163163
)
164164

165+
# Make use of the `extra_data` property as loans are part of `invenio-circulation`,
166+
# which does not expose the `stats` property directly
165167
if not "extra_data" in loan_dict:
166168
loan_dict["extra_data"] = {}
167169
loan_dict["extra_data"]["stats"] = stats

invenio_app_ils/circulation/stats/api.py

Lines changed: 0 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,7 @@
77

88
"""APIs for ILS circulation statistics."""
99

10-
from invenio_search.engine import dsl
11-
1210
from invenio_app_ils.circulation.search import get_most_loaned_documents
13-
from invenio_app_ils.circulation.stats.schemas import (
14-
_OS_NATIVE_AGGREGATE_FUNCTION_TYPES,
15-
)
1611
from invenio_app_ils.proxies import current_app_ils
1712

1813

@@ -54,98 +49,3 @@ def fetch_most_loaned_documents(from_date, to_date, bucket_size):
5449
)
5550

5651
return res
57-
58-
59-
def _generate_metric_agg_field_name(metric):
60-
"""Return the aggregation name used for a metric.
61-
62-
:param metric: Must include 'field' and 'aggregation' keys.
63-
:returns: The aggregation field name in the form '<aggregation>_<field>'.
64-
"""
65-
66-
return f"{metric['aggregation']}__{metric['field']}"
67-
68-
69-
def get_loan_statistics(date_fields, search, requested_group_by, requested_metrics):
70-
"""Aggregate loan statistics for requested metrics.
71-
72-
:param date_fields: List of date fields for the record type.
73-
Date fields require different handling when using them to group by.
74-
:param search: The base search object to apply aggregations on
75-
:param requested_group_by: List of group dictionaries with 'field' and optional 'interval' keys.
76-
Example: [{"field": "start_date", "interval": "monthly"}, {"field": "state"}]
77-
:param requested_metrics: List of metric dictionaries with 'field' and 'aggregation' keys.
78-
Example: [{"field": "loan_duration", "aggregation": "avg"}]
79-
:returns: OpenSearch aggregation results with multi-terms histogram and optional metrics
80-
"""
81-
82-
# Build composite aggregation
83-
sources = []
84-
for grouping in requested_group_by:
85-
grouping_field = grouping["field"]
86-
87-
if grouping_field in date_fields:
88-
sources.append(
89-
{
90-
grouping_field: {
91-
"date_histogram": {
92-
"field": grouping_field,
93-
"calendar_interval": grouping["interval"],
94-
"format": "yyyy-MM-dd",
95-
}
96-
}
97-
}
98-
)
99-
else:
100-
sources.append({grouping_field: {"terms": {"field": grouping_field}}})
101-
102-
composite_agg = dsl.A("composite", sources=sources, size=1000)
103-
104-
for metric in requested_metrics:
105-
agg_name = _generate_metric_agg_field_name(metric)
106-
107-
grouping_field = metric["field"]
108-
agg_type = metric["aggregation"]
109-
field_config = {"field": grouping_field}
110-
if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
111-
composite_agg = composite_agg.metric(
112-
agg_name, dsl.A(agg_type, **field_config)
113-
)
114-
elif agg_type == "median":
115-
composite_agg = composite_agg.metric(
116-
agg_name, dsl.A("percentiles", percents=[50], **field_config)
117-
)
118-
119-
search.aggs.bucket("loan_aggregations", composite_agg)
120-
121-
# Only retrieve aggregation results
122-
search = search[:0]
123-
result = search.execute()
124-
125-
# Parse aggregation results
126-
buckets = []
127-
if hasattr(result.aggregations, "loan_aggregations"):
128-
for bucket in result.aggregations.loan_aggregations.buckets:
129-
metrics_data = {}
130-
for metric in requested_metrics:
131-
agg_name = _generate_metric_agg_field_name(metric)
132-
133-
if hasattr(bucket, agg_name):
134-
agg_result = getattr(bucket, agg_name)
135-
agg_type = metric["aggregation"]
136-
137-
if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
138-
metrics_data[agg_name] = agg_result.value
139-
elif agg_type == "median":
140-
median_value = agg_result.values.get("50.0")
141-
metrics_data[agg_name] = median_value
142-
143-
bucket_data = {
144-
"key": bucket.key.to_dict(),
145-
"doc_count": bucket.doc_count,
146-
"metrics": metrics_data,
147-
}
148-
149-
buckets.append(bucket_data)
150-
151-
return buckets

invenio_app_ils/circulation/stats/serializers/__init__.py

Lines changed: 0 additions & 13 deletions
This file was deleted.

invenio_app_ils/circulation/stats/views.py

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@
1818
from invenio_rest import ContentNegotiatedMethodView
1919
from marshmallow.exceptions import ValidationError
2020

21-
from invenio_app_ils.circulation.stats.api import (
22-
fetch_most_loaned_documents,
23-
get_loan_statistics,
24-
)
25-
from invenio_app_ils.circulation.stats.schemas import HistogramParamsSchema
26-
from invenio_app_ils.circulation.stats.serializers import loan_stats_response
21+
from invenio_app_ils.circulation.stats.api import fetch_most_loaned_documents
2722
from invenio_app_ils.circulation.views import IlsCirculationResource
2823
from invenio_app_ils.config import RECORDS_REST_MAX_RESULT_WINDOW
2924
from invenio_app_ils.documents.api import DOCUMENT_PID_FETCHER, DOCUMENT_PID_TYPE
3025
from invenio_app_ils.errors import InvalidParameterError
3126
from invenio_app_ils.permissions import need_permissions
27+
from invenio_app_ils.stats.histogram import (
28+
HistogramParamsSchema,
29+
create_histogram_view,
30+
get_record_statistics,
31+
)
3232

3333

3434
def create_most_loaned_documents_view(blueprint, app):
@@ -56,33 +56,18 @@ def create_most_loaned_documents_view(blueprint, app):
5656
)
5757

5858

59-
def create_loan_histogram_view(blueprint, app):
60-
"""Add url rule for loan histogram view."""
61-
62-
endpoints = app.config.get("RECORDS_REST_ENDPOINTS")
63-
document_endpoint = endpoints.get(CIRCULATION_LOAN_PID_TYPE)
64-
default_media_type = document_endpoint.get("default_media_type")
65-
loan_stats_serializers = {"application/json": loan_stats_response}
66-
67-
loan_stats_view_func = LoanHistogramResource.as_view(
68-
LoanHistogramResource.view_name,
69-
serializers=loan_stats_serializers,
70-
default_media_type=default_media_type,
71-
ctx={},
72-
)
73-
blueprint.add_url_rule(
74-
"/circulation/loans/stats",
75-
view_func=loan_stats_view_func,
76-
methods=["GET"],
77-
)
78-
79-
8059
def create_circulation_stats_blueprint(app):
8160
"""Add statistics views to the blueprint."""
8261
blueprint = Blueprint("invenio_app_ils_circulation_stats", __name__, url_prefix="")
8362

8463
create_most_loaned_documents_view(blueprint, app)
85-
create_loan_histogram_view(blueprint, app)
64+
create_histogram_view(
65+
blueprint,
66+
app,
67+
CIRCULATION_LOAN_PID_TYPE,
68+
LoanHistogramResource,
69+
"/circulation/loans",
70+
)
8671

8772
return blueprint
8873

@@ -190,7 +175,7 @@ def get(self, **kwargs):
190175
search = search_cls()
191176
search, _ = default_search_factory(self, search)
192177

193-
aggregation_buckets = get_loan_statistics(
178+
aggregation_buckets = get_record_statistics(
194179
loan_date_fields,
195180
search,
196181
parsed_args["group_by"],
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2025 CERN.
4+
#
5+
# invenio-app-ils is free software; you can redistribute it and/or modify it
6+
# under the terms of the MIT License; see LICENSE file for more details.
7+
8+
"""Invenio App ILS histogram statistics."""
9+
10+
from invenio_app_ils.stats.histogram.api import get_record_statistics
11+
from invenio_app_ils.stats.histogram.schemas import HistogramParamsSchema
12+
from invenio_app_ils.stats.histogram.views import create_histogram_view
13+
14+
__all__ = (
15+
"get_record_statistics",
16+
"HistogramParamsSchema",
17+
"create_histogram_view",
18+
)
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2019-2025 CERN.
4+
#
5+
# invenio-app-ils is free software; you can redistribute it and/or modify it
6+
# under the terms of the MIT License; see LICENSE file for more details.
7+
8+
"""APIs for ILS histogram statistics."""
9+
10+
from invenio_search.engine import dsl
11+
12+
from invenio_app_ils.stats.histogram.schemas import (
13+
_OS_NATIVE_AGGREGATE_FUNCTION_TYPES,
14+
)
15+
16+
17+
def _generate_metric_agg_field_name(metric):
18+
"""Return the aggregation name used for a metric.
19+
20+
:param metric: Must include 'field' and 'aggregation' keys.
21+
:returns: The aggregation field name in the form '<aggregation>_<field>'.
22+
"""
23+
24+
return f"{metric['aggregation']}__{metric['field']}"
25+
26+
27+
def _build_composite_aggregation(date_fields, requested_group_by, requested_metrics):
    """Build the composite aggregation, with one sub-aggregation per metric."""
    sources = []
    for grouping in requested_group_by:
        grouping_field = grouping["field"]

        if grouping_field in date_fields:
            # Date fields are bucketed per calendar interval instead of per
            # distinct value; 'interval' is mandatory for them.
            source = {
                "date_histogram": {
                    "field": grouping_field,
                    "calendar_interval": grouping["interval"],
                    "format": "yyyy-MM-dd",
                }
            }
        else:
            source = {"terms": {"field": grouping_field}}

        sources.append({grouping_field: source})

    composite_agg = dsl.A("composite", sources=sources, size=1000)

    for metric in requested_metrics:
        agg_name = _generate_metric_agg_field_name(metric)
        agg_type = metric["aggregation"]
        field_config = {"field": metric["field"]}

        if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
            composite_agg = composite_agg.metric(
                agg_name, dsl.A(agg_type, **field_config)
            )
        elif agg_type == "median":
            # There is no native "median" aggregation: request the 50th
            # percentile instead.
            composite_agg = composite_agg.metric(
                agg_name, dsl.A("percentiles", percents=[50], **field_config)
            )
        # Any other aggregation type is ignored and will simply be absent
        # from the returned metrics.

    return composite_agg


def _extract_bucket_metrics(bucket, requested_metrics):
    """Return a dict mapping aggregation name to its value for one bucket."""
    metrics_data = {}
    for metric in requested_metrics:
        agg_name = _generate_metric_agg_field_name(metric)
        if not hasattr(bucket, agg_name):
            continue

        agg_result = getattr(bucket, agg_name)
        agg_type = metric["aggregation"]

        if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
            metrics_data[agg_name] = agg_result.value
        elif agg_type == "median":
            # The percentiles result is keyed by the percentile as a string.
            metrics_data[agg_name] = agg_result.values.get("50.0")

    return metrics_data


def get_record_statistics(date_fields, search, requested_group_by, requested_metrics):
    """Aggregate record statistics for requested metrics.

    :param date_fields: List of date fields for the record type.
        Date fields require different handling when using them to group by.
    :param search: The base search object to apply aggregations on.
        It is not modified; the aggregation is attached to a copy.
    :param requested_group_by: List of group dictionaries with 'field' and,
        for date fields, 'interval' keys. The interval is forwarded unchanged
        as the OpenSearch `calendar_interval`, so it must be a value that
        OpenSearch accepts.
        Example: [{"field": "start_date", "interval": "1M"}, {"field": "state"}]
    :param requested_metrics: List of metric dictionaries with 'field' and 'aggregation' keys.
        Example: [{"field": "loan_duration", "aggregation": "avg"}]
    :returns: List of buckets, each a dict with the keys 'key' (the composite
        bucket key as a dict), 'doc_count' and 'metrics' (aggregation name to
        value). At most 1000 buckets are requested and no pagination is done.
    :raises KeyError: If a grouping on a date field has no 'interval'.
    """
    composite_agg = _build_composite_aggregation(
        date_fields, requested_group_by, requested_metrics
    )

    # Only retrieve aggregation results. Slicing returns a copy of the search,
    # so attaching the aggregation afterwards keeps the caller's search object
    # untouched (aggregations are otherwise added in place).
    search = search[:0]
    search.aggs.bucket("aggregations", composite_agg)
    result = search.execute()

    if not hasattr(result.aggregations, "aggregations"):
        return []

    return [
        {
            "key": bucket.key.to_dict(),
            "doc_count": bucket.doc_count,
            "metrics": _extract_bucket_metrics(bucket, requested_metrics),
        }
        for bucket in result.aggregations.aggregations.buckets
    ]

0 commit comments

Comments
 (0)