-
Notifications
You must be signed in to change notification settings - Fork 53
Loan statistics #1256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Loan statistics #1256
Changes from all commits
d874713
0863a6b
715e8ac
b82b815
ec52b93
5c9856b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |
| from invenio_circulation.proxies import current_circulation | ||
| from invenio_indexer.api import RecordIndexer | ||
| from invenio_pidstore.errors import PIDDeletedError | ||
| from invenio_search import current_search_client | ||
|
|
||
| from invenio_app_ils.circulation.utils import resolve_item_from_loan | ||
| from invenio_app_ils.documents.api import DOCUMENT_PID_TYPE | ||
|
|
@@ -97,3 +98,75 @@ def index_extra_fields_for_loan(loan_dict): | |
|
|
||
| can_circulate_items_count = document["circulation"]["can_circulate_items_count"] | ||
| loan_dict["can_circulate_items_count"] = can_circulate_items_count | ||
|
|
||
|
|
||
| def index_stats_fields_for_loan(loan_dict): | ||
| """Indexer hook to modify the loan record dict before indexing""" | ||
|
|
||
| creation_date = datetime.fromisoformat(loan_dict["_created"]).date() | ||
| start_date = ( | ||
| datetime.fromisoformat(loan_dict["start_date"]).date() | ||
| if loan_dict.get("start_date") | ||
| else None | ||
| ) | ||
| end_date = ( | ||
| datetime.fromisoformat(loan_dict["end_date"]).date() | ||
| if loan_dict.get("end_date") | ||
| else None | ||
| ) | ||
|
|
||
| # Collect extra information relevant for stats | ||
| stats = {} | ||
|
|
||
| # Time ranges in days | ||
| if start_date and end_date: | ||
| loan_duration = (end_date - start_date).days | ||
| stats["loan_duration"] = loan_duration | ||
|
|
||
| if creation_date and start_date: | ||
| waiting_time = (start_date - creation_date).days | ||
| stats["waiting_time"] = waiting_time if waiting_time >= 0 else None | ||
|
|
||
| # Document availability during loan request | ||
| stat_events_index_name = "events-stats-loan-transitions" | ||
| if current_search_client.indices.exists(index=stat_events_index_name): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be better to fail rather than silently not index things if this conditional statatement is not fulfiled. ping @ntarocco WDYT?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for context: the check was added because if there is no event at the moment a loan is indexed, there will be no events index and the loan indexer will throw an error in the lines below the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, if it is required before running this code (weird, shouldn't it be created automatically if not existing?), then I would fail instead.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a warning when this happens but still would not fail. While the index is required for adding a specific stat field (namely
Also note: |
||
| loan_pid = loan_dict["pid"] | ||
| search_body = { | ||
| "query": { | ||
| "bool": { | ||
| "must": [ | ||
| {"term": {"trigger": "request"}}, | ||
| {"term": {"pid_value": loan_pid}}, | ||
| ], | ||
| } | ||
| }, | ||
| } | ||
|
|
||
| search_result = current_search_client.search( | ||
| index=stat_events_index_name, body=search_body | ||
| ) | ||
| hits = search_result["hits"]["hits"] | ||
| if len(hits) == 1: | ||
| request_transition_event = hits[0]["_source"] | ||
| available_items_during_request_count = request_transition_event[ | ||
| "extra_data" | ||
| ]["available_items_during_request_count"] | ||
| stats["available_items_during_request"] = ( | ||
| available_items_during_request_count > 0 | ||
| ) | ||
| elif len(hits) > 1: | ||
| raise ValueError( | ||
| f"Multiple request transition events for loan {loan_pid}." | ||
| "Expected zero or one." | ||
| ) | ||
| else: | ||
| current_app.logger.error( | ||
| "Stats events index '{stat_events_index_name}' does not exist. " | ||
| "This is normal during initial setup or if no events have been processed yet. " | ||
| "No data is lost, as soon as the events are processed, " \ | ||
| "the loan wil lbe reindex and the the stat will be available." | ||
| ) | ||
|
|
||
| if not "extra_data" in loan_dict: | ||
| loan_dict["extra_data"] = {} | ||
| loan_dict["extra_data"]["stats"] = stats | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # Copyright (C) 2025 CERN. | ||
| # | ||
| # invenio-app-ils is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """Marshmallow schemas for loan statistics validation.""" | ||
|
|
||
| import json | ||
| import re | ||
|
|
||
| from marshmallow import ( | ||
| Schema, | ||
| ValidationError, | ||
| fields, | ||
| pre_load, | ||
| validate, | ||
| validates_schema, | ||
| ) | ||
|
|
||
| from invenio_app_ils.errors import InvalidParameterError | ||
|
|
||
| _OS_VALID_FIELD_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.]+$") | ||
| _OS_NATIVE_AGGREGATE_FUNCTION_TYPES = {"avg", "sum", "min", "max"} | ||
| _VALID_AGGREGATE_FUNCTION_TYPES = _OS_NATIVE_AGGREGATE_FUNCTION_TYPES.union({"median"}) | ||
| _VALID_DATE_INTERVALS = {"1d", "1w", "1M", "1q", "1y"} | ||
|
|
||
|
|
||
| def validate_field_name(field_name): | ||
| """Validate a field name for search to prevent injection attacks. | ||
|
|
||
| :param field_name: The field name to validate | ||
| :raises InvalidParameterError: If field name is invalid or potentially malicious | ||
| """ | ||
| if not _OS_VALID_FIELD_NAME_PATTERN.match(field_name): | ||
| raise InvalidParameterError( | ||
| description=( | ||
| f"Invalid field name '{field_name}'. " | ||
| "Field names may contain only alphanumeric characters, underscores, " | ||
| "and dots." | ||
| ) | ||
| ) | ||
|
|
||
|
|
||
| class SecureFieldNameField(fields.String): | ||
| """Marshmallow field that validates field names to prevent injection attacks.""" | ||
|
|
||
| def _deserialize(self, value, attr, data, **kwargs): | ||
| """Deserialize and validate field name.""" | ||
|
|
||
| field_name = super()._deserialize(value, attr, data, **kwargs) | ||
| validate_field_name(field_name) | ||
| return field_name | ||
|
|
||
|
|
||
| class GroupByItemSchema(Schema): | ||
| field = SecureFieldNameField(required=True) | ||
| interval = fields.String(validate=validate.OneOf(_VALID_DATE_INTERVALS)) | ||
|
|
||
| @validates_schema | ||
| def validate_date_fields(self, data, **kwargs): | ||
| """Validate that date fields have an interval and non-date fields do not.""" | ||
|
|
||
| date_fields = self.context["date_fields"] | ||
| field = data.get("field") | ||
| interval = data.get("interval") | ||
| if field in date_fields and not interval: | ||
| raise ValidationError( | ||
| {"interval": ["Interval is required for date fields."]} | ||
| ) | ||
| if field not in date_fields and interval is not None: | ||
| raise ValidationError( | ||
| {"interval": ["Interval must not be provided for non-date fields."]} | ||
| ) | ||
|
|
||
|
|
||
| class MetricItemSchema(Schema): | ||
| """Schema for validating a single metric item.""" | ||
|
|
||
| field = SecureFieldNameField(required=True) | ||
| aggregation = fields.String( | ||
| required=True, validate=validate.OneOf(_VALID_AGGREGATE_FUNCTION_TYPES) | ||
| ) | ||
|
|
||
|
|
||
| class HistogramParamsSchema(Schema): | ||
| """Schema for validating the query string parameters for the histogram endpoint""" | ||
|
|
||
| metrics = fields.List(fields.Nested(MetricItemSchema), required=False) | ||
| group_by = fields.List( | ||
| fields.Nested(GroupByItemSchema), required=True, validate=validate.Length(min=1) | ||
| ) | ||
| q = fields.String() | ||
|
|
||
| def __init__(self, date_fields, *args, **kwargs): | ||
| super().__init__(*args, **kwargs) | ||
| self.context = {"date_fields": set(date_fields)} | ||
|
|
||
| @pre_load | ||
| def parse_query_string(self, data, **kwargs): | ||
| """Parse the metrics and group_by parameters from JSON strings.""" | ||
|
|
||
| try: | ||
| for key in ("metrics", "group_by"): | ||
| # default value as the field "metrics" is not required | ||
| data[key] = json.loads(data.get(key, "[]")) | ||
| except Exception as e: | ||
| raise ValidationError from e | ||
| return data |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # This file is part of Invenio. | ||
| # Copyright (C) 2025-2025 CERN. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
|
|
||
| from invenio_app_ils.circulation.stats.serializers.response import loan_stats_responsify | ||
| from invenio_app_ils.circulation.stats.serializers.schema import HistogramStatsV1 | ||
|
|
||
| loan_stats_response = loan_stats_responsify(HistogramStatsV1, "application/json") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # This file is part of Invenio. | ||
| # Copyright (C) 2025-2025 CERN. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """Invenio App ILS loan stats response serializers.""" | ||
|
|
||
| import json | ||
|
|
||
| from flask import current_app | ||
|
|
||
|
|
||
| def loan_stats_responsify(schema_class, mimetype): | ||
| """Loan stats response serializer. | ||
|
|
||
| :param schema_class: Schema instance. | ||
| :param mimetype: MIME type of response. | ||
| """ | ||
|
|
||
| def view(data, code=200, headers=None): | ||
| """Generate the response object.""" | ||
| # return jsonify(data), code | ||
| response_data = schema_class().dump(data) | ||
|
|
||
| response = current_app.response_class( | ||
| json.dumps(response_data), mimetype=mimetype | ||
| ) | ||
| response.status_code = code | ||
|
|
||
| if headers is not None: | ||
| response.headers.extend(headers) | ||
| return response | ||
|
|
||
| return view |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # This file is part of Invenio. | ||
| # Copyright (C) 2025-2025 CERN. | ||
| # | ||
| # Invenio is free software; you can redistribute it and/or modify it | ||
| # under the terms of the MIT License; see LICENSE file for more details. | ||
|
|
||
| """Invenio App ILS loan stats serializers schema.""" | ||
|
|
||
| from marshmallow import Schema, fields | ||
|
|
||
|
|
||
| class BucketSchema(Schema): | ||
| """Schema for a single histogram bucket.""" | ||
|
|
||
| doc_count = fields.Int(required=True) | ||
| key = fields.Dict(keys=fields.String(), values=fields.String()) | ||
|
|
||
| metrics = fields.Dict( | ||
| keys=fields.String(), | ||
| values=fields.Float(), | ||
| ) | ||
|
|
||
|
|
||
| class HistogramStatsV1(Schema): | ||
| """Schema for a stats histogram response.""" | ||
|
|
||
| buckets = fields.List( | ||
| fields.Nested(BucketSchema), | ||
| required=True, | ||
| description="Statistics buckets.", | ||
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't it better to return
0thanNone?If we want to distinguish self-checkout from normal loans, this won't be enough (since a normal loan can be retrieved on the same day).
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just wanted to make sure there are no negative values, which would skew the statistics. Like when doing a sum over the waiting time and there is one loan where the
start_datewas manually set before thecreation_dateI don't think it should influence the waiting time statistic. But it might be good to have a discussion about how to handle such cases?Regarding distinguishing
self-checkoutandnormalloans: one can filter with theqparameter for the delivery option.