11"""Data endpoints optimized for reports in the Reporter blueprint."""
22
33from datetime import datetime
4- from datetime import timedelta
54from operator import itemgetter
65
76from dateutil .rrule import DAILY
87from dateutil .rrule import rrule
8+ from sqlalchemy import case
99
1010from AIPscan import db
1111from AIPscan .Data import fields
@@ -702,8 +702,12 @@ def storage_locations_usage_over_time(
     :param storage_service_id: Storage Service ID (int)
     :param start_date: Inclusive AIP creation start date
         (datetime.datetime object)
-    :param end_date: Inclusive AIP creation end date
-        (datetime.datetime object)
+    :param end_date: Exclusive upper bound for AIP creation timestamps
+        (datetime.datetime). Callers that work with inclusive calendar dates
+        should use ``parse_datetime_bound(..., upper=True)``.
+        For example, the range "2024-01-01 to 2024-01-31" becomes:
+        - ``start_date = 2024-01-01 00:00:00``, and
+        - ``end_date = 2024-02-01 00:00:00``
     :param cumulative: Flag indicating whether to calculate cumulatively, where
         each month adds to previous totals (bool)
 
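The half-open interval documented above means callers convert an inclusive calendar range before invoking this function. A minimal sketch of that conversion, assuming a date-string input; the real parse_datetime_bound helper named in the docstring may differ in signature and parsing, so treat this as illustrative only:

from datetime import datetime, timedelta

def parse_datetime_bound(date_str, upper=False):
    """Hypothetical stand-in for the helper named in the docstring above."""
    bound = datetime.strptime(date_str, "%Y-%m-%d")
    if upper:
        # Roll forward one day so the bound can be used with a strict `<` filter.
        bound += timedelta(days=1)
    return bound

# The inclusive range "2024-01-01 to 2024-01-31" becomes a half-open interval:
start_date = parse_datetime_bound("2024-01-01")            # 2024-01-01 00:00:00
end_date = parse_datetime_bound("2024-01-31", upper=True)  # 2024-02-01 00:00:00

A strict `<` upper bound avoids the classic off-by-one where a `23:59:59` cutoff misses AIPs created in the final second of the day.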
@@ -714,35 +718,91 @@ def storage_locations_usage_over_time(
     report = {}
     report[fields.FIELD_STORAGE_NAME] = get_storage_service_name(storage_service_id)
 
-    locations = _get_storage_locations(storage_service_id)
+    locations = sorted(
+        _get_storage_locations(storage_service_id), key=lambda loc: loc.id
+    )
+    location_ids = [loc.id for loc in locations]
     days = _get_days_covered_by_date_range(storage_service_id, start_date, end_date)
 
-    results = {}
+    # Build a single aggregated query that returns daily metrics per location.
+    # MySQL: DATE(AIP.create_date) groups by calendar day.
+    day_col = db.func.date(AIP.create_date).label("day")
 
-    for day in days:
-        daily_locations_data = []
+    # CASE expression to count only original files.
+    orig_file_case = case((File.file_type == FileType.original, 1), else_=0)
+
+    aggregated = (
+        db.session.query(
+            day_col,
+            AIP.storage_location_id.label("storage_location_id"),
+            db.func.count(db.func.distinct(AIP.id)).label("aips"),
+            db.func.coalesce(db.func.sum(File.size), 0).label("size_sum"),
+            db.func.coalesce(db.func.sum(orig_file_case), 0).label("orig_files"),
+        )
+        .outerjoin(File, File.aip_id == AIP.id)
+        .filter(AIP.storage_service_id == storage_service_id)
+        .filter(AIP.create_date >= start_date)
+        .filter(AIP.create_date < end_date)
+        .group_by(day_col, AIP.storage_location_id)
+        .all()
+    )
 
-        for location in locations:
-            loc_info = {}
+    # Map aggregated rows for quick lookup.
+    daily_loc_metrics = {}  # {day_str: {loc_id: {aips, size, files}}}
+    for row in aggregated:
+        # row.day can be date/datetime; normalize to YYYY-MM-DD string.
+        day_str = str(row.day)
+        loc_id = row.storage_location_id
+        day_bucket = daily_loc_metrics.setdefault(day_str, {})
+        day_bucket[loc_id] = {
+            fields.FIELD_AIPS: int(row.aips or 0),
+            fields.FIELD_SIZE: int(row.size_sum or 0),
+            fields.FIELD_FILE_COUNT: int(row.orig_files or 0),
+        }
+
+    # Prepare cumulative running totals per location if needed.
+    running_totals = {
+        loc_id: {fields.FIELD_AIPS: 0, fields.FIELD_SIZE: 0, fields.FIELD_FILE_COUNT: 0}
+        for loc_id in location_ids
+    }
 
-            subquery_start_date = datetime.strptime(day, "%Y-%m-%d")
-            subquery_end_date = subquery_start_date + timedelta(days=1)
-            if cumulative:
-                subquery_start_date = datetime.strptime(days[0], "%Y-%m-%d")
+    results = {}
+    for day in days:
+        daily_locations_data = []
 
-            loc_info[fields.FIELD_ID] = location.id
-            loc_info[fields.FIELD_UUID] = location.uuid
-            loc_info[fields.FIELD_STORAGE_LOCATION] = location.description
-            loc_info[fields.FIELD_AIPS] = location.aip_count(
-                subquery_start_date, subquery_end_date
-            )
-            loc_info[fields.FIELD_SIZE] = location.aip_total_size(
-                subquery_start_date, subquery_end_date
-            )
-            loc_info[fields.FIELD_FILE_COUNT] = location.file_count(
-                subquery_start_date, subquery_end_date
+        # Use aggregated metrics for the given day; default to zeros.
+        day_metrics = daily_loc_metrics.get(day, {})
+        for loc in locations:
+            base = day_metrics.get(
+                loc.id,
+                {
+                    fields.FIELD_AIPS: 0,
+                    fields.FIELD_SIZE: 0,
+                    fields.FIELD_FILE_COUNT: 0,
+                },
             )
 
+            if cumulative:
+                run = running_totals[loc.id]
+                run[fields.FIELD_AIPS] += base[fields.FIELD_AIPS]
+                run[fields.FIELD_SIZE] += base[fields.FIELD_SIZE]
+                run[fields.FIELD_FILE_COUNT] += base[fields.FIELD_FILE_COUNT]
+                aips_val = run[fields.FIELD_AIPS]
+                size_val = run[fields.FIELD_SIZE]
+                files_val = run[fields.FIELD_FILE_COUNT]
+            else:
+                aips_val = base[fields.FIELD_AIPS]
+                size_val = base[fields.FIELD_SIZE]
+                files_val = base[fields.FIELD_FILE_COUNT]
+
+            loc_info = {
+                fields.FIELD_ID: loc.id,
+                fields.FIELD_UUID: loc.uuid,
+                fields.FIELD_STORAGE_LOCATION: loc.description,
+                fields.FIELD_AIPS: aips_val,
+                fields.FIELD_SIZE: size_val,
+                fields.FIELD_FILE_COUNT: files_val,
+            }
             daily_locations_data.append(loc_info)
 
         results[day] = daily_locations_data
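Taken together, the hunk above replaces the old per-day, per-location query loop (three location.* queries for every day × location pair) with one grouped query plus an in-memory fold. A self-contained sketch of that fold, with made-up day strings, location ids, and metric keys standing in for the real result rows and fields.* constants:

# Standalone sketch of the fold performed above, using plain tuples in
# place of SQLAlchemy result rows. All values here are illustrative.
rows = [
    ("2024-01-01", 1, {"aips": 2, "size": 100, "files": 5}),
    ("2024-01-02", 1, {"aips": 1, "size": 50, "files": 2}),
    ("2024-01-02", 2, {"aips": 3, "size": 300, "files": 9}),
]
days = ["2024-01-01", "2024-01-02", "2024-01-03"]
location_ids = [1, 2]
ZERO = {"aips": 0, "size": 0, "files": 0}

# Index rows once for O(1) lookup: {day: {loc_id: metrics}}.
by_day = {}
for day, loc_id, metrics in rows:
    by_day.setdefault(day, {})[loc_id] = metrics

def report(cumulative):
    totals = {loc: dict(ZERO) for loc in location_ids}
    results = {}
    for day in days:
        snapshot = []
        for loc in location_ids:
            base = by_day.get(day, {}).get(loc, ZERO)
            if cumulative:
                # Fold the day's metrics into the running totals.
                for key in ZERO:
                    totals[loc][key] += base[key]
                base = dict(totals[loc])
            snapshot.append({"location": loc, **base})
        results[day] = snapshot
    return results

assert report(cumulative=True)["2024-01-02"][0]["aips"] == 3   # 2 on day 1 + 1 on day 2
assert report(cumulative=False)["2024-01-03"][1]["size"] == 0  # no AIPs stored that day

Cumulative mode simply folds each day's base metrics into per-location running totals before taking a snapshot, which is why the aggregated query never needs to be re-run per day.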