2727import operator
2828from collections import defaultdict
2929from dataclasses import dataclass
30+ from datetime import timedelta
3031from typing import TYPE_CHECKING , Any , ClassVar , final
3132
3233from django import http
@@ -621,10 +622,15 @@ def make_page_timing_stats_list(
621622 PPerm .included_in_grade_statistics )
622623 )
623624
624- # Annotate each submitted-answer visit with its elapsed duration and push
625- # Count/Avg/Min/Max aggregation to the database. StdDev is not universally
625+ # Annotate each submitted-answer visit with its elapsed duration.
626+ # Negative durations (clock-skew or data anomalies where the answer visit
627+ # was recorded before the page view) are excluded at the database level so
628+ # that the aggregated values and the per-row durations fetched for stddev
629+ # are computed from the same filtered set.
630+ # Count/Avg/Min/Max are pushed to the database. StdDev is not universally
626631 # supported across database backends (notably absent from SQLite), so sample
627- # standard deviation is computed in Python from the per-page duration lists.
632+ # standard deviation is computed in Python from a second fetch of the
633+ # per-page duration values. This is a known two-query approach for stddev.
628634 annotated_qs = (visits_qs
629635 .annotate (preceding_visit_time = preceding_time_sq )
630636 .filter (preceding_visit_time__isnull = False )
@@ -634,6 +640,7 @@ def make_page_timing_stats_list(
634640 output_field = DurationField (),
635641 )
636642 )
643+ .filter (duration__gte = timedelta (0 ))
637644 )
638645
639646 page_agg = (annotated_qs
@@ -650,12 +657,7 @@ def make_page_timing_stats_list(
650657 page_durations : dict [tuple [str , str ], list [float ]] = defaultdict (list )
651658 for row in (annotated_qs
652659 .values_list ("page_data__group_id" , "page_data__page_id" , "duration" )):
653- key = (row [0 ], row [1 ])
654- minutes = row [2 ].total_seconds () / 60
655- # Skip negative durations; these can arise from clock-skew or data
656- # anomalies where the answer visit was recorded before the page view.
657- if minutes >= 0 :
658- page_durations [key ].append (minutes )
660+ page_durations [row [0 ], row [1 ]].append (row [2 ].total_seconds () / 60 )
659661
660662 # Build result list.
661663 result : list [PageTimingStats ] = []
@@ -677,12 +679,13 @@ def make_page_timing_stats_list(
677679 )
678680
679681 # Sample stddev (requires at least two observations).
682+ # avg_time and page_durations are derived from the same filtered set, so
683+ # they are consistent.
680684 stddev_time : float | None = None
681- if count >= 2 and avg_time is not None :
682- times = page_durations .get ((group_id , page_id ), [])
683- if len (times ) >= 2 :
684- variance = sum ((t - avg_time ) ** 2 for t in times ) / (len (times ) - 1 )
685- stddev_time = variance ** 0.5
685+ times = page_durations .get ((group_id , page_id ), [])
686+ if len (times ) >= 2 and avg_time is not None :
687+ variance = sum ((t - avg_time ) ** 2 for t in times ) / (len (times ) - 1 )
688+ stddev_time = variance ** 0.5
686689
687690 result .append (PageTimingStats (
688691 group_id = group_id ,
0 commit comments