
Commit ee96771

cshiels-ie and claude committed
[AAP-73135] Revert to sync_mode after batch-limit approach proven insufficient
Testing showed the batch-limit heuristic still dropped events:

- 17 chunks in one batch (~425 KB tracked): only 12 arrived
- 26 chunks split into two batches: only 17 arrived

Root cause: the SDK adds ~2-3 KB of per-event metadata (context, timestamps, messageId, integrations) that our data-size estimate did not account for, pushing actual batch bodies over Segment's 500 KB limit despite our 450 KB threshold.

sync_mode=True sends each track() as a separate blocking HTTP request (~25 KB each) instead of batching, which eliminates the batch-size problem entirely. Local end-to-end testing confirmed all 15 chunks arrive reliably with sync_mode=True.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent fdb8e12 commit ee96771
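
For context, a minimal sketch of the two send modes in Segment's analytics-python module-level API. The write key, user id, and event payload below are placeholders for illustration, not values from this repo:

    import analytics  # segment-analytics-python

    analytics.write_key = 'PLACEHOLDER_WRITE_KEY'

    # Default mode: track() only enqueues the event; a background consumer
    # later POSTs the whole queue as a single batch request. A body over
    # Segment's 500 KB cap can drop events while still returning HTTP 200.
    analytics.track(user_id='u1', event='Chunk Event', properties={'chunk': 1})
    analytics.flush()

    # sync_mode: each track() becomes its own blocking HTTP request, so no
    # request body ever grows beyond one event (~25 KB per chunk here).
    analytics.sync_mode = True
    analytics.track(user_id='u1', event='Chunk Event', properties={'chunk': 2})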

2 files changed: 13 additions and 47 deletions

metrics_utility/library/storage/segment.py

Lines changed: 7 additions & 17 deletions
@@ -21,12 +21,6 @@ class StorageSegment:
     # including `properties` wrapper, event name, and segment_meta; keep this conservative.
     REGULAR_MESSAGE_LIMIT = 24 * 1024
 
-    # Segment enforces a 500KB limit per batch POST. We flush before reaching it,
-    # leaving headroom for per-event metadata the SDK adds (anonymousId, timestamp,
-    # context, etc.). EVENT_OVERHEAD is a conservative per-event estimate.
-    BATCH_SIZE_LIMIT = 450 * 1024
-    EVENT_OVERHEAD = 512
-
     def __init__(self, **settings):
         self.debug = settings.get('debug', False)
         self.user_id = settings.get('user_id', 'unknown')
@@ -136,6 +130,11 @@ def put(self, artifact_name, *, filename=None, fileobj=None, dict=None, event_name
         # Configure Segment client
         analytics.write_key = self.write_key
         analytics.debug = self.debug
+        # sync_mode makes each track() a blocking HTTP request instead of queuing to a
+        # background thread. Without it the SDK batches all chunks into one POST which
+        # can silently exceed Segment's 500 KB batch limit and drop events, returning
+        # HTTP 200 with no error callback fired.
+        analytics.sync_mode = True
 
         max_size = self.REGULAR_MESSAGE_LIMIT
         chunks = self._split_into_chunks(dict, max_size)
@@ -150,17 +149,9 @@ def put(self, artifact_name, *, filename=None, fileobj=None, dict=None, event_name
             segment_meta = {}
         message_id = segment_meta.get('message_id', None)
 
-        # Send each chunk, flushing before the batch would exceed Segment's 500KB limit
-        batch_bytes = 0
+        # Send each chunk
         for i, chunk in enumerate(chunks, 1):
             chunk_size = self._calculate_size(chunk)
-            event_size = chunk_size + self.EVENT_OVERHEAD
-
-            if batch_bytes + event_size > self.BATCH_SIZE_LIMIT and batch_bytes > 0:
-                if self.debug:
-                    print(f'Flushing batch at {batch_bytes} bytes before adding chunk {i}', file=sys.stderr)
-                analytics.flush()
-                batch_bytes = 0
 
             # chunk hash = sha256(message hash + chunk index)
             if message_id:
@@ -187,9 +178,8 @@ def put(self, artifact_name, *, filename=None, fileobj=None, dict=None, event_name
                 },
                 **segment_meta,
            )
-            batch_bytes += event_size
 
-        # Flush remaining queued events
+        # Flush to ensure all events are sent
         analytics.flush()
 
         return chunks
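
To make the root cause concrete, a back-of-envelope sketch using the commit's own figures; the ~3 KB actual overhead is the commit message's observation, not a measured constant:

    # 17 chunks of ~25 KB tracked data (~425 KB total), previously flushed
    # against a 450 KB threshold that budgeted only 512 bytes of metadata
    # per event.
    CHUNKS = 17
    PAYLOAD = 25 * 1024          # ~25 KB of properties per chunk
    ESTIMATED_OVERHEAD = 512     # what the removed EVENT_OVERHEAD assumed
    ACTUAL_OVERHEAD = 3 * 1024   # ~2-3 KB: context, timestamps, messageId, integrations

    estimated = CHUNKS * (PAYLOAD + ESTIMATED_OVERHEAD)  # ~433 KB: under the 450 KB threshold, so no flush fires
    actual = CHUNKS * (PAYLOAD + ACTUAL_OVERHEAD)        # ~476 KB: brushing the 500 KB cap before the batch envelope is even counted

    print(f'estimated: {estimated / 1024:.0f} KB, actual: {actual / 1024:.0f} KB (cap: 500 KB)')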

metrics_utility/test/library/test_storage_segment.py

Lines changed: 6 additions & 30 deletions
@@ -133,7 +133,6 @@ def test_put_sends_multiple_chunks_for_large_data(self, mock_analytics):
         # Should split into 7 chunks as tested earlier
         assert len(chunks) == 7
         assert mock_analytics.track.call_count == 7
-        # 7 chunks well under 450 KB batch limit — only the final flush should fire
         assert mock_analytics.flush.call_count == 1
 
         # Verify chunk numbering in the calls
@@ -145,43 +144,20 @@ def test_put_sends_multiple_chunks_for_large_data(self, mock_analytics):
 
     @patch('metrics_utility.library.storage.segment.analytics')
     @patch('metrics_utility.library.storage.segment.SEGMENT_AVAILABLE', True)
-    def test_put_single_flush_when_chunks_fit_in_one_batch(self, mock_analytics):
-        """All chunks are sent in one batch when total size is below BATCH_SIZE_LIMIT."""
-        mock_analytics.track = Mock()
-        mock_analytics.flush = Mock()
-
-        storage_segment = StorageSegment(write_key='test_write_key', debug=False)
-        chunks = storage_segment.put(
-            artifact_name='test_artifact',
-            dict=segment_data_large,
-            event_name='Test Event',
-        )
+    def test_put_sync_mode_enabled(self, mock_analytics):
+        """sync_mode is set to True so each track() is a blocking HTTP request.
 
-        assert mock_analytics.track.call_count == len(chunks)
-        assert mock_analytics.flush.call_count == 1
-
-    @patch('metrics_utility.library.storage.segment.analytics')
-    @patch('metrics_utility.library.storage.segment.SEGMENT_AVAILABLE', True)
-    def test_put_flushes_mid_loop_when_batch_limit_reached(self, mock_analytics):
-        """flush() is called before queuing a chunk that would push the batch over BATCH_SIZE_LIMIT.
-
-        Segment silently drops events from batch POSTs that exceed 500 KB and returns
-        HTTP 200, so we flush proactively before reaching that threshold.
+        Without sync_mode the SDK batches all chunks into one POST which can silently
+        exceed Segment's 500 KB batch limit and drop events with no error raised.
         """
         mock_analytics.track = Mock()
         mock_analytics.flush = Mock()
 
         storage_segment = StorageSegment(write_key='test_write_key', debug=False)
-        # Lower the limit so each large (~24 KB) chunk forces its own batch
-        storage_segment.BATCH_SIZE_LIMIT = storage_segment.REGULAR_MESSAGE_LIMIT + storage_segment.EVENT_OVERHEAD
-
-        chunks = storage_segment.put(
+        storage_segment.put(
             artifact_name='test_artifact',
             dict=segment_data_large,
             event_name='Test Event',
         )
 
-        # Every chunk must still be tracked — none dropped
-        assert mock_analytics.track.call_count == len(chunks)
-        # Multiple flushes confirm that mid-loop splitting occurred
-        assert mock_analytics.flush.call_count > 1
+        assert mock_analytics.sync_mode is True
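
A typical way to run just the reworked test in isolation, assuming pytest is the project's test runner:

    pytest metrics_utility/test/library/test_storage_segment.py -k test_put_sync_mode_enabled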
