Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions metrics_utility/library/storage/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ def put(self, artifact_name, *, filename=None, fileobj=None, dict=None, event_na
# Configure Segment client
analytics.write_key = self.write_key
analytics.debug = self.debug
# sync_mode makes each track() a blocking HTTP request instead of queuing to a
# background thread. Without it the SDK batches all chunks into one POST, which
# can exceed Segment's 500 KB batch limit; Segment then silently drops events
# while still returning HTTP 200, so no error callback fires.
analytics.sync_mode = True

max_size = self.REGULAR_MESSAGE_LIMIT
chunks = self._split_into_chunks(dict, max_size)
Expand Down
43 changes: 29 additions & 14 deletions metrics_utility/test/library/test_storage_segment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# import segment data from testing_data_for_segment

# import storage segment
from unittest.mock import Mock, patch

from metrics_utility.library.storage.segment import StorageSegment
Expand Down Expand Up @@ -88,26 +85,17 @@ def test_rollup_period_string_arrays(self):
@patch('metrics_utility.library.storage.segment.analytics')
@patch('metrics_utility.library.storage.segment.SEGMENT_AVAILABLE', True)
def test_put_sends_data_to_segment(self, mock_analytics):
"""Test that put method sends data to segment.com with proper mocking."""
# Setup
"""put() tracks every chunk, flushes once, and enables sync_mode."""
mock_analytics.track = Mock()
mock_analytics.flush = Mock()

storage_segment = StorageSegment(write_key='test_write_key', user_id='test_user', debug=True)

# Act
chunks = storage_segment.put(artifact_name='test_artifact', dict=segment_data, event_name='Test Event')

# Assert
# Verify analytics.track was called
assert mock_analytics.track.called
assert mock_analytics.sync_mode is True
assert mock_analytics.track.call_count == len(chunks)

# Verify flush was called
assert mock_analytics.flush.called
assert mock_analytics.flush.call_count == 1

# Verify the call arguments
call_args = mock_analytics.track.call_args[1]
assert 'anonymous_id' in call_args
assert call_args['event'] == 'Test Event'
Expand Down Expand Up @@ -141,3 +129,30 @@ def test_put_sends_multiple_chunks_for_large_data(self, mock_analytics):
chunk_info = call_kwargs['properties']['chunk_info']
assert chunk_info['chunk_number'] == i
assert chunk_info['total_chunks'] == 7

@patch('metrics_utility.library.storage.segment.analytics')
@patch('metrics_utility.library.storage.segment.SEGMENT_AVAILABLE', True)
def test_put_sync_mode_no_batch_drops(self, mock_analytics):
    """put() must enable sync_mode so a large payload is delivered chunk by chunk.

    Background: when sync_mode is off, the SDK batches all track() calls into a
    single POST. With 15 chunks at ~25 KB each, that batch exceeds Segment's
    500 KB limit and events are dropped server-side, while Segment still
    returns HTTP 200 and no error callback fires.

    With sync_mode on, each track() is a separate blocking HTTP request, so
    every chunk is confirmed delivered before the next one is sent.
    End-to-end validation: 15/15 chunks received in Segment with
    sync_mode=True versus 11-14 without it.
    """
    # Replace the SDK entry points with plain mocks so calls can be counted.
    mock_analytics.track, mock_analytics.flush = Mock(), Mock()

    storage = StorageSegment(write_key='test_write_key', debug=False)
    sent_chunks = storage.put(artifact_name='test_artifact', dict=segment_data_large, event_name='Test Event')
    expected_track_calls = len(sent_chunks)

    # sync_mode is set on the (mocked) analytics module by put().
    assert mock_analytics.sync_mode is True
    # One track() per chunk, followed by exactly one flush().
    assert mock_analytics.track.call_count == expected_track_calls
    assert mock_analytics.flush.call_count == 1