Skip to content

Commit 66306ee

Browse files
authored
Merge pull request #27 from ClipABit/unit-integration-tests
Unit test - Pinecone Connector
2 parents ec852d4 + 596b706 commit 66306ee

File tree

2 files changed

+292
-0
lines changed

2 files changed

+292
-0
lines changed

backend/tests/conftest.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from preprocessing.compressor import Compressor
2424
from preprocessing.preprocessor import Preprocessor
2525
from models.metadata import VideoChunk
26+
from database.pinecone_connector import PineconeConnector
2627

2728

2829
# ==============================================================================
@@ -126,6 +127,12 @@ def sample_video_chunk() -> VideoChunk:
126127
)
127128

128129

130+
@pytest.fixture
def sample_embedding() -> np.ndarray:
    """Return a deterministic 512-dimensional float32 embedding for tests.

    512 is the typical CLIP embedding size. The vector is drawn from a
    locally seeded generator so every test run sees the same values, which
    keeps failures reproducible and leaves NumPy's global random state
    untouched.

    Returns:
        A 1-D ``np.float32`` array of length 512 with values in ``[0, 1)``.
    """
    rng = np.random.default_rng(seed=0)
    return rng.random(512, dtype=np.float32)
134+
135+
129136
# ==============================================================================
130137
# COMPONENT FIXTURES
131138
# ==============================================================================
@@ -194,6 +201,21 @@ def mock_modal_dict(mocker):
194201
return fake_dict
195202

196203

204+
@pytest.fixture
def mock_pinecone_connector(mocker):
    """Provide a PineconeConnector wired to mocked Pinecone objects.

    The ``Pinecone`` name inside ``database.pinecone_connector`` is patched so
    that calling it yields a mock client, and that client's ``Index(...)``
    call yields a mock index.

    Returns:
        A ``(connector, mock_index, mock_client, mock_pinecone)`` tuple, where
        ``mock_pinecone`` is the patched ``Pinecone`` callable.
    """
    index_double = mocker.MagicMock()
    client_double = mocker.MagicMock()
    client_double.Index.return_value = index_double

    # Patch the name as seen by the connector module, not the pinecone package.
    pinecone_cls = mocker.patch(
        'database.pinecone_connector.Pinecone',
        return_value=client_double,
    )

    connector = PineconeConnector(api_key="test-key", index_name="test-index")

    return connector, index_double, client_double, pinecone_cls
217+
218+
197219
# ==============================================================================
198220
# PYTEST CONFIG
199221
# ==============================================================================
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
import numpy as np
2+
from database.pinecone_connector import PineconeConnector
3+
4+
5+
class TestPineconeConnectorInitialization:
    """Tests for constructing a PineconeConnector."""

    def test_initializes_with_api_key_and_index_name(self, mocker):
        """Connector keeps the index name and builds its client from the API key."""
        fake_client = mocker.MagicMock()
        pinecone_cls = mocker.patch(
            'database.pinecone_connector.Pinecone',
            return_value=fake_client,
        )

        connector = PineconeConnector(api_key="test-api-key", index_name="test-index")

        # The Pinecone client must be created exactly once, with the given key.
        pinecone_cls.assert_called_once_with(api_key="test-api-key")
        assert connector.client == fake_client
        assert connector.index_name == "test-index"
19+
20+
21+
class TestUpsertChunk:
    """Tests for ``PineconeConnector.upsert_chunk`` against a mocked index."""

    @staticmethod
    def _upsert_kwargs(index):
        """Return the keyword arguments of the latest ``index.upsert`` call."""
        return index.upsert.call_args.kwargs

    def test_upsert_chunk_success(self, mock_pinecone_connector, sample_embedding):
        """A fully specified upsert succeeds and forwards id, namespace and metadata."""
        connector, index, client, _ = mock_pinecone_connector

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
            namespace="test-namespace",
            metadata={"video_id": "video-1", "start_time": 0.0},
        )

        assert outcome is True
        client.Index.assert_called_once_with("test-index")
        index.upsert.assert_called_once()

        sent = self._upsert_kwargs(index)
        assert sent['namespace'] == "test-namespace"

        vectors = sent['vectors']
        assert len(vectors) == 1
        # Each vector entry is indexed as (id, values, metadata).
        entry = vectors[0]
        assert entry[0] == "chunk-123"
        assert entry[2]["video_id"] == "video-1"

    def test_upsert_chunk_with_default_namespace(self, mock_pinecone_connector, sample_embedding):
        """Omitting the namespace falls back to ``__default__``."""
        connector, index, _, _ = mock_pinecone_connector

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        assert outcome is True
        assert self._upsert_kwargs(index)['namespace'] == "__default__"

    def test_upsert_chunk_without_metadata(self, mock_pinecone_connector, sample_embedding):
        """Omitting metadata sends an empty metadata dict."""
        connector, index, _, _ = mock_pinecone_connector

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        assert outcome is True
        entry = self._upsert_kwargs(index)['vectors'][0]
        assert entry[2] == {}  # Empty metadata dict

    def test_upsert_chunk_converts_numpy_to_list(self, mock_pinecone_connector, sample_embedding):
        """The embedding reaches the index as a plain list, not a numpy array."""
        connector, index, _, _ = mock_pinecone_connector

        connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        values = self._upsert_kwargs(index)['vectors'][0][1]
        # Verify embedding was converted to list (not numpy array)
        assert isinstance(values, list)
        assert not isinstance(values, np.ndarray)

    def test_upsert_chunk_handles_exception(self, mock_pinecone_connector, sample_embedding):
        """An error raised by the index is reported as ``False``."""
        connector, index, _, _ = mock_pinecone_connector
        index.upsert.side_effect = Exception("Pinecone error")

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        assert outcome is False

    def test_upsert_multiple_chunks(self, mock_pinecone_connector, sample_embedding):
        """Three consecutive upserts each succeed and each reach the index."""
        connector, index, _, _ = mock_pinecone_connector

        requests = (
            ("chunk-1", {"id": 1}),
            ("chunk-2", {"id": 2}),
            ("chunk-3", {"id": 3}),
        )
        outcomes = [
            connector.upsert_chunk(chunk_id, sample_embedding, metadata=meta)
            for chunk_id, meta in requests
        ]

        for outcome in outcomes:
            assert outcome is True
        assert index.upsert.call_count == 3
111+
112+
113+
class TestQueryChunks:
    """Tests for ``PineconeConnector.query_chunks`` against a mocked index."""

    @staticmethod
    def _query_kwargs(index):
        """Return the keyword arguments of the latest ``index.query`` call."""
        return index.query.call_args.kwargs

    def test_query_chunks_success(self, mock_pinecone_connector, sample_embedding):
        """A query returns the index matches and forwards its parameters."""
        connector, index, _, _ = mock_pinecone_connector

        # Canned response the mocked index hands back.
        matches = [
            {'id': 'chunk-1', 'score': 0.95, 'metadata': {'video_id': 'video-1'}},
            {'id': 'chunk-2', 'score': 0.87, 'metadata': {'video_id': 'video-1'}},
            {'id': 'chunk-3', 'score': 0.82, 'metadata': {'video_id': 'video-2'}},
        ]
        index.query.return_value = {'matches': matches}

        found = connector.query_chunks(
            query_embedding=sample_embedding,
            namespace="test-namespace",
            top_k=3,
        )

        assert len(found) == 3
        best = found[0]
        assert best['id'] == 'chunk-1'
        assert best['score'] == 0.95

        index.query.assert_called_once()
        sent = self._query_kwargs(index)
        assert sent['namespace'] == "test-namespace"
        assert sent['top_k'] == 3
        assert sent['include_metadata'] is True

    def test_query_chunks_with_default_namespace(self, mock_pinecone_connector, sample_embedding):
        """Omitting the namespace falls back to ``__default__``."""
        connector, index, _, _ = mock_pinecone_connector
        index.query.return_value = {'matches': []}

        connector.query_chunks(query_embedding=sample_embedding)

        assert self._query_kwargs(index)['namespace'] == "__default__"

    def test_query_chunks_with_default_top_k(self, mock_pinecone_connector, sample_embedding):
        """Omitting ``top_k`` falls back to 5."""
        connector, index, _, _ = mock_pinecone_connector
        index.query.return_value = {'matches': []}

        connector.query_chunks(query_embedding=sample_embedding)

        assert self._query_kwargs(index)['top_k'] == 5

    def test_query_chunks_converts_numpy_to_list(self, mock_pinecone_connector, sample_embedding):
        """The query vector reaches the index as a plain list, not a numpy array."""
        connector, index, _, _ = mock_pinecone_connector
        index.query.return_value = {'matches': []}

        connector.query_chunks(query_embedding=sample_embedding)

        vector = self._query_kwargs(index)['vector']
        # Verify embedding was converted to list (not numpy array)
        assert isinstance(vector, list)
        assert not isinstance(vector, np.ndarray)

    def test_query_chunks_handles_exception(self, mock_pinecone_connector, sample_embedding):
        """An error raised by the index yields an empty result list."""
        connector, index, _, _ = mock_pinecone_connector
        index.query.side_effect = Exception("Pinecone error")

        found = connector.query_chunks(query_embedding=sample_embedding)

        assert found == []

    def test_query_chunks_with_custom_top_k(self, mock_pinecone_connector, sample_embedding):
        """An explicit ``top_k`` is passed through to the index."""
        connector, index, _, _ = mock_pinecone_connector

        matches = []
        for i in range(10):
            matches.append({'id': f'chunk-{i}', 'score': 0.9 - i*0.1, 'metadata': {}})
        index.query.return_value = {'matches': matches}

        found = connector.query_chunks(
            query_embedding=sample_embedding,
            top_k=10,
        )

        assert len(found) == 10
        assert self._query_kwargs(index)['top_k'] == 10

    def test_query_chunks_returns_empty_list_when_no_matches(self, mock_pinecone_connector, sample_embedding):
        """A response with no matches yields an empty result list."""
        connector, index, _, _ = mock_pinecone_connector
        index.query.return_value = {'matches': []}

        found = connector.query_chunks(query_embedding=sample_embedding)

        assert found == []
215+
216+
217+
class TestEdgeCases:
    """Boundary-condition tests for upsert and query on a mocked index."""

    def test_upsert_with_empty_embedding(self, mock_pinecone_connector):
        """A zero-length embedding is upserted as an empty list."""
        connector, index, _, _ = mock_pinecone_connector

        outcome = connector.upsert_chunk("chunk-123", np.array([]))

        assert outcome is True
        entry = index.upsert.call_args.kwargs['vectors'][0]
        assert entry[1] == []  # Empty list

    def test_query_with_empty_embedding(self, mock_pinecone_connector):
        """A zero-length embedding is queried as an empty list."""
        connector, index, _, _ = mock_pinecone_connector
        index.query.return_value = {'matches': []}

        found = connector.query_chunks(np.array([]))

        assert found == []
        assert index.query.call_args.kwargs['vector'] == []

    def test_upsert_with_large_metadata(self, mock_pinecone_connector, sample_embedding):
        """A 100-key metadata dict is forwarded with all of its keys."""
        connector, index, _, _ = mock_pinecone_connector

        bulky_metadata = {}
        for i in range(100):
            bulky_metadata[f"key_{i}"] = f"value_{i}"

        outcome = connector.upsert_chunk("chunk-123", sample_embedding, metadata=bulky_metadata)

        assert outcome is True
        entry = index.upsert.call_args.kwargs['vectors'][0]
        assert len(entry[2]) == 100

    def test_different_namespaces_isolated(self, mock_pinecone_connector, sample_embedding):
        """Each upsert call carries the namespace it was given."""
        connector, index, _, _ = mock_pinecone_connector

        # Upsert to different namespaces
        connector.upsert_chunk("chunk-1", sample_embedding, namespace="namespace-1")
        connector.upsert_chunk("chunk-2", sample_embedding, namespace="namespace-2")

        # Verify both calls used correct namespaces
        assert index.upsert.call_count == 2
        first_call, second_call = index.upsert.call_args_list
        assert first_call.kwargs['namespace'] == "namespace-1"
        assert second_call.kwargs['namespace'] == "namespace-2"
270+

0 commit comments

Comments
 (0)