|
| 1 | +import numpy as np |
| 2 | +from database.pinecone_connector import PineconeConnector |
| 3 | + |
| 4 | + |
class TestPineconeConnectorInitialization:
    """Checks on how a ``PineconeConnector`` is constructed."""

    def test_initializes_with_api_key_and_index_name(self, mocker):
        """Constructor keeps the index name and builds its client from the API key."""
        fake_client = mocker.MagicMock()
        # Patch the Pinecone class where the connector module looks it up, so
        # constructing the connector never touches the real service.
        pinecone_cls = mocker.patch(
            'database.pinecone_connector.Pinecone',
            return_value=fake_client,
        )

        connector = PineconeConnector(
            api_key="test-api-key",
            index_name="test-index",
        )

        pinecone_cls.assert_called_once_with(api_key="test-api-key")
        assert connector.client == fake_client
        assert connector.index_name == "test-index"
| 19 | + |
| 20 | + |
class TestUpsertChunk:
    """Behaviour of ``PineconeConnector.upsert_chunk`` against a mocked index."""

    def test_upsert_chunk_success(self, mock_pinecone_connector, sample_embedding):
        """A fully specified upsert succeeds and forwards id, namespace and metadata."""
        connector, mock_index, mock_client, _ = mock_pinecone_connector
        chunk_metadata = {"video_id": "video-1", "start_time": 0.0}

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
            namespace="test-namespace",
            metadata=chunk_metadata,
        )

        assert outcome is True
        mock_client.Index.assert_called_once_with("test-index")
        mock_index.upsert.assert_called_once()

        # call_args is an (args, kwargs) pair; only the keyword part is inspected.
        _, upsert_kwargs = mock_index.upsert.call_args
        assert upsert_kwargs['namespace'] == "test-namespace"
        sent_vectors = upsert_kwargs['vectors']
        assert len(sent_vectors) == 1
        record = sent_vectors[0]
        assert record[0] == "chunk-123"
        assert record[2]["video_id"] == "video-1"

    def test_upsert_chunk_with_default_namespace(self, mock_pinecone_connector, sample_embedding):
        """Omitting the namespace sends the upsert to ``__default__``."""
        connector, mock_index, _, _ = mock_pinecone_connector

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        assert outcome is True
        _, upsert_kwargs = mock_index.upsert.call_args
        assert upsert_kwargs['namespace'] == "__default__"

    def test_upsert_chunk_without_metadata(self, mock_pinecone_connector, sample_embedding):
        """Omitting metadata sends an empty dict in the metadata slot."""
        connector, mock_index, _, _ = mock_pinecone_connector

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        assert outcome is True
        _, upsert_kwargs = mock_index.upsert.call_args
        record = upsert_kwargs['vectors'][0]
        # Third element of the record is the metadata; it must be an empty dict.
        assert record[2] == {}

    def test_upsert_chunk_converts_numpy_to_list(self, mock_pinecone_connector, sample_embedding):
        """The embedding handed to the index is a plain list, not a numpy array."""
        connector, mock_index, _, _ = mock_pinecone_connector

        connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        _, upsert_kwargs = mock_index.upsert.call_args
        sent_values = upsert_kwargs['vectors'][0][1]
        # Second element of the record is the embedding payload.
        assert isinstance(sent_values, list)
        assert not isinstance(sent_values, np.ndarray)

    def test_upsert_chunk_handles_exception(self, mock_pinecone_connector, sample_embedding):
        """An exception raised by the index upsert yields ``False``."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.upsert.side_effect = Exception("Pinecone error")

        outcome = connector.upsert_chunk(
            chunk_id="chunk-123",
            chunk_embedding=sample_embedding,
        )

        assert outcome is False

    def test_upsert_multiple_chunks(self, mock_pinecone_connector, sample_embedding):
        """Three consecutive upserts all succeed and each reaches the index."""
        connector, mock_index, _, _ = mock_pinecone_connector
        cases = (
            ("chunk-1", {"id": 1}),
            ("chunk-2", {"id": 2}),
            ("chunk-3", {"id": 3}),
        )

        outcomes = []
        for chunk_id, chunk_metadata in cases:
            outcomes.append(
                connector.upsert_chunk(chunk_id, sample_embedding, metadata=chunk_metadata)
            )

        for outcome in outcomes:
            assert outcome is True
        assert mock_index.upsert.call_count == 3
| 111 | + |
| 112 | + |
class TestQueryChunks:
    """Behaviour of ``PineconeConnector.query_chunks`` against a mocked index."""

    def test_query_chunks_success(self, mock_pinecone_connector, sample_embedding):
        """A query returns the mocked matches and forwards its parameters."""
        connector, mock_index, _, _ = mock_pinecone_connector

        # Canned response that the mocked index hands back.
        canned_matches = [
            {'id': 'chunk-1', 'score': 0.95, 'metadata': {'video_id': 'video-1'}},
            {'id': 'chunk-2', 'score': 0.87, 'metadata': {'video_id': 'video-1'}},
            {'id': 'chunk-3', 'score': 0.82, 'metadata': {'video_id': 'video-2'}},
        ]
        mock_index.query.return_value = {'matches': canned_matches}

        found = connector.query_chunks(
            query_embedding=sample_embedding,
            namespace="test-namespace",
            top_k=3,
        )

        assert len(found) == 3
        best = found[0]
        assert best['id'] == 'chunk-1'
        assert best['score'] == 0.95
        mock_index.query.assert_called_once()

        # call_args is an (args, kwargs) pair; only the keyword part is inspected.
        _, query_kwargs = mock_index.query.call_args
        assert query_kwargs['namespace'] == "test-namespace"
        assert query_kwargs['top_k'] == 3
        assert query_kwargs['include_metadata'] is True

    def test_query_chunks_with_default_namespace(self, mock_pinecone_connector, sample_embedding):
        """Omitting the namespace queries ``__default__``."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.query.return_value = {'matches': []}

        connector.query_chunks(query_embedding=sample_embedding)

        _, query_kwargs = mock_index.query.call_args
        assert query_kwargs['namespace'] == "__default__"

    def test_query_chunks_with_default_top_k(self, mock_pinecone_connector, sample_embedding):
        """Omitting ``top_k`` queries with a value of 5."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.query.return_value = {'matches': []}

        connector.query_chunks(query_embedding=sample_embedding)

        _, query_kwargs = mock_index.query.call_args
        assert query_kwargs['top_k'] == 5

    def test_query_chunks_converts_numpy_to_list(self, mock_pinecone_connector, sample_embedding):
        """The query vector handed to the index is a plain list, not a numpy array."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.query.return_value = {'matches': []}

        connector.query_chunks(query_embedding=sample_embedding)

        _, query_kwargs = mock_index.query.call_args
        sent_vector = query_kwargs['vector']
        assert isinstance(sent_vector, list)
        assert not isinstance(sent_vector, np.ndarray)

    def test_query_chunks_handles_exception(self, mock_pinecone_connector, sample_embedding):
        """An exception raised by the index query yields an empty list."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.query.side_effect = Exception("Pinecone error")

        found = connector.query_chunks(query_embedding=sample_embedding)

        assert found == []

    def test_query_chunks_with_custom_top_k(self, mock_pinecone_connector, sample_embedding):
        """An explicit ``top_k`` is forwarded and all mocked matches come back."""
        connector, mock_index, _, _ = mock_pinecone_connector

        # Ten synthetic matches with steadily decreasing scores.
        canned_matches = []
        for i in range(10):
            canned_matches.append(
                {'id': f'chunk-{i}', 'score': 0.9 - i*0.1, 'metadata': {}}
            )
        mock_index.query.return_value = {'matches': canned_matches}

        found = connector.query_chunks(
            query_embedding=sample_embedding,
            top_k=10,
        )

        assert len(found) == 10
        _, query_kwargs = mock_index.query.call_args
        assert query_kwargs['top_k'] == 10

    def test_query_chunks_returns_empty_list_when_no_matches(self, mock_pinecone_connector, sample_embedding):
        """A response with no matches yields an empty list."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.query.return_value = {'matches': []}

        found = connector.query_chunks(query_embedding=sample_embedding)

        assert found == []
| 215 | + |
| 216 | + |
class TestEdgeCases:
    """Boundary inputs for upsert and query on the mocked connector."""

    def test_upsert_with_empty_embedding(self, mock_pinecone_connector):
        """Upserting a zero-length array succeeds and sends an empty list."""
        connector, mock_index, _, _ = mock_pinecone_connector
        no_values = np.array([])

        outcome = connector.upsert_chunk("chunk-123", no_values)

        assert outcome is True
        # call_args is an (args, kwargs) pair; only the keyword part is inspected.
        _, upsert_kwargs = mock_index.upsert.call_args
        sent_values = upsert_kwargs['vectors'][0][1]
        assert sent_values == []

    def test_query_with_empty_embedding(self, mock_pinecone_connector):
        """Querying with a zero-length array returns no results and sends an empty list."""
        connector, mock_index, _, _ = mock_pinecone_connector
        mock_index.query.return_value = {'matches': []}
        no_values = np.array([])

        found = connector.query_chunks(no_values)

        assert found == []
        _, query_kwargs = mock_index.query.call_args
        assert query_kwargs['vector'] == []

    def test_upsert_with_large_metadata(self, mock_pinecone_connector, sample_embedding):
        """A 100-key metadata dict is forwarded with all of its entries."""
        connector, mock_index, _, _ = mock_pinecone_connector
        bulky_metadata = {}
        for i in range(100):
            bulky_metadata[f"key_{i}"] = f"value_{i}"

        outcome = connector.upsert_chunk(
            "chunk-123",
            sample_embedding,
            metadata=bulky_metadata,
        )

        assert outcome is True
        _, upsert_kwargs = mock_index.upsert.call_args
        sent_metadata = upsert_kwargs['vectors'][0][2]
        assert len(sent_metadata) == 100

    def test_different_namespaces_isolated(self, mock_pinecone_connector, sample_embedding):
        """Each upsert is forwarded with the namespace it was called with."""
        connector, mock_index, _, _ = mock_pinecone_connector

        # One upsert per namespace, in a fixed order.
        connector.upsert_chunk("chunk-1", sample_embedding, namespace="namespace-1")
        connector.upsert_chunk("chunk-2", sample_embedding, namespace="namespace-2")

        assert mock_index.upsert.call_count == 2
        # Each recorded call is an (args, kwargs) pair; collect the namespaces in call order.
        seen_namespaces = [
            recorded[1]['namespace'] for recorded in mock_index.upsert.call_args_list
        ]
        assert seen_namespaces == ["namespace-1", "namespace-2"]
| 270 | + |
0 commit comments