Skip to content

Commit a8118ac

Browse files
authored
Facebook sync (May 2019) + relicense (#838)
Changelog:
- changed license: BSD+Patents -> MIT
- propagates exceptions raised in sub-indexes of IndexShards and IndexReplicas
- support for searching several inverted lists in parallel (parallel_mode != 0)
- better support for PQ codes where nbit != 8 or 16
- IVFSpectralHash implementation: spectral hash codes inside an IVF
- 6-bit per component scalar quantizer (4 and 8 bit were already supported)
- combinations of inverted lists: HStackInvertedLists and VStackInvertedLists
- configurable number of threads for OnDiskInvertedLists prefetching (including 0=no prefetch)
- more test and demo code compatible with Python 3 (print with parentheses)
- refactored benchmark code: data loading is now in a single file
1 parent 712edb0 commit a8118ac

File tree

1,592 files changed

+67784
-316498
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,592 files changed

+67784
-316498
lines changed

AutoTune.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

@@ -469,9 +468,22 @@ void ParameterSpace::set_index_parameter (
469468
}
470469
if (DC (IndexShards)) {
471470
// call on all sub-indexes
472-
for (auto & shard_index : ix->shard_indexes) {
473-
set_index_parameter (shard_index, name, val);
474-
}
471+
auto fn =
472+
[this, name, val](int, Index* subIndex) {
473+
set_index_parameter(subIndex, name, val);
474+
};
475+
476+
ix->runOnIndex(fn);
477+
return;
478+
}
479+
if (DC (IndexReplicas)) {
480+
// call on all sub-indexes
481+
auto fn =
482+
[this, name, val](int, Index* subIndex) {
483+
set_index_parameter(subIndex, name, val);
484+
};
485+
486+
ix->runOnIndex(fn);
475487
return;
476488
}
477489
if (DC (IndexRefineFlat)) {

AutoTune.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

AuxIndexStructures.cpp

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

@@ -54,6 +53,10 @@ RangeSearchResult::~RangeSearchResult () {
5453
delete [] lims;
5554
}
5655

56+
57+
58+
59+
5760
/***********************************************************************
5861
* BufferList
5962
***********************************************************************/
@@ -148,7 +151,7 @@ void RangeSearchPartialResult::finalize ()
148151
res->do_allocation ();
149152

150153
#pragma omp barrier
151-
set_result ();
154+
copy_result ();
152155
}
153156

154157

@@ -162,7 +165,7 @@ void RangeSearchPartialResult::set_lims ()
162165
}
163166

164167
/// called by range_search after do_allocation
165-
void RangeSearchPartialResult::set_result (bool incremental)
168+
void RangeSearchPartialResult::copy_result (bool incremental)
166169
{
167170
size_t ofs = 0;
168171
for (int i = 0; i < queries.size(); i++) {
@@ -178,6 +181,38 @@ void RangeSearchPartialResult::set_result (bool incremental)
178181
}
179182
}
180183

184+
void RangeSearchPartialResult::merge (std::vector <RangeSearchPartialResult *> &
185+
partial_results, bool do_delete)
186+
{
187+
188+
int npres = partial_results.size();
189+
if (npres == 0) return;
190+
RangeSearchResult *result = partial_results[0]->res;
191+
size_t nx = result->nq;
192+
193+
// count
194+
for (const RangeSearchPartialResult * pres : partial_results) {
195+
if (!pres) continue;
196+
for (const RangeQueryResult &qres : pres->queries) {
197+
result->lims[qres.qno] += qres.nres;
198+
}
199+
}
200+
result->do_allocation ();
201+
for (int j = 0; j < npres; j++) {
202+
if (!partial_results[j]) continue;
203+
partial_results[j]->copy_result (true);
204+
if (do_delete) {
205+
delete partial_results[j];
206+
partial_results[j] = nullptr;
207+
}
208+
}
209+
210+
// reset the limits
211+
for (size_t i = nx; i > 0; i--) {
212+
result->lims [i] = result->lims [i - 1];
213+
}
214+
result->lims [0] = 0;
215+
}
181216

182217
/***********************************************************************
183218
* IDSelectorRange

AuxIndexStructures.h

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

@@ -90,10 +89,15 @@ struct IDSelectorBatch: IDSelector {
9089
~IDSelectorBatch() override {}
9190
};
9291

93-
94-
// Below are structures used only by Index implementations
95-
96-
92+
/****************************************************************
93+
* Result structures for range search.
94+
*
95+
* The main constraint here is that we want to support parallel
96+
* queries from different threads in various ways: 1 thread per query,
97+
* several threads per query. We store the actual results in blocks of
98+
* fixed size rather than exponentially increasing memory. At the end,
99+
* we copy the block content to a linear result array.
100+
*****************************************************************/
97101

98102
/** List of temporary buffers used to store results before they are
99103
* copied to the RangeSearchResult object. */
@@ -115,9 +119,10 @@ struct BufferList {
115119

116120
~BufferList ();
117121

118-
// create a new buffer
122+
/// create a new buffer
119123
void append_buffer ();
120124

125+
/// add one result, possibly appending a new buffer if needed
121126
void add (idx_t id, float dis);
122127

123128
/// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
@@ -132,31 +137,42 @@ struct RangeSearchPartialResult;
132137
/// result structure for a single query
133138
struct RangeQueryResult {
134139
using idx_t = Index::idx_t;
135-
idx_t qno;
136-
size_t nres;
140+
idx_t qno; ///< id of the query
141+
size_t nres; ///< nb of results for this query
137142
RangeSearchPartialResult * pres;
138143

144+
/// called by search function to report a new result
139145
void add (float dis, idx_t id);
140146
};
141147

142148
/// the entries in the buffers are split per query
143149
struct RangeSearchPartialResult: BufferList {
144150
RangeSearchResult * res;
145151

152+
/// eventually the result will be stored in res_in
146153
explicit RangeSearchPartialResult (RangeSearchResult * res_in);
147154

155+
/// query ids + nb of results per query.
148156
std::vector<RangeQueryResult> queries;
149157

150158
/// begin a new result
151159
RangeQueryResult & new_result (idx_t qno);
152160

161+
/*****************************************
162+
* functions used at the end of the search to merge the result
163+
* lists */
153164
void finalize ();
154165

155166
/// called by range_search before do_allocation
156167
void set_lims ();
157168

158169
/// called by range_search after do_allocation
159-
void set_result (bool incremental = false);
170+
void copy_result (bool incremental = false);
171+
172+
/// merge a set of PartialResult's into one RangeSearchResult
173+
/// on output the partial results are empty!
174+
static void merge (std::vector <RangeSearchPartialResult *> &
175+
partial_results, bool do_delete=true);
160176

161177
};
162178

@@ -212,7 +228,7 @@ struct VectorIOWriter:IOWriter {
212228
* it maintains counters) so the distance functions are not const,
213229
* instantiate one from each thread if needed.
214230
***********************************************************/
215-
struct DistanceComputer {
231+
struct DistanceComputer {
216232
using idx_t = Index::idx_t;
217233

218234
/// called before computing distances
@@ -225,7 +241,7 @@ struct VectorIOWriter:IOWriter {
225241
virtual float symmetric_dis (idx_t i, idx_t j) = 0;
226242

227243
virtual ~DistanceComputer() {}
228-
};
244+
};
229245

230246
/***********************************************************
231247
* Interrupt callback

Clustering.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

Clustering.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

FaissAssert.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

FaissException.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

98
// -*- c++ -*-
109

1110
#include "FaissException.h"
11+
#include <sstream>
1212

1313
namespace faiss {
1414

@@ -32,4 +32,35 @@ FaissException::what() const noexcept {
3232
return msg.c_str();
3333
}
3434

35+
void handleExceptions(
36+
std::vector<std::pair<int, std::exception_ptr>>& exceptions) {
37+
if (exceptions.size() == 1) {
38+
// throw the single received exception directly
39+
std::rethrow_exception(exceptions.front().second);
40+
41+
} else if (exceptions.size() > 1) {
42+
// multiple exceptions; aggregate them and throw a single exception
43+
std::stringstream ss;
44+
45+
for (auto& p : exceptions) {
46+
try {
47+
std::rethrow_exception(p.second);
48+
} catch (std::exception& ex) {
49+
if (ex.what()) {
50+
// exception message available
51+
ss << "Exception thrown from index " << p.first << ": "
52+
<< ex.what() << "\n";
53+
} else {
54+
// No message available
55+
ss << "Unknown exception thrown from index " << p.first << "\n";
56+
}
57+
} catch (...) {
58+
ss << "Unknown exception thrown from index " << p.first << "\n";
59+
}
60+
}
61+
62+
throw FaissException(ss.str());
63+
}
64+
}
65+
3566
}

FaissException.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

@@ -13,6 +12,8 @@
1312

1413
#include <exception>
1514
#include <string>
15+
#include <vector>
16+
#include <utility>
1617

1718
namespace faiss {
1819

@@ -32,6 +33,11 @@ class FaissException : public std::exception {
3233
std::string msg;
3334
};
3435

36+
/// Handle multiple exceptions from worker threads, throwing an appropriate
37+
/// exception that aggregates the information
38+
/// The pair int is the thread that generated the exception
39+
void
40+
handleExceptions(std::vector<std::pair<int, std::exception_ptr>>& exceptions);
3541

3642
/** bare-bones unique_ptr
3743
* this one deletes with delete [] */
@@ -60,9 +66,6 @@ struct ScopeDeleter1 {
6066
}
6167
};
6268

63-
64-
6569
}
6670

67-
6871
#endif

HNSW.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
/**
2-
* Copyright (c) 2015-present, Facebook, Inc.
3-
* All rights reserved.
2+
* Copyright (c) Facebook, Inc. and its affiliates.
43
*
5-
* This source code is licensed under the BSD+Patents license found in the
4+
* This source code is licensed under the MIT license found in the
65
* LICENSE file in the root directory of this source tree.
76
*/
87

0 commit comments

Comments
 (0)