@@ -497,7 +497,7 @@ namespace filtering {
497497 * @{
498498 */
499499
/**
 * @brief Tag naming the kind of filter a filter object represents; returned by
 * @c get_filter_type() overrides (for example, @c bloom_filter returns @c Bloom).
 *
 * NOTE(review): @c Bloom is placed before @c UDF, so the implicit underlying value of @c UDF
 * moves from 3 to 4 relative to the previous declaration
 * (`None, Bitmap, Bitset, UDF`). Confirm that nothing stores, serializes, or compares the raw
 * integer values of this enum before relying on this ordering.
 */
enum class FilterType {
  None,
  Bitmap,
  Bitset,
  Bloom,  // probabilistic membership filter, see bloom_filter
  UDF
};
501501
502502struct base_filter {
503503 ~base_filter () = default ;
@@ -617,6 +617,32 @@ struct bitset_filter : public base_filter {
617617 void to_csr (raft::resources const & handle, csr_matrix_t & csr);
618618};
619619
620+ /* *
621+ * @brief Filter CAGRA candidates with a global @c cuco bloom filter over the index.
622+ *
623+ * Build the filter once on the host with bulk @c add() over the allowed dataset row ids, obtain a
624+ * @c ref() from the owning @c cuco::bloom_filter, copy that ref to device memory, and pass the
625+ * device pointer as @c filter_data. The linked JIT-LTO fragment probes the same filter for every
626+ * query and candidate, similar to @ref bitset_filter but with probabilistic membership tests.
627+ *
628+ * Bloom filters have no false negatives: if a row was inserted, @c contains returns @c true. False
629+ * positives are possible, so highly selective predicates may still need a bitset or UDF for exact
630+ * filtering.
631+ */
632+ struct bloom_filter : public base_filter {
633+ void * filter_data{nullptr };
634+ float filtering_rate{-1 .0f };
635+
636+ bloom_filter () = default ;
637+
638+ explicit bloom_filter (void * filter_data, float filtering_rate = -1 .0f )
639+ : filter_data(filter_data), filtering_rate(filtering_rate)
640+ {
641+ }
642+
643+ FilterType get_filter_type () const override { return FilterType::Bloom; }
644+ };
645+
620646/* *
621647 * @brief JIT-LTO user-defined filter predicate.
622648 *
0 commit comments