@@ -30,24 +30,153 @@ void bind_bloom_filter(nb::module_ &m, const char* name) {
3030 using namespace datasketches ;
3131 using bloom_filter_type = bloom_filter_alloc<A>;
3232
33- // Start with just one simple function
34- m.def (" create_bloom_filter" ,
35- [](uint64_t max_distinct_items, double target_false_positive_prob) {
36- return bloom_filter_type::builder::create_by_accuracy (max_distinct_items, target_false_positive_prob);
37- },
38- nb::arg (" max_distinct_items" ), nb::arg (" target_false_positive_prob" ),
39- " Creates a Bloom filter with optimal parameters for the given accuracy requirements" );
40-
41- // Bind the class with minimal methods
33+ // Bind the class with static factory methods only
4234 nb::class_<bloom_filter_type>(m, name)
4335 .def (" is_empty" , &bloom_filter_type::is_empty,
4436 " Returns True if the filter has seen no items, otherwise False" )
37+
38+ // Update methods - efficient overloads for Python types
39+ // Non-negative integers (uint64_t)
40+ .def (" update" , static_cast <void (bloom_filter_type::*)(uint64_t )>(&bloom_filter_type::update),
41+ nb::arg (" item" ),
42+ " Updates the filter with a non-negative integer" )
43+ // Negative/positive integers (int64_t)
44+ .def (" update" , static_cast <void (bloom_filter_type::*)(int64_t )>(&bloom_filter_type::update),
45+ nb::arg (" item" ),
46+ " Updates the filter with a negative/positive integer" )
47+ // Float (double)
48+ .def (" update" , static_cast <void (bloom_filter_type::*)(double )>(&bloom_filter_type::update),
49+ nb::arg (" item" ),
50+ " Updates the filter with a float" )
51+ // String (std::string)
4552 .def (" update" , static_cast <void (bloom_filter_type::*)(const std::string&)>(&bloom_filter_type::update),
4653 nb::arg (" item" ),
47- " Updates the filter with the given string" )
54+ " Updates the filter with a string" )
55+ // Bytes object
56+ .def (" update" ,
57+ [](bloom_filter_type& self, nb::bytes b) {
58+ self.update (b.c_str (), b.size ());
59+ },
60+ nb::arg (" item" ),
61+ " Updates the filter with a bytes object" )
62+
63+ // Query methods - efficient overloads for Python types
64+ // Non-negative integers (uint64_t)
65+ .def (" query" , static_cast <bool (bloom_filter_type::*)(uint64_t ) const >(&bloom_filter_type::query),
66+ nb::arg (" item" ),
67+ " Queries the filter for a non-negative integer" )
68+ // Negative/positive integers (int64_t)
69+ .def (" query" , static_cast <bool (bloom_filter_type::*)(int64_t ) const >(&bloom_filter_type::query),
70+ nb::arg (" item" ),
71+ " Queries the filter for a negative/positive integer" )
72+ // Float (double)
73+ .def (" query" , static_cast <bool (bloom_filter_type::*)(double ) const >(&bloom_filter_type::query),
74+ nb::arg (" item" ),
75+ " Queries the filter for a float" )
76+ // String (std::string)
4877 .def (" query" , static_cast <bool (bloom_filter_type::*)(const std::string&) const >(&bloom_filter_type::query),
4978 nb::arg (" item" ),
50- " Queries the filter for the given string" );
79+ " Queries the filter for a string" )
80+ // Bytes object
81+ .def (" query" ,
82+ [](const bloom_filter_type& self, nb::bytes b) -> bool {
83+ return self.query (b.c_str (), b.size ());
84+ },
85+ nb::arg (" item" ),
86+ " Queries the filter for a bytes object" )
87+
88+ .def (" reset" , &bloom_filter_type::reset,
89+ " Resets the Bloom filter to its original empty state" )
90+ .def (" union_with" , &bloom_filter_type::union_with,
91+ nb::arg (" other" ),
92+ " Performs a union operation with another Bloom filter. Both filters must have the same capacity, number of hashes, and seed." )
93+ .def (" intersect" , &bloom_filter_type::intersect,
94+ nb::arg (" other" ),
95+ " Performs an intersection operation with another Bloom filter. Both filters must have the same capacity, number of hashes, and seed." )
96+ .def (" invert" , &bloom_filter_type::invert,
97+ " Inverts all the bits of the BloomFilter. Approximately inverts the notion of set-membership." )
98+ .def (" to_string" , &bloom_filter_type::to_string,
99+ nb::arg (" print_filter" )=false ,
100+ " Returns a string representation of the Bloom filter\n\n "
101+ " :param print_filter: If True, includes the actual bit array in the output\n :type print_filter: bool, optional\n "
102+ " :return: String representation of the filter\n :rtype: str" )
103+ .def (" __str__" , [](const bloom_filter_type& self) { return self.to_string (false ); },
104+ " Returns a string summary of the Bloom filter (without printing the bit array)" )
105+ .def (" __repr__" , [](const bloom_filter_type& self) { return self.to_string (false ); },
106+ " Returns a detailed string representation of the Bloom filter for debugging and REPL use" )
107+ .def (" __copy__" , [](const bloom_filter_type& self) { return bloom_filter_type (self); },
108+ " Returns a copy of the Bloom filter" )
109+ .def (" is_compatible" , &bloom_filter_type::is_compatible,
110+ nb::arg (" other" ),
111+ " Returns True if the other Bloom filter is compatible for union/intersection operations (same capacity, num_hashes, and seed)" )
112+ .def (" get_serialized_size_bytes" ,
113+ [](const bloom_filter_type& sk) { return sk.get_serialized_size_bytes (); },
114+ " Returns the size in bytes of the serialized image of the filter" )
115+ .def (" serialize" ,
116+ [](const bloom_filter_type& sk) {
117+ auto v = sk.serialize (); // vector_bytes (std::vector<uint8_t, Allocator>)
118+ return nb::bytes (reinterpret_cast <const char *>(v.data ()), v.size ());
119+ },
120+ " Serialize the filter to a cross-language compatible byte string" )
121+ .def_static (
122+ " deserialize" ,
123+ [](const nb::bytes& bytes) {
124+ return bloom_filter_type::deserialize (bytes.c_str (), bytes.size ());
125+ },
126+ nb::arg (" bytes" ),
127+ " Reads a bytes object and returns the corresponding bloom_filter" )
128+ .def_static (" suggest_num_hashes" ,
129+ static_cast <uint16_t (*)(uint64_t , uint64_t )>(&bloom_filter_type::builder::suggest_num_hashes),
130+ nb::arg (" max_distinct_items" ), nb::arg (" num_filter_bits" ),
131+ " Suggests the optimal number of hash functions for given target numbers of distinct items and filter size" )
132+ .def_static (" suggest_num_hashes_by_probability" ,
133+ static_cast <uint16_t (*)(double )>(&bloom_filter_type::builder::suggest_num_hashes),
134+ nb::arg (" target_false_positive_prob" ),
135+ " Suggests the optimal number of hash functions to achieve a target false positive probability" )
136+ .def_static (" suggest_num_filter_bits" ,
137+ &bloom_filter_type::builder::suggest_num_filter_bits,
138+ nb::arg (" max_distinct_items" ), nb::arg (" target_false_positive_prob" ),
139+ " Suggests the optimal number of bits for given target numbers of distinct items and false positive probability" )
140+ .def_static (" create_by_accuracy" ,
141+ [](uint64_t max_distinct_items, double target_false_positive_prob, uint64_t seed) {
142+ return bloom_filter_type::builder::create_by_accuracy (max_distinct_items, target_false_positive_prob, seed);
143+ },
144+ nb::arg (" max_distinct_items" ), nb::arg (" target_false_positive_prob" ), nb::arg (" seed" )=bloom_filter_type::builder::generate_random_seed (),
145+ " Creates a Bloom filter with optimal parameters for the given accuracy requirements\n\n "
146+ " :param max_distinct_items: Maximum expected number of distinct items to add to the filter\n :type max_distinct_items: int\n "
147+ " :param target_false_positive_prob: Desired false positive probability per item\n :type target_false_positive_prob: float\n "
148+ " :param seed: Hash seed to use (default: random)\n :type seed: int, optional"
149+ )
150+ .def_static (" create_by_size" ,
151+ [](uint64_t num_bits, uint16_t num_hashes, uint64_t seed) {
152+ return bloom_filter_type::builder::create_by_size (num_bits, num_hashes, seed);
153+ },
154+ nb::arg (" num_bits" ), nb::arg (" num_hashes" ), nb::arg (" seed" )=bloom_filter_type::builder::generate_random_seed (),
155+ " Creates a Bloom filter with specified size parameters\n\n "
156+ " :param num_bits: Size of the Bloom filter in bits\n :type num_bits: int\n "
157+ " :param num_hashes: Number of hash functions to apply to items\n :type num_hashes: int\n "
158+ " :param seed: Hash seed to use (default: random)\n :type seed: int, optional"
159+ )
160+ .def_prop_ro (
161+ " num_bits_used" ,
162+ &bloom_filter_type::get_bits_used,
163+ " Number of bits set to 1 in the Bloom filter"
164+ )
165+ .def_prop_ro (
166+ " capacity" ,
167+ &bloom_filter_type::get_capacity,
168+ " Number of bits in the Bloom filter's bit array"
169+ )
170+ .def_prop_ro (
171+ " num_hashes" ,
172+ &bloom_filter_type::get_num_hashes,
173+ " Number of hash functions used by this Bloom filter"
174+ )
175+ .def_prop_ro (
176+ " seed" ,
177+ &bloom_filter_type::get_seed,
178+ " Hash seed used by this Bloom filter"
179+ );
51180}
52181
53182void init_bloom_filter (nb::module_ &m) {
0 commit comments