Skip to content

Commit 44729af

Browse files
committed
Merge pull request #192 from simongog/sd_vector_sel0
Faster select_0 support for sd_vector
2 parents ad5c1ae + 49fd6fd commit 44729af

File tree

3 files changed

+303
-0
lines changed

3 files changed

+303
-0
lines changed

examples/sd_vector_benchmark.cpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#include <sdsl/bit_vectors.hpp>
2+
#include <random>
3+
#include <iostream>
4+
#include <chrono>
5+
6+
using namespace sdsl;
7+
using namespace std;
8+
9+
using namespace std::chrono;
10+
using timer = std::chrono::high_resolution_clock;
11+
12+
13+
// Benchmark kernel: issues `times` queries against the callable `v` and
// returns the sum of all results.
//
// \param v     Callable queried as v(x); main() passes rank/select support objects.
// \param rands Table of query arguments.
// \param mask  ANDed with the loop counter to choose the entry of `rands`
//              (presumably rands.size()-1 for a power-of-two sized table --
//              TODO confirm against util::rnd_positions).
// \param times Number of queries to issue (default 100000000).
// \return Sum of the query results; main() prints it as a check value.
template<class t_vec>
14+
uint64_t test_inv_random_access(const t_vec& v, const int_vector<64>& rands, uint64_t mask, uint64_t times=100000000)
15+
{
16+
uint64_t cnt=0;
17+
for (uint64_t i=0; i<times; ++i) {
18+
// i&mask wraps the running counter into the index range selected by mask.
cnt += v(rands[ i&mask ]);
19+
}
20+
return cnt;
21+
}
22+
23+
24+
25+
//int main(int argc, char* argv[]){
26+
// Benchmark driver. Builds a bit_vector of 800000000 bits, sets randomly
// chosen positions, converts it to an sd_vector<> and then times four query
// types with test_inv_random_access: select_0 via sd_vector<>::select_0_type,
// select_0 via select_0_support_sd, select_1 and rank_1. Timings are printed
// as nanoseconds per query (elapsed ns divided by the repetition count).
int main()
27+
{
28+
auto start = timer::now();
29+
bool default_value = 0; //ID[ID.length()-1]-'0';
30+
bit_vector bv = bit_vector(800000000, default_value);
31+
32+
// Default-constructed engine, i.e. no explicit seed is supplied.
std::mt19937_64 rng;
33+
std::uniform_int_distribution<uint64_t> distribution(0, bv.size()-1);
34+
// NOTE(review): bind is declared in <functional>, which is not among the
// includes visible at the top of this file; presumably it arrives
// transitively -- confirm.
auto dice = bind(distribution, rng);
35+
// populate vectors with some other bits
36+
// bv.size()/25 draws are made; drawn positions may repeat, so this is an
// upper bound on the number of bits that end up flipped.
for (uint64_t i=0; i < bv.size()/25; ++i) {
37+
uint64_t x = dice();
38+
bv[x] = !default_value;
39+
}
40+
auto stop = timer::now();
41+
cout << "initialization in (ms): " << duration_cast<milliseconds>(stop-start).count() << endl;
42+
cout << "size in MiB: " << size_in_mega_bytes(bv) << endl;
43+
44+
// Time the construction of the sd_vector from the plain bit_vector.
start = timer::now();
45+
sd_vector<> bv_sd(bv);
46+
stop = timer::now();
47+
cout << "sd_construction in (ms): " << duration_cast<milliseconds>(stop-start).count() << endl;
48+
{
49+
// Swap bv with an empty temporary: the temporary takes over the old
// contents and is destroyed at the end of the statement, leaving bv empty.
bit_vector().swap(bv);
50+
}
51+
cout << "size in MiB: " << size_in_mega_bytes(bv_sd) << endl;
52+
cout << "wl = " << (size_t) bv_sd.wl << endl;
53+
cout << "n = " << bv_sd.size() << endl;
54+
// high.size() is printed under the label "2*m"; the next line derives the
// value labelled "n/m" from it as 2*n/high.size().
cout << "2*m = " << bv_sd.high.size()<<endl;
55+
cout <<"n/m=" << (2.0*bv_sd.size())/bv_sd.high.size()<<endl;
56+
57+
// zeros = rank_0 evaluated at bv_sd.size(); ones is the remainder.
auto zeros = sd_vector<>::rank_0_type(&bv_sd)(bv_sd.size());
58+
auto ones = bv_sd.size()-zeros;
59+
cout << "zeros = "<< zeros << endl;
60+
// --- Benchmark 1: select_0 through sd_vector<>::select_0_type ---
{
61+
uint64_t mask = 0;
62+
// presumably fills rands with random values below `zeros` and sets mask
// for indexing the table (arguments 20 and 17 look like log2 of the table
// size and a seed) -- TODO confirm against util::rnd_positions.
auto rands = util::rnd_positions<int_vector<64>>(20, mask, zeros, 17);
63+
// Shift every argument up by one before querying (select arguments are
// presumably 1-based -- confirm).
for (uint64_t i=0; i<rands.size(); ++i) rands[i] = rands[i]+1;
64+
sd_vector<>::select_0_type select0(&bv_sd);
65+
const uint64_t reps = 10000000;
66+
start = timer::now();
67+
auto check = test_inv_random_access(select0, rands, mask, reps);
68+
stop = timer::now();
69+
70+
cout << "# select0_time = " << duration_cast<nanoseconds>(stop-start).count()/(double)reps << endl;
71+
cout << "# select_check = " << check << endl;
72+
cout << "# size_in_mega_bytes(bv_sd) = " << size_in_mega_bytes(bv_sd) << endl;
73+
cout << "# size_in_mega_bytes(select0) = " << size_in_mega_bytes(select0) << endl;
74+
}
75+
// --- Benchmark 2: select_0 through select_0_support_sd; same query
// generation parameters as benchmark 1 ---
{
76+
uint64_t mask = 0;
77+
auto rands = util::rnd_positions<int_vector<64>>(20, mask, zeros, 17);
78+
for (uint64_t i=0; i<rands.size(); ++i) rands[i] = rands[i]+1;
79+
select_0_support_sd<sd_vector<>> select0(&bv_sd);
80+
const uint64_t reps = 10000000;
81+
start = timer::now();
82+
auto check = test_inv_random_access(select0, rands, mask, reps);
83+
stop = timer::now();
84+
85+
cout << "# select0_time = " << duration_cast<nanoseconds>(stop-start).count()/(double)reps << endl;
86+
cout << "# select_check = " << check << endl;
87+
cout << "# size_in_mega_bytes(bv_sd) = " << size_in_mega_bytes(bv_sd) << endl;
88+
cout << "# size_in_mega_bytes(select0) = " << size_in_mega_bytes(select0) << endl;
89+
}
90+
// --- Benchmark 3: select_1; queries are generated below `ones` ---
{
91+
uint64_t mask = 0;
92+
auto rands = util::rnd_positions<int_vector<64>>(20, mask, ones, 17);
93+
for (uint64_t i=0; i<rands.size(); ++i) rands[i] = rands[i]+1;
94+
sd_vector<>::select_1_type select1(&bv_sd);
95+
const uint64_t reps = 10000000;
96+
start = timer::now();
97+
auto check = test_inv_random_access(select1, rands, mask, reps);
98+
stop = timer::now();
99+
100+
cout << "# select1_time = " << duration_cast<nanoseconds>(stop-start).count()/(double)reps << endl;
101+
cout << "# select_check = " << check << endl;
102+
}
103+
// --- Benchmark 4: rank_1; queries are generated below bv_sd.size() and are
// NOT shifted by one, unlike the select benchmarks ---
{
104+
uint64_t mask = 0;
105+
auto rands = util::rnd_positions<int_vector<64>>(20, mask, bv_sd.size(), 17);
106+
// NOTE(review): the "done" lines and the two address prints below look like
// leftover debug output -- confirm whether they are still wanted.
cout<<"done"<<endl;
107+
cout<<(uint64_t)&(bv_sd.high_1_select)<<endl;
108+
cout<<(uint64_t)&(bv_sd.high_0_select)<<endl;
109+
sd_vector<>::rank_1_type rank1(&bv_sd);
110+
cout<<"done"<<endl;
111+
const uint64_t reps = 10000000;
112+
// for(size_t i=0; i<bv_sd.size();++i){
113+
// cout << "i="<<i<<" rank1("<<i<<")="<<rank1(i)<<endl;
114+
// }
115+
start = timer::now();
116+
auto check = test_inv_random_access(rank1, rands, mask, reps);
117+
stop = timer::now();
118+
119+
cout << "# rank1_time = " << duration_cast<nanoseconds>(stop-start).count()/(double)reps << endl;
120+
// NOTE(review): the check value of the rank benchmark is printed under the
// label "select_check".
cout << "# select_check = " << check << endl;
121+
}
122+
}

include/sdsl/sd_vector.hpp

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,5 +493,185 @@ class select_support_sd
493493
}
494494
};
495495

496+
497+
//! Select_0 data structure for sd_vector
498+
/*! \tparam t_sd_vector sd_vector type
499+
 * \note The rank support used during construction is t_sd_vector::rank_1_type; it is not a separate template parameter.
500+
*/
501+
template<typename t_sd_vector=sd_vector<>>
502+
class select_0_support_sd
503+
{
504+
public:
505+
typedef bit_vector::size_type size_type;
506+
typedef t_sd_vector bit_vector_type;
507+
using rank_1 = typename t_sd_vector::rank_1_type;
508+
using sel0_type = typename t_sd_vector::select_0_type;
509+
typedef bit_vector y_high_type;
510+
// The pattern this structure selects for is the 0 bit.
enum { bit_pat = 0 };
511+
512+
private:
513+
// Supported vector; not owned. May be nullptr (see constructor default).
const bit_vector_type* m_v;
514+
// One entry per sample (one sample every 64*bs zeros, bs = 2^wl): index of
// the 64-bit word of m_v->high recorded for that sample.
int_vector<> m_pointer;
515+
// One entry per sample: number of set bits of m_v->high in front of the
// word stored in m_pointer.
int_vector<> m_rank1;
516+
public:
517+
518+
//! Constructor; builds the sample tables when v is not nullptr.
/*! \param v Pointer to the supported vector. With nullptr only the pointer
 *           is stored and both tables stay empty.
 */
explicit select_0_support_sd(const bit_vector_type* v=nullptr) {
519+
set_vector(v);
520+
if (nullptr != m_v) {
521+
size_type rank_0 = 0; // rank0 in H
522+
const size_type bs = 1ULL << (m_v->wl);
523+
size_type z = 0;
524+
size_type rank1 = 0;// rank1 in H
525+
size_type zeros = m_v->size() - rank_1(m_v)(m_v->size()); // zeros in B
526+
// One sample slot per 64*bs zeros (plus one); entry widths are chosen
// from the size of high.
m_pointer = int_vector<>(zeros/(64*bs)+1, 0, bits::hi(m_v->high.size()/64)+1);
527+
m_rank1 = int_vector<>(m_pointer.size(), 0, bits::hi(m_v->high.size())+1);
528+
uint64_t w=0;
529+
// Walk over high one 64-bit word at a time; sel0 is the next zero rank
// that still needs a sample.
for (size_type i=0, sel0=1; i < m_v->high.size(); i+=64) {
530+
size_type old_rank1 = rank1;
531+
w = m_v->high.get_int(i, 64);
532+
rank1 += bits::cnt(w);
533+
rank_0 = (i+64)-rank1;
534+
// z is the bound compared against sel0 below; it appears to be the number
// of zeros of B accounted for up to the end of this word -- confirm.
if (rank1 > 0 and (w>>63)&1) {
535+
uint64_t pos = rank_0*bs + m_v->low[rank1-1]; // pos of last one (of previous block) in B
536+
z = pos + 1 - rank1;
537+
} else {
538+
z = rank_0*bs - rank1;
539+
}
540+
// Record this word (and the rank in front of it) for every pending sample
// whose zero rank does not exceed z.
while (sel0 <= z and sel0 <= zeros) {
541+
m_pointer[(sel0-1)/(64*bs)] = i/64;
542+
m_rank1[(sel0-1)/(64*bs)] = old_rank1;
543+
sel0 += 64*bs;
544+
}
545+
}
546+
}
547+
}
548+
549+
//! Returns the position of the i-th zero (bit_pat = 0) in the bit vector.
/*! \param i Rank of the requested zero. The sample lookup uses (i-1), so i
 *           is presumably 1-based (i >= 1) -- confirm.
 *  \return The position computed for the i-th zero.
 *  The search starts at the word stored for sample (i-1)/(64*bs) and then
 *  narrows down in three stages over m_v->high: whole 64-bit words, 8-bit
 *  chunks, single bits.
 *  m_v is dereferenced without a null check.
 */
size_type select(size_type i)const {
550+
const size_type bs = 1ULL << (m_v->wl);
551+
size_type j = m_pointer[(i-1)/(64*bs)]*64;// index into m_high
552+
size_type rank1 = m_rank1[(i-1)/(64*bs)]; // rank_1(j*bs*64) in B
553+
size_type pos = 0;
554+
size_type rank0 = 0;
555+
556+
// NOTE(review): rank0 is assigned in this if/else but not read afterwards,
// and pos is overwritten in the first iteration of the loop below.
if (rank1 > 0 and (m_v->high[j-1])&1) {
557+
pos = (j-rank1)*bs + m_v->low[rank1-1]; // starting position of current block
558+
rank0 = pos+1-rank1;
559+
} else {
560+
pos = (j-rank1)*bs;// starting position of current block
561+
rank0 = pos-rank1;
562+
}
563+
uint64_t w = m_v->high.get_int(j, 64);
564+
// Stage 1: advance by whole 64-bit words while the zero count computed for
// the end of the word (_rank0) is still below i.
do {
565+
uint64_t _rank1 = rank1 + bits::cnt(w);
566+
uint64_t _rank0 = 0;
567+
if (_rank1 > 0 and (w>>63)&1) {
568+
pos = (j+64-_rank1)*bs + m_v->low[_rank1-1];
569+
_rank0 = pos+1-_rank1;
570+
} else {
571+
pos = (j+64-_rank1)*bs;
572+
_rank0 = pos-_rank1;
573+
}
574+
if (_rank0 < i) {
575+
j+=64;
576+
w = m_v->high.get_int(j, 64);
577+
rank1 = _rank1;
578+
} else {
579+
break;
580+
}
581+
} while (true);
582+
// invariant i >zeros
// NOTE(review): "zeros" above presumably means the zeros counted in front
// of the current word -- confirm.
583+
// Stage 2: same test on 8-bit chunks of w; bits::lt_cnt is indexed with
// the low byte and w is shifted right by 8 after each accepted chunk.
do {
584+
uint64_t _rank1 = rank1 + bits::lt_cnt[w&0xFFULL];
585+
uint64_t _rank0 = 0;
586+
if (_rank1 > 0 and (w>>7)&1) {
587+
pos = (j+8-_rank1)*bs + m_v->low[_rank1-1];
588+
_rank0 = pos+1-_rank1;
589+
} else {
590+
pos = (j+8-_rank1)*bs;
591+
_rank0 = pos-_rank1;
592+
}
593+
if (_rank0 < i) {
594+
j+=8;
595+
w >>= 8;
596+
rank1 = _rank1;
597+
} else {
598+
break;
599+
}
600+
} while (true);
601+
602+
// Stage 3: consume w bit by bit until the running zero count reaches i,
// then step back from the reference position by the surplus (zeros-i).
do {
603+
bool b = w&1ULL;
604+
w >>= 1; // zeros are shifted in
605+
++j;
606+
if (0 == b) {
607+
pos = (j-rank1)*bs;
608+
size_type zeros = pos-rank1;
609+
if (zeros >= i) {
610+
pos = pos - (zeros-i) - 1;
611+
break;
612+
}
613+
} else {
614+
pos = (j-1-rank1)*bs;
615+
size_type one_pos = pos + m_v->low[rank1];
616+
++rank1;
617+
size_type zeros = one_pos + 1 - rank1;
618+
if (zeros >= i) {
619+
pos = one_pos - (zeros-i) - 1;
620+
break;
621+
}
622+
}
623+
// w has been fully consumed when j reaches a multiple of 64: fetch the
// next word of high.
if (j%64==0) {
624+
w = m_v->high.get_int(j,64);
625+
}
626+
} while (true);
627+
return pos;
628+
}
629+
630+
//! Function-call form of select(i).
size_type operator()(size_type i)const {
631+
return select(i);
632+
}
633+
634+
//! Returns the size of the supported vector (m_v is dereferenced without a null check).
size_type size()const {
635+
return m_v->size();
636+
}
637+
638+
//! Stores the pointer to the supported vector; the sample tables are not rebuilt.
void set_vector(const bit_vector_type* v=nullptr) {
639+
m_v = v;
640+
}
641+
642+
//! Copy assignment: copies both sample tables and the vector pointer of ss.
select_0_support_sd& operator=(const select_0_support_sd& ss) {
643+
if (this != &ss) {
644+
m_pointer = ss.m_pointer;
645+
m_rank1 = ss.m_rank1;
646+
set_vector(ss.m_v);
647+
}
648+
return *this;
649+
}
650+
651+
//! Swaps the two sample tables with ss; the m_v pointers are left untouched.
void swap(select_0_support_sd& ss) {
652+
m_pointer.swap(ss.m_pointer);
653+
m_rank1.swap(ss.m_rank1);
654+
}
655+
656+
//! Loads the sample tables from in (m_pointer first, then m_rank1) and sets the supported vector to v.
void load(std::istream& in, const bit_vector_type* v=nullptr) {
657+
m_pointer.load(in);
658+
m_rank1.load(in);
659+
set_vector(v);
660+
}
661+
662+
//! Serializes both sample tables to out and returns the number of bytes written.
/*! The tables are registered under the child names "pointer" and "rank1"
 *  of the structure tree node created for this object.
 */
size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const {
663+
structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
664+
size_type written_bytes = 0;
665+
written_bytes += m_pointer.serialize(out, child, "pointer");
666+
written_bytes += m_rank1.serialize(out, child, "rank1");
667+
structure_tree::add_size(child, written_bytes);
668+
return written_bytes;
669+
}
670+
671+
672+
};
673+
674+
675+
496676
} // end namespace
497677
#endif

test/SelectSupportTest.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ typedef Types<select_support_mcl<>,
2828
select_support_rrr<1, 128>,
2929
select_support_sd<1>,
3030
select_support_sd<0>,
31+
select_0_support_sd<>,
3132
select_support_il<1, 256>,
3233
select_support_il<1, 512>,
3334
select_support_il<1, 1024>,

0 commit comments

Comments
 (0)