@@ -28,145 +28,145 @@ class t_csa       = csa_wt<wt_huff<rrr_vector<63>>, 1000000, 1000000>,
2828      class  t_wtd  = wt_int<bit_vector,rank_support_v5<1 >,select_support_scan<1 >,select_support_scan<0 >>,
2929      typename  t_csa::char_type t_doc_delim = 1 
3030      >
31-        class  doc_list_index_greedy 
32-        {
33-            public: 
34-            typedef  t_csa                                       csa_type;
35-            typedef  t_wtd                                       wtd_type;
36-            typedef  int_vector<>::size_type                     size_type;
37-            typedef  std::vector<std::pair<size_type,size_type>> list_type;
38-            typedef  doc_list_tag                                index_category;
39- 
40-            enum  { WIDTH = t_csa::alphabet_category::WIDTH };
41- 
42-            class  result  : public  list_type 
43-            {
44-                private: 
45-               size_type m_sp, m_ep;
46-                public: 
47-               //  Number of occurrences
48- size_type count () {
49-     return  m_ep-m_sp+1 ;
50- }
31+ class  doc_list_index_greedy 
32+ {
33+     public: 
// Public type aliases of the index.
// csa_type and wtd_type simply re-export the template arguments t_csa and t_wtd.
34+         typedef  t_csa                                       csa_type;
35+         typedef  t_wtd                                       wtd_type;
36+         typedef  int_vector<>::size_type                     size_type;
// list_type is the base class of `result`; search() fills it with the value
// returned by m_wtd.topk_greedy().
// NOTE(review): the pairs presumably hold (document id, frequency) - confirm against topk_greedy.
37+         typedef  std::vector<std::pair<size_type,size_type>> list_type;
38+         typedef  doc_list_tag                                index_category;
39+ 
// WIDTH is taken from the alphabet category of the CSA. Inside this class it is used as
// the template argument of key_text_trait<> and int_vector_buffer<> when the text is read.
40+         enum  { WIDTH = t_csa::alphabet_category::WIDTH };
41+ 
42+         class  result  : public  list_type 
43+         {
44+             private: 
45+                  size_type m_sp, m_ep;
46+             public: 
// Size of the stored interval [m_sp, m_ep], computed as m_ep-m_sp+1.
// For the empty result (m_sp=1, m_ep=0) the expression evaluates to 0.
// The accessor does not modify the object and is therefore const, so that it
// can also be called through a const result or a const reference.
47+                  //  Number of occurrences
48+                  size_type count () const {
49+                      return  m_ep-m_sp+1 ;
50+                  }
5151
5252//  Constructors for an empty result and for a result in the interval [sp, ep]:
53- result (size_type sp, size_type ep,list_type&& l) : list_type(l), m_sp(1 ), m_ep(0 ) {}
54- result () : m_sp(1 ), m_ep(0 ) {}
55- result (size_type sp, size_type ep) : m_sp(sp), m_ep(ep) {}
56- result& operator =(const  result& res) {
57-     if  (this  != &res) {
58-         list_type::operator =(res);
59-         m_sp = res.m_sp ;
60-         m_ep = res.m_ep ;
61-     }
62-     return  *this ;
63- }
64- 
65-            };
66- 
67- protected: 
68- size_type m_doc_cnt; //  number of documents in the collection
69- csa_type  m_csa;     //  CSA built from the collection text
70- wtd_type  m_wtd;     //  wtd build from the collection text
71- public: 
// Result for the interval [sp, ep] together with the list l that was computed
// for that interval (search() passes the value returned by m_wtd.topk_greedy).
// The bounds have to be stored as given: storing the empty interval (1, 0) here
// would make count() report 0 for every non-empty result.
// l is a named rvalue reference and thus an lvalue inside the constructor, so
// std::move is required to move it into the base class instead of copying it.
53+                  result (size_type sp, size_type ep,list_type&& l) : list_type(std::move (l)), m_sp(sp), m_ep(ep) {}
// Empty result: the list base is default-constructed and the interval is set to
// (m_sp=1, m_ep=0), for which count() evaluates to 0.
54+                  result () : m_sp(1 ), m_ep(0 ) {}
// Result that only records the interval [sp, ep]; the list base is default-constructed.
55+                  result (size_type sp, size_type ep) : m_sp(sp), m_ep(ep) {}
// Copy assignment: copies the list part through the base-class assignment and
// then the interval bounds. Returns *this.
56+                  result& operator =(const  result& res) {
// Self-assignment is skipped entirely.
57+                      if  (this  != &res) {
58+                          list_type::operator =(res);
59+                          m_sp = res.m_sp ;
60+                          m_ep = res.m_ep ;
61+                      }
62+                      return  *this ;
63+                  }
64+ 
65+         };
66+ 
67+      protected: 
// State of the index. m_doc_cnt is assigned in the constructing constructor and is
// (de)serialized, but doc_cnt() derives its value from m_wtd.sigma instead of reading it.
68+          size_type m_doc_cnt; //  number of documents in the collection (number of set bits in doc_border)
69+          csa_type  m_csa;     //  CSA built from the collection text
70+          wtd_type  m_wtd;     //  wtd built from the D array (one document number per suffix array entry)
71+      public: 
7272
7373// ! Default constructor
74- doc_list_index_greedy () { }
75- 
76- doc_list_index_greedy (std::string file_name, sdsl::cache_config& cconfig, uint8_t  num_bytes) {
77-     construct (m_csa, file_name, cconfig, num_bytes);
78- 
79-     const  char * KEY_TEXT = key_text_trait<WIDTH>::KEY_TEXT;
80-     std::string text_file = cache_file_name (KEY_TEXT, cconfig);
81- 
82-     bit_vector doc_border;
83-     construct_doc_border (text_file,doc_border);
84-     bit_vector::rank_1_type doc_border_rank (&doc_border);
85-     m_doc_cnt = doc_border_rank (doc_border.size ());
86- 
87-     int_vector_buffer<0 > sa_buf (cache_file_name (constants ::KEY_SA, cconfig));
88-     {
89-         int_vector<> D;
90-         construct_D_array (sa_buf, doc_border_rank, m_doc_cnt, D);
91-         std::string d_file = cache_file_name (" DARRAY" 
92-         store_to_file (D, d_file);
93-         util::clear (D);
94-         construct (m_wtd, d_file);
95-         sdsl::remove (d_file);
96-     }
97- }
98- 
99- size_type doc_cnt ()const  {
100-     return  m_wtd.sigma -1 ; //  subtract one, since zero does not count
101- }
102- 
103- size_type word_cnt ()const  {
104-     return  m_wtd.size ()-doc_cnt ();
105- }
106- 
107- size_type serialize (std::ostream& out, structure_tree_node* v=NULL , std::string name=" " const  {
108-     structure_tree_node* child = structure_tree::add_child (v, name, util::class_name (*this ));
109-     size_type written_bytes = 0 ;
110-     written_bytes += write_member (m_doc_cnt, out, child, " doc_cnt" 
111-     written_bytes += m_csa.serialize (out, child, " csa" 
112-     written_bytes += m_wtd.serialize (out, child, " wtd" 
113-     structure_tree::add_size (child, written_bytes);
114-     return  written_bytes;
115- }
116- 
117- void  load (std::istream& in) {
118-     read_member (m_doc_cnt, in);
119-     m_csa.load (in);
120-     m_wtd.load (in);
121- }
122- 
123- void  swap (doc_list_index_greedy& dr) {
124-     if  (this  != &dr) {
125-         std::swap (m_doc_cnt, dr.m_doc_cnt );
126-         m_csa.swap (dr.m_csa );
127-         m_wtd.swap (dr.m_wtd );
128-     }
129- }
// Creates an empty index. m_doc_cnt has no default member initializer and is
// written out by serialize(), so it is set to 0 explicitly instead of being left
// without a defined value. m_csa and m_wtd are default-constructed.
74+          doc_list_index_greedy () : m_doc_cnt(0 ) { }
75+ 
// Builds the index for the collection stored in file_name.
//   file_name  file handed to construct() for building m_csa
//   cconfig    cache configuration used to resolve the names of the cached text,
//              suffix array and temporary D-array files
//   num_bytes  forwarded unchanged to construct() for m_csa
// Steps: build the CSA, mark the document delimiters of the cached text in a
// bitvector, map every suffix array entry to a document number (D array) and
// build m_wtd from that array.
76+          doc_list_index_greedy (std::string file_name, sdsl::cache_config& cconfig, uint8_t  num_bytes) {
77+              construct (m_csa, file_name, cconfig, num_bytes);
78+ 
// Locate the text file in the cache; this code relies on it being available
// after the CSA construction above.
79+              const  char * KEY_TEXT = key_text_trait<WIDTH>::KEY_TEXT;
80+              std::string text_file = cache_file_name (KEY_TEXT, cconfig);
81+ 
82+              bit_vector doc_border;
83+              construct_doc_border (text_file,doc_border);
84+              bit_vector::rank_1_type doc_border_rank (&doc_border);
// Rank taken at the end of the bitvector.
// NOTE(review): assumes rank(i) counts the ones in [0, i), which makes this the number of delimiters - confirm.
85+              m_doc_cnt = doc_border_rank (doc_border.size ());
86+ 
87+              int_vector_buffer<0 > sa_buf (cache_file_name (conf ::KEY_SA, cconfig));
88+              {
// D only lives inside this scope: it is written to a temporary cache file and
// cleared before m_wtd is constructed from that file; the file is removed afterwards.
89+                  int_vector<> D;
90+                  construct_D_array (sa_buf, doc_border_rank, m_doc_cnt, D);
// NOTE(review): the next statement appears truncated in this rendering (the call is not closed) - check against the real file.
91+                  std::string d_file = cache_file_name (" DARRAY" 
92+                  store_to_file (D, d_file);
93+                  util::clear (D);
94+                  construct (m_wtd, d_file);
95+                  sdsl::remove (d_file);
96+              }
97+          }
98+ 
// Number of documents, derived from the alphabet size of m_wtd rather than
// from the stored m_doc_cnt member.
99+          size_type doc_cnt ()const  {
100+              return  m_wtd.sigma -1 ; //  subtract one, since zero does not count
101+          }
102+ 
// Length of m_wtd minus the number of documents reported by doc_cnt().
// NOTE(review): presumably this discounts one delimiter symbol per document - confirm.
103+          size_type word_cnt ()const  {
104+              return  m_wtd.size ()-doc_cnt ();
105+          }
106+ 
// Writes the index to out in the order m_doc_cnt, m_csa, m_wtd (the same order
// load() reads them in).
//   out   destination stream
//   v     parent node of the structure tree; a child node for this object is added to it
//   name  name given to that child node
// Returns the sum of the byte counts reported by the three write operations;
// the same sum is recorded on the child node via structure_tree::add_size().
// NOTE(review): the string literals on the signature line and on the three write lines
// appear truncated in this rendering - check against the real file.
107+          size_type serialize (std::ostream& out, structure_tree_node* v=NULL , std::string name=" " const  {
108+              structure_tree_node* child = structure_tree::add_child (v, name, util::class_name (*this ));
109+              size_type written_bytes = 0 ;
110+              written_bytes += write_member (m_doc_cnt, out, child, " doc_cnt" 
111+              written_bytes += m_csa.serialize (out, child, " csa" 
112+              written_bytes += m_wtd.serialize (out, child, " wtd" 
113+              structure_tree::add_size (child, written_bytes);
114+              return  written_bytes;
115+          }
116+ 
// Reads the index from in. The members are read in the order
// m_doc_cnt, m_csa, m_wtd, which is the order serialize() writes them in.
117+          void  load (std::istream& in) {
118+              read_member (m_doc_cnt, in);
119+              m_csa.load (in);
120+              m_wtd.load (in);
121+          }
122+ 
// Exchanges the complete state (m_doc_cnt, m_csa, m_wtd) with dr.
123+          void  swap (doc_list_index_greedy& dr) {
// Swapping an object with itself is skipped.
124+              if  (this  != &dr) {
125+                  std::swap (m_doc_cnt, dr.m_doc_cnt );
126+                  m_csa.swap (dr.m_csa );
127+                  m_wtd.swap (dr.m_wtd );
128+              }
129+          }
130130
131131// ! Search for the k documents which contain the search term most frequently
132- template <class  t_pat_iter >
133- size_type search (t_pat_iter begin, t_pat_iter end, result& res, size_t  k) const  {
134-     size_type sp=1 , ep=0 ;
135-     if  (0  == backward_search (m_csa, 0 , m_csa.size ()-1 , begin, end, sp, ep)) {
136-         res = result ();
137-         return  0 ;
138-     } else  {
139-         auto  tmp_res = m_wtd.topk_greedy (sp,ep,k);
140-         res = result (sp, ep, std::move (tmp_res));
141-         return  ep-sp+1 ;
142-     }
143- }
144- 
145- private: 
// Looks up the pattern [begin, end) in the CSA and stores the outcome in res.
//   begin, end  iterator range of the pattern, forwarded to backward_search()
//   res         output; overwritten in every case
//   k           forwarded to m_wtd.topk_greedy() as the number of entries requested
// Returns 0 if backward_search() reports no match (res is then the empty result),
// otherwise ep-sp+1 for the interval [sp, ep] found by backward_search().
132+          template <class  t_pat_iter >
133+          size_type search (t_pat_iter begin, t_pat_iter end, result& res, size_t  k) const  {
134+              size_type sp=1 , ep=0 ;
// The search starts from the full range [0, m_csa.size()-1]; sp and ep are output arguments.
135+              if  (0  == backward_search (m_csa, 0 , m_csa.size ()-1 , begin, end, sp, ep)) {
136+                  res = result ();
137+                  return  0 ;
138+              } else  {
// The list for [sp, ep] is computed on m_wtd and handed over to the result via std::move.
139+                  auto  tmp_res = m_wtd.topk_greedy (sp,ep,k);
140+                  res = result (sp, ep, std::move (tmp_res));
141+                  return  ep-sp+1 ;
142+              }
143+          }
144+ 
145+      private: 
146146// ! Construct the doc_border bitvector by streaming the text file
147- void 
148- construct_doc_border (const  std::string& text_file, bit_vector& doc_border) {
149-     int_vector_buffer<WIDTH> text_buf (text_file);
150-     doc_border = bit_vector (text_buf.size (), 0 );
151-     for  (size_type i = 0 ; i < text_buf.size (); ++i) {
152-         if  (t_doc_delim == text_buf[i]) {
153-             doc_border[i] = 1 ;
// Fills doc_border with one bit per symbol of the text stored in text_file:
// bit i is set exactly when symbol i equals the delimiter t_doc_delim.
//   text_file   file opened through int_vector_buffer<WIDTH>
//   doc_border  output; replaced by a new bitvector of text length, initialised to 0
147+         void 
148+         construct_doc_border (const  std::string& text_file, bit_vector& doc_border) {
149+             int_vector_buffer<WIDTH> text_buf (text_file);
150+             doc_border = bit_vector (text_buf.size (), 0 );
151+             for  (size_type i = 0 ; i < text_buf.size (); ++i) {
152+                 if  (t_doc_delim == text_buf[i]) {
153+                     doc_border[i] = 1 ;
154+                 }
155+             }
156+         }
157+ 
// Fills D with one value per suffix array entry: D[i] is the rank of position
// sa_buf[i]+1 in the document border bitvector.
// NOTE(review): assumes rank(p) counts the ones in [0, p), so D[i] is the number of
// delimiters at text positions <= sa_buf[i] - confirm.
//   sa_buf           buffered suffix array
//   doc_border_rank  rank support over the document border bitvector
//   doc_cnt          number of documents; only used to choose the bit width of D
//   D                output; replaced by a new vector with sa_buf.size() entries
158+         void 
159+         construct_D_array (int_vector_buffer<0 >& sa_buf,
160+                           bit_vector::rank_1_type& doc_border_rank,
161+                           const  size_type doc_cnt,
162+                           int_vector<>& D) {
// Entries are bits::hi(doc_cnt+1)+1 bits wide and initialised to 0.
163+             D = int_vector<>(sa_buf.size (), 0 , bits::hi (doc_cnt+1 )+1 );
164+             for  (size_type i = 0 ; i < sa_buf.size (); ++i) {
165+                 uint64_t  d = doc_border_rank (sa_buf[i]+1 );
166+                 D[i] = d;
167+             }
154168        }
155-     }
156- }
157- 
158- void 
159- construct_D_array (int_vector_buffer<0 >& sa_buf,
160-                   bit_vector::rank_1_type& doc_border_rank,
161-                   const  size_type doc_cnt,
162- int_vector<>& D) {
163-     D = int_vector<>(sa_buf.size (), 0 , bits::hi (doc_cnt+1 )+1 );
164-     for  (size_type i = 0 ; i < sa_buf.size (); ++i) {
165-         uint64_t  d = doc_border_rank (sa_buf[i]+1 );
166-         D[i] = d;
167-     }
168- }
169-       };
169+ };
170170
171171} //  end namespace
172172
0 commit comments