@@ -99,7 +99,7 @@ class doc_list_index_sada
9999
100100 private:
101101 size_type m_doc_cnt; // number of documents in the collection
102- csa_full_type m_full_csa ; // CSA build from the collection text
102+ csa_full_type m_csa_full ; // CSA built from the collection text
103103 vector<int_vector<>> m_doc_isa; // array of inverse suffix arrays (ISAs); m_doc_isa[i] contains the ISA of document i
104104 range_min_type m_rminq; // range-minimum data structure built over an array Cprev
105105 range_max_type m_rmaxq; // range-maximum data structure built over an array Cnext
@@ -116,7 +116,7 @@ class doc_list_index_sada
116116 doc_list_index_sada () { }
117117
118118 doc_list_index_sada (std::string file_name, sdsl::cache_config& cconfig, uint8_t num_bytes) {
119- construct (m_full_csa , file_name, cconfig, num_bytes);
119+ construct (m_csa_full , file_name, cconfig, num_bytes);
120120
121121 const char * KEY_TEXT = key_text_trait<WIDTH>::KEY_TEXT;
122122 std::string text_file = cache_file_name (KEY_TEXT, cconfig);
@@ -154,14 +154,14 @@ class doc_list_index_sada
154154 }
155155
156156 size_type word_cnt ()const { // Returns the size of the full CSA minus doc_cnt(); presumably this discounts one separator symbol per document -- TODO confirm
157- return m_full_csa .size ()-doc_cnt ();
157+ return m_csa_full .size ()-doc_cnt ();
158158 }
159159
160160 size_type serialize (std::ostream& out, structure_tree_node* v=NULL , std::string name=" " )const {
161161 structure_tree_node* child = structure_tree::add_child (v, name, util::class_name (*this ));
162162 size_type written_bytes = 0 ;
163163 written_bytes += write_member (m_doc_cnt, out, child, " doc_cnt" );
164- written_bytes += m_full_csa .serialize (out, child, " full_csa " );
164+ written_bytes += m_csa_full .serialize (out, child, " csa_full " );
165165 written_bytes += serialize_vector (m_doc_isa, out, child, " doc_isa" );
166166 written_bytes += m_rminq.serialize (out, child, " rminq" );
167167 written_bytes += m_rmaxq.serialize (out, child, " rmaxq" );
@@ -176,7 +176,7 @@ class doc_list_index_sada
176176
177177 void load (std::istream& in) {
178178 read_member (m_doc_cnt, in);
179- m_full_csa .load (in);
179+ m_csa_full .load (in);
180180 m_doc_isa.resize (m_doc_cnt);
181181 load_vector (m_doc_isa, in);
182182 m_rminq.load (in);
@@ -195,7 +195,7 @@ class doc_list_index_sada
195195 void swap (doc_list_index_sada& dr) {
196196 if (this != &dr) {
197197 std::swap (m_doc_cnt, dr.m_doc_cnt );
198- m_full_csa .swap (dr.m_full_csa );
198+ m_csa_full .swap (dr.m_csa_full );
199199 m_doc_isa.swap (dr.m_doc_isa );
200200 m_rminq.swap (dr.m_rminq );
201201 m_rmaxq.swap (dr.m_rmaxq );
@@ -218,7 +218,7 @@ class doc_list_index_sada
218218 result& res,
219219 size_t k) const {
220220 size_type sp=1 , ep=0 ;
221- if (0 == backward_search (m_full_csa , 0 , m_full_csa .size ()-1 , begin, end, sp, ep)) {
221+ if (0 == backward_search (m_csa_full , 0 , m_csa_full .size ()-1 , begin, end, sp, ep)) {
222222 res = result ();
223223 return 0 ;
224224 } else {
@@ -249,46 +249,64 @@ class doc_list_index_sada
249249 m_doc_rmax_marked[doc] = 0 ; // get_lex_largest_suffixes
250250
251251 if (suffix_1 == suffix_2) { // if pattern occurs exactly once
252- res.push_back ({doc,1 }); // add the #occurrence
252+ res.push_back ( {doc,1 }); // add the #occurrence
253253 } else {
254254 size_type doc_begin = doc ? m_doc_border_select (doc) + 1 : 0 ;
255255 size_type doc_sp = m_doc_isa[doc][ suffix_1 - doc_begin ];
256256 size_type doc_ep = m_doc_isa[doc][ suffix_2 - doc_begin ];
257257 if (doc_sp > doc_ep) {
258258 std::swap (doc_sp, doc_ep);
259259 }
260- res.push_back ({doc, doc_ep - doc_sp + 1 });
260+ res.push_back ( {doc, doc_ep - doc_sp + 1 });
261261 }
262262 }
263263 }
264264
265- void get_lex_smallest_suffixes (size_type sp, size_type ep, vector<size_type>& suffixes)const {
266- if (sp > ep)
267- return ;
268- size_type min_idx = m_rminq (sp, ep);
269- size_type suffix = m_full_csa[min_idx];
270- size_type doc = m_doc_border_rank (suffix+1 );
271-
272- if (!m_doc_rmin_marked[doc]) {
273- suffixes.push_back (suffix);
274- m_doc_rmin_marked[doc] = 1 ;
275- get_lex_smallest_suffixes (sp, min_idx - 1 , suffixes); // min_idx != 0, since `\0` is appended to string
276- get_lex_smallest_suffixes (min_idx+1 , ep, suffixes);
265+ void get_lex_smallest_suffixes (size_type sp, size_type ep, vector<size_type>& suffixes) const { // Appends to `suffixes` one CSA value per not-yet-marked document, found by repeated range-minimum queries inside the index range [sp, ep]; iterative form of the former recursion
266+ using lex_range_t = std::pair<size_type,size_type>; // (first index, last index) of a pending sub-range, both inclusive
267+ std::stack<lex_range_t > stack; // explicit work stack that replaces the call stack of the recursive version
268+ stack.emplace (sp,ep); // start with the whole query range
269+ while (!stack.empty ()) {
270+ auto range = stack.top ();
271+ stack.pop ();
272+ size_type rsp = std::get<0 >(range);
273+ size_type rep = std::get<1 >(range);
274+ if (rsp <= rep) { // empty sub-ranges are skipped (the `sp > ep` early return of the recursive version)
275+ size_type min_idx = m_rminq (rsp,rep); // index chosen by the range-minimum structure within [rsp, rep]
276+ size_type suffix = m_csa_full[min_idx]; // value stored in the full CSA at that index
277+ size_type doc = m_doc_border_rank (suffix+1 ); // presumably the id of the document containing this text position -- TODO confirm
278+
279+ if (!m_doc_rmin_marked[doc]) { // an already marked document ends the search in this sub-range; it is not split further
280+ suffixes.push_back (suffix);
281+ m_doc_rmin_marked[doc] = 1 ; // mark the document so that it is reported only once
282+ stack.emplace (min_idx+1 ,rep); // right part is pushed first, so it is processed after the left part
283+ stack.emplace (rsp,min_idx-1 ); // left part is pushed last and therefore popped next, keeping the left-then-right order of the recursion; min_idx != 0, since `\0` is appended to the string, so min_idx-1 does not wrap around (assuming size_type is unsigned)
284+ }
285+ }
277286 }
278287 }
279288
280- void get_lex_largest_suffixes (size_type sp, size_type ep, vector<size_type>& suffixes)const {
281- if (sp > ep)
282- return ;
283- size_type max_idx = m_rmaxq (sp, ep);
284- size_type suffix = m_full_csa[max_idx];
285- size_type doc = m_doc_border_rank (suffix+1 );
286-
287- if (!m_doc_rmax_marked[doc]) {
288- suffixes.push_back (suffix);
289- m_doc_rmax_marked[doc] = 1 ;
290- get_lex_largest_suffixes (max_idx+1 , ep, suffixes);
291- get_lex_largest_suffixes (sp, max_idx - 1 , suffixes); // max_idx != 0, since `\0` is appended to string
289+ void get_lex_largest_suffixes (size_type sp, size_type ep, vector<size_type>& suffixes) const { // Appends to `suffixes` one CSA value per not-yet-marked document, found by repeated range-maximum queries inside the index range [sp, ep]; iterative form of the former recursion
290+ using lex_range_t = std::pair<size_type,size_type>; // (first index, last index) of a pending sub-range, both inclusive
291+ std::stack<lex_range_t > stack; // explicit work stack that replaces the call stack of the recursive version
292+ stack.emplace (sp,ep); // start with the whole query range
293+ while (!stack.empty ()) {
294+ auto range = stack.top ();
295+ stack.pop ();
296+ size_type rsp = std::get<0 >(range);
297+ size_type rep = std::get<1 >(range);
298+ if (rsp <= rep) { // empty sub-ranges are skipped (the `sp > ep` early return of the recursive version)
299+ size_type max_idx = m_rmaxq (rsp,rep); // index chosen by the range-maximum structure within [rsp, rep]
300+ size_type suffix = m_csa_full[max_idx]; // value stored in the full CSA at that index
301+ size_type doc = m_doc_border_rank (suffix+1 ); // presumably the id of the document containing this text position -- TODO confirm
302+
303+ if (!m_doc_rmax_marked[doc]) { // an already marked document ends the search in this sub-range; it is not split further
304+ suffixes.push_back (suffix);
305+ m_doc_rmax_marked[doc] = 1 ; // mark the document so that it is reported only once
306+ stack.emplace (rsp,max_idx - 1 ); // left part is pushed first, so it is processed after the right part; max_idx != 0, since `\0` is appended to the string, so max_idx-1 does not wrap around (assuming size_type is unsigned)
307+ stack.emplace (max_idx+1 ,rep); // right part is pushed last and therefore popped next, keeping the right-then-left order of the recursion
308+ }
309+ }
292310 }
293311 }
294312
0 commit comments