@@ -72,6 +72,7 @@ class wt_int
7272        typedef  t_select_zero                        select_0_type;
7373        typedef  wt_tag                               index_category;
7474        typedef  int_alphabet_tag                     alphabet_category;
75+         enum  	{lex_ordered=1 };
7576
7677        typedef  std::pair<value_type, size_type>     point_type;
7778        typedef  std::vector<point_type>              point_vec_type;
@@ -111,6 +112,48 @@ class wt_int
111112            m_path_rank_off = int_vector<64 >(max_depth+1 );
112113        }
113114
115+         //  recursive internal version of the method interval_symbols
116+         void  _interval_symbols (size_type i, size_type j, size_type& k,
117+                                std::vector<value_type>& cs,
118+                                std::vector<size_type>& rank_c_i,
119+                                std::vector<size_type>& rank_c_j,
120+                                size_type depth,
121+                                size_type path,
122+                                size_type node_size,
123+                                size_type offset) const  {
124+             //  invariant: j>i
125+ 
126+             if  (depth >= m_max_depth) {
127+                 rank_c_i[k]= i;
128+                 rank_c_j[k]= j;
129+                 cs[k++]= path;
130+                 return ;
131+             }
132+ 
133+             size_type ones_before_o = m_tree_rank (offset);
134+             size_type ones_before_i = m_tree_rank (offset+i) - ones_before_o;
135+             size_type ones_before_j = m_tree_rank (offset+j) - ones_before_o;
136+             size_type ones_before_end = m_tree_rank (offset+ node_size) - ones_before_o;
137+ 
138+             //  goto left child
139+             if  ((j-i)-(ones_before_j-ones_before_i)>0 ) {
140+                 size_type new_offset = offset + m_size;
141+                 size_type new_node_size = node_size - ones_before_end;
142+                 size_type new_i = i - ones_before_i;
143+                 size_type new_j = j - ones_before_j;
144+                 _interval_symbols (new_i, new_j, k, cs, rank_c_i, rank_c_j, depth+1 , path<<1 , new_node_size, new_offset);
145+             }
146+ 
147+             //  goto right child
148+             if  ((ones_before_j-ones_before_i)>0 ) {
149+                 size_type new_offset = offset+(node_size - ones_before_end) + m_size;
150+                 size_type new_node_size = ones_before_end;
151+                 size_type new_i = ones_before_i;
152+                 size_type new_j = ones_before_j;
153+                 _interval_symbols (new_i, new_j, k, cs, rank_c_i, rank_c_j, depth+1 , (path<<1 )|1 , new_node_size, new_offset);
154+             }
155+         }
156+ 
114157    public: 
115158
116159        const  size_type&       sigma = m_sigma; // !< Effective alphabet size of the wavelet tree.
@@ -263,8 +306,10 @@ class wt_int
263306        }
264307
265308        // ! Recovers the i-th symbol of the original vector.
266-         /* ! \param i The index of the symbol in the original vector. \f$i \in [0..size()-1]\f$ 
309+         /* ! \param i The index of the symbol in the original vector.
267310         *  \returns The i-th symbol of the original vector. 
311+          *  \par Precondition 
312+          *       \f$ i < size() \f$ 
268313         */  
269314        value_type operator [](size_type i)const  {
270315            assert (i < size ());
@@ -296,12 +341,17 @@ class wt_int
296341         *  \param c The symbol to count the occurrences in the prefix. 
297342         *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the supported vector. 
298343         *  \par Time complexity 
299-          *        \f$ \Order{\log |\Sigma|} \f$ 
344+          *       \f$ \Order{\log |\Sigma|} \f$ 
345+          *  \par Precondition 
346+          *       \f$ i \leq size() \f$ 
300347         */  
301348        size_type rank (size_type i, value_type c)const  {
302349            assert (i <= size ());
350+             if  (((1ULL )<<(m_max_depth))<=c) { //  c is greater than any symbol in wt
351+                 return  0 ;
352+             }
303353            size_type offset = 0 ;
304-             uint64_t  mask      = (1ULL ) << (m_max_depth-1 );
354+             uint64_t  mask = (1ULL ) << (m_max_depth-1 );
305355            size_type node_size = m_size;
306356            for  (uint32_t  k=0 ; k < m_max_depth and  i; ++k) {
307357                size_type ones_before_o   = m_tree_rank (offset);
@@ -327,27 +377,48 @@ class wt_int
327377        /* !
328378         *  \param i The index of the symbol. 
329379         *  \return  Pair (rank(wt[i],i),wt[i]) 
380+          *  \par Precondition 
381+          *       \f$ i < size() \f$ 
330382         */  
331383        std::pair<size_type, value_type>
332384        inverse_select (size_type i)const  {
333385            assert (i < size ());
334-             value_type c = (*this )[i];
335-             return  std::make_pair (rank (i, c),c);
386+ 
387+             value_type c = 0 ;
388+             size_type node_size = m_size, offset = 0 ;
389+             for  (uint32_t  k=0 ; k < m_max_depth; ++k) {
390+                 size_type ones_before_o   = m_tree_rank (offset);
391+                 size_type ones_before_i   = m_tree_rank (offset + i) - ones_before_o;
392+                 size_type ones_before_end = m_tree_rank (offset + node_size) - ones_before_o;
393+                 c<<=1 ;
394+                 if  (m_tree[offset+i]) { //  go to the right child
395+                     offset += (node_size - ones_before_end);
396+                     node_size = ones_before_end;
397+                     i = ones_before_i;
398+                     c|=1 ;
399+                 } else  { //  go to the left child
400+                     node_size = (node_size - ones_before_end);
401+                     i = (i-ones_before_i);
402+                 }
403+                 offset += m_size;
404+             }
405+             return  std::make_pair (i,c);
336406        }
337407
338408        // ! Calculates the i-th occurrence of the symbol c in the supported vector.
339409        /* !
340-          *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.  
410+          *  \param i The i-th occurrence. 
341411         *  \param c The symbol c. 
342412         *  \par Time complexity 
343-          *        \f$ \Order{\log |\Sigma|} \f$ 
413+          *       \f$ \Order{\log |\Sigma|} \f$ 
414+          *  \par Precondition 
415+          *       \f$ 1 \leq i \leq rank(size(), c) \f$ 
344416         */  
345417        size_type select (size_type i, value_type c)const  {
346-             assert (i > 0 );
347-             assert (i <= rank (size (), c));
418+             assert (1  <= i and  i <= rank (size (), c));
348419            //  possible optimization: if the array is a permutation we can start at the bottom of the tree
349420            size_type offset = 0 ;
350-             uint64_t  mask      = (1ULL ) << (m_max_depth-1 );
421+             uint64_t  mask    = (1ULL ) << (m_max_depth-1 );
351422            size_type node_size = m_size;
352423            m_path_off[0 ] = m_path_rank_off[0 ] = 0 ;
353424
@@ -383,6 +454,136 @@ class wt_int
383454            return  i-1 ;
384455        };
385456
457+ 
458+         // ! For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
459+         /* !
460+          * \param i        The start index (inclusive) of the interval. 
461+          * \param j        The end index (exclusive) of the interval. 
462+          * \param k        Reference for number of different symbols in [i..j-1]. 
463+          * \param cs       Reference to a vector that will contain in 
464+          *                 cs[0..k-1] all symbols that occur in [i..j-1] in 
465+          *                 ascending order. 
466+          * \param rank_c_i Reference to a vector which equals 
467+          *                 rank_c_i[p] = rank(i,cs[p]), for \f$ 0 \leq p < k \f$. 
468+          * \param rank_c_j Reference to a vector which equals 
469+          *                 rank_c_j[p] = rank(j,cs[p]), for \f$ 0 \leq p < k \f$. 
470+          * \par Time complexity 
471+          *      \f$ \Order{\min{\sigma, k \log \sigma}} \f$ 
472+          * 
473+          * \par Precondition 
474+          *      \f$ i \leq j \leq size() \f$ 
475+          *      \f$ cs.size() \geq \sigma \f$ 
476+          *      \f$ rank_{c_i}.size() \geq \sigma \f$ 
477+          *      \f$ rank_{c_j}.size() \geq \sigma \f$ 
478+          */  
479+         void  interval_symbols (size_type i, size_type j, size_type& k,
480+                               std::vector<value_type>& cs,
481+                               std::vector<size_type>& rank_c_i,
482+                               std::vector<size_type>& rank_c_j) const  {
483+             assert (i <= j and  j <= size ());
484+             k=0 ;
485+             if  (i==j) {
486+                 return ;
487+             }
488+             if  ((i+1 )==j) {
489+                 auto  res = inverse_select (i);
490+                 cs[0 ]=res.second ;
491+                 rank_c_i[0 ]=res.first ;
492+                 rank_c_j[0 ]=res.first +1 ;
493+                 k=1 ;
494+                 return ;
495+             }
496+ 
497+             _interval_symbols (i, j, k, cs, rank_c_i, rank_c_j, 0 , 0 , m_size, 0 );
498+ 
499+         }
500+ 
501+         // ! How many symbols are lexicographic smaller/greater than c in [i..j-1].
502+         /* !
503+          * \param i       Start index (inclusive) of the interval. 
504+          * \param j       End index (exclusive) of the interval. 
505+          * \param c       Symbol c. 
506+          * \return A triple containing: 
507+          *         * rank(c,i) 
508+          *         * #symbols smaller than c in [i..j-1] 
509+          *         * #symbols greater than c in [i..j-1] 
510+          * 
511+          * \par Precondition 
512+          *      \f$ i \leq j \leq size() \f$ 
513+          */  
514+         template <class  t_ret_type  = std::tuple<size_type, size_type, size_type>>
515+         t_ret_type lex_count (size_type i, size_type j, value_type c)const  {
516+             assert (i <= j and  j <= size ());
517+             if  (((1ULL )<<(m_max_depth))<=c) { //  c is greater than any symbol in wt
518+                 return  t_ret_type {0 , j-i, 0 };
519+             }
520+             size_type offset  = 0 ;
521+             size_type smaller = 0 ;
522+             size_type greater = 0 ;
523+             uint64_t  mask     = (1ULL ) << (m_max_depth-1 );
524+             size_type node_size = m_size;
525+             for  (uint32_t  k=0 ; k < m_max_depth; ++k) {
526+                 size_type ones_before_o   = m_tree_rank (offset);
527+                 size_type ones_before_i   = m_tree_rank (offset + i) - ones_before_o;
528+                 size_type ones_before_j   = m_tree_rank (offset + j) - ones_before_o;
529+                 size_type ones_before_end = m_tree_rank (offset + node_size) - ones_before_o;
530+                 if  (c & mask) { //  search for a one at this level
531+                     offset += (node_size - ones_before_end);
532+                     node_size = ones_before_end;
533+                     smaller += j-i-ones_before_j+ones_before_i;
534+                     i = ones_before_i;
535+                     j = ones_before_j;
536+                 } else  { //  search for a zero at this level
537+                     node_size -= ones_before_end;
538+                     greater += ones_before_j-ones_before_i;
539+                     i -= ones_before_i;
540+                     j -= ones_before_j;
541+                 }
542+                 offset += m_size;
543+                 mask >>= 1 ;
544+             }
545+             return  t_ret_type {i, smaller, greater};
546+         };
547+ 
548+         // ! How many symbols are lexicographic smaller than c in [0..i-1].
549+         /* !
550+          * \param i Exclusive right bound of the range. 
551+          * \param c Symbol c. 
552+          * \return A tuple containing: 
553+          *         * rank(c,i) 
554+          *         * #symbols smaller than c in [0..i-1] 
555+          * \par Precondition 
556+          *      \f$ i \leq size() \f$ 
557+          */  
558+         template <class  t_ret_type  = std::tuple<size_type, size_type>>
559+         t_ret_type lex_smaller_count (size_type i, value_type c) const  {
560+             assert (i <= size ());
561+             if  (((1ULL )<<(m_max_depth))<=c) { //  c is greater than any symbol in wt
562+                 return  t_ret_type {0 , i};
563+             }
564+             size_type offset = 0 ;
565+             size_type result = 0 ;
566+             uint64_t  mask    = (1ULL ) << (m_max_depth-1 );
567+             size_type node_size = m_size;
568+             for  (uint32_t  k=0 ; k < m_max_depth and  i; ++k) {
569+                 size_type ones_before_o   = m_tree_rank (offset);
570+                 size_type ones_before_i   = m_tree_rank (offset + i) - ones_before_o;
571+                 size_type ones_before_end = m_tree_rank (offset + node_size) - ones_before_o;
572+                 if  (c & mask) { //  search for a one at this level
573+                     offset   += (node_size - ones_before_end);
574+                     node_size = ones_before_end;
575+                     result   += i - ones_before_i;
576+                     i         = ones_before_i;
577+                 } else  { //  search for a zero at this level
578+                     node_size = (node_size - ones_before_end);
579+                     i        -= ones_before_i;
580+                 }
581+                 offset += m_size;
582+                 mask >>= 1 ;
583+             }
584+             return  t_ret_type {i, result};
585+         }
586+ 
386587        // ! range_search_2d searches points in the index interval [lb..rb] and value interval [vlb..vrb].
387588        /* ! \param lb     Left bound of index interval (inclusive)
388589         *  \param rb     Right bound of index interval (inclusive) 
0 commit comments