@@ -17,6 +17,7 @@ package dupi
1717import (
1818 "fmt"
1919 "log"
20+ "math"
2021 "os"
2122
2223 "github.com/go-air/dupi/blotter"
@@ -95,10 +96,36 @@ func (x *Index) Root() string {
9596 return x .config .IndexRoot
9697}
9798
99+ func (x * Index ) Stats () (* Stats , error ) {
100+ var err error
101+ st := & Stats {}
102+ st .Root = x .config .IndexRoot
103+ st .NumBlots = 1 << 16 * uint64 (len (x .shards ))
104+ st .NumDocs , err = x .dmd .NumDocs ()
105+ if err != nil {
106+ return nil , err
107+ }
108+ st .NumPaths = uint64 (len (x .fnames .d ))
109+
110+ for i := range x .shards {
111+ shrd := & x .shards [i ]
112+ st .NumPosts += shrd .NumPosts ()
113+ }
114+ st .BlotMean = float64 (st .NumPosts ) / float64 (st .NumBlots )
115+ var sos float64
116+ for i := range x .shards {
117+ shrd := & x .shards [i ]
118+ sos += shrd .SosDiffs (st .BlotMean )
119+ }
120+ sos /= float64 (st .NumBlots )
121+ st .BlotSigma = math .Sqrt (sos )
122+ return st , nil
123+ }
124+
98125func (x * Index ) TokenFunc () token.TokenizerFunc {
99126 tf , err := token .FromConfig (& x .config .TokenConfig )
100127 if err != nil {
101- panic (err ) // should be impossible.
128+ panic (err ) // should be impossible, tf created in ctor
102129 }
103130 return tf
104131}
@@ -159,7 +186,6 @@ func (x *Index) JoinBlot(shard uint32, sblot uint16) uint32 {
159186 blot := nsh * uint32 (sblot )
160187 blot += shard
161188 return blot
162-
163189}
164190
165191func (x * Index ) FindBlot (theBlot uint32 , doc * Doc ) (start , end uint32 , err error ) {
0 commit comments