Skip to content

Commit 9979f3b

Browse files
committed
refactor, cleanup errors and document loading
1 parent 692aa64 commit 9979f3b

File tree

3 files changed

+47
-12
lines changed

3 files changed

+47
-12
lines changed

cmd/dupi/unblot.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,20 @@ func (ub *unblotCmd) Run(args []string) error {
4343
m := make(map[string][]*dupi.Doc)
4444
for i := range blot.Docs {
4545
doc := &blot.Docs[i]
46+
4647
start, end, err := idx.FindBlot(hex, doc)
4748
if err != nil {
4849
log.Printf("warning: %s", err)
4950
continue
5051
}
51-
dat := string(doc.Dat[start:end])
52+
dat := string(doc.Dat[start-doc.Start : end-doc.Start])
5253
doc.Dat = nil
5354
m[dat] = append(m[dat], doc)
5455
}
5556
for k, ds := range m {
57+
if len(ds) < 2 {
58+
continue
59+
}
5660
fmt.Printf("text:\n'''\n%s'''\n", k)
5761
for _, d := range ds {
5862
fmt.Printf("\t%s %d:%d\n", d.Path, d.Start, d.End)

doc.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@
1616
// data in large sets of documents.
1717
package dupi
1818

19+
import (
20+
"fmt"
21+
"io"
22+
"io/ioutil"
23+
"os"
24+
)
25+
1926
type Doc struct {
2027
Path string
2128
Start uint32
@@ -28,3 +35,36 @@ func NewDoc(path, body string) *Doc {
2835
Path: path,
2936
Dat: []byte(body)}
3037
}
38+
39+
func (doc *Doc) Load() error {
40+
var (
41+
f *os.File
42+
err error
43+
)
44+
45+
f, err = os.Open(doc.Path)
46+
if err != nil {
47+
return err
48+
}
49+
50+
if doc.Start == 0 && doc.End == 0 {
51+
doc.Dat, err = ioutil.ReadAll(f)
52+
if err != nil {
53+
return fmt.Errorf("readall: %w", err)
54+
}
55+
f.Close()
56+
doc.End = uint32(len(doc.Dat))
57+
} else {
58+
_, err = f.Seek(int64(doc.Start), io.SeekStart)
59+
if err != nil {
60+
return fmt.Errorf("seek: %w", err)
61+
}
62+
doc.Dat = make([]byte, doc.End-doc.Start)
63+
_, err = f.ReadAt(doc.Dat, int64(doc.Start))
64+
if err != nil {
65+
return fmt.Errorf("readat len=%d at=%d: %w\n", len(doc.Dat), doc.Start, err)
66+
}
67+
f.Close()
68+
}
69+
return nil
70+
}

index.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ package dupi
1616

1717
import (
1818
"fmt"
19-
"io"
20-
"io/ioutil"
2119
"log"
2220
"os"
2321

@@ -166,17 +164,10 @@ func (x *Index) JoinBlot(shard uint32, sblot uint16) uint32 {
166164

167165
func (x *Index) FindBlot(theBlot uint32, doc *Doc) (start, end uint32, err error) {
168166
if doc.Dat == nil {
169-
var f *os.File
170-
f, err = os.Open(doc.Path)
167+
err = doc.Load()
171168
if err != nil {
172169
return
173170
}
174-
doc.Dat, err = ioutil.ReadAll(f)
175-
if err != nil {
176-
return
177-
}
178-
f.Close()
179-
doc.Dat = doc.Dat[doc.Start:doc.End]
180171
}
181172
toks := x.TokenFunc()(nil, doc.Dat, doc.Start)
182173
j := 0
@@ -203,7 +194,7 @@ func (x *Index) FindBlot(theBlot uint32, doc *Doc) (start, end uint32, err error
203194
end = tok.Pos + uint32(len(tok.Lit))
204195
return
205196
}
206-
err = io.EOF
197+
err = fmt.Errorf("blot %x not found", theBlot)
207198
return
208199
}
209200

0 commit comments

Comments
 (0)