Skip to content
This repository was archived by the owner on Jun 27, 2020. It is now read-only.

Index caching #82

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bookbrowser.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ func main() {

s := server.NewServer(*addr, *bookdir, *tempdir, curversion, true, *nocovers)
go func() {
s.LoadBookIndex()
s.RefreshBookIndex()
if len(s.Indexer.BookList()) == 0 {
log.Fatalln("Fatal error: no books found")
Expand Down
137 changes: 127 additions & 10 deletions indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import (
"github.com/geek1011/BookBrowser/booklist"
"github.com/geek1011/BookBrowser/formats"

zglob "github.com/mattn/go-zglob"
"github.com/mattn/go-zglob"
"github.com/nfnt/resize"
"github.com/pkg/errors"
"encoding/json"
)

type Indexer struct {
Expand All @@ -25,6 +26,7 @@ type Indexer struct {
booklist booklist.BookList
mu sync.Mutex
indMu sync.Mutex
seen *SeenCache
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be an interface to allow for different cache implementations.

}

func New(paths []string, coverpath *string, exts []string) (*Indexer, error) {
Expand All @@ -45,7 +47,79 @@ func New(paths []string, coverpath *string, exts []string) (*Indexer, error) {
cp = &p
}

return &Indexer{paths: paths, coverpath: cp, exts: exts}, nil
return &Indexer{paths: paths, coverpath: cp, exts: exts, seen: NewSeenCache()}, nil
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the cache should be passed as an argument to New.

}

func (i *Indexer) Load() error {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of this code may fit better as part of the cache. Ideally, the cache would handle it's own loading, and the indexer would query the cache during the indexing.

i.indMu.Lock()
defer i.indMu.Unlock()

booklist := booklist.BookList{}

jsonFilename := filepath.Join(*i.coverpath, "index.json")
f, err := os.Open(jsonFilename)
if err != nil {
if os.IsNotExist(err) {
return nil
} else {
return errors.Wrap(err, "could not open index cache file")
}
}
dec := json.NewDecoder(f)
err = dec.Decode(&booklist)
if err != nil {
return errors.Wrap(err, "could not decode index cache file")
}
seen := NewSeenCache()
for index, b := range booklist {
seen.Add(b.FilePath, b.FileSize, b.ModTime, index)
}

if i.Verbose {
log.Printf("Loaded %d items from index cache", len(booklist))
}

i.mu.Lock()
i.booklist = booklist
i.seen = seen
i.mu.Unlock()

return nil
}

// Save serializes the current book list to index.json in the cover
// directory so a later Load can skip reindexing unchanged files. The data
// is written to a temporary file which is then renamed into place, so
// readers never observe a partially-written index.
func (i *Indexer) Save() error {
	i.indMu.Lock()
	defer i.indMu.Unlock()

	i.mu.Lock()
	booklist := i.booklist
	i.mu.Unlock()

	tmpFilename := filepath.Join(*i.coverpath, ".index.json.tmp")
	jsonFilename := filepath.Join(*i.coverpath, "index.json")

	f, err := os.Create(tmpFilename)
	if err != nil {
		// BUG FIX: f is nil on a Create error; the old code called
		// f.Close() here on the nil handle.
		return errors.Wrap(err, "could not create index cache temporary file")
	}

	if err := json.NewEncoder(f).Encode(&booklist); err != nil {
		f.Close()
		return errors.Wrap(err, "could not encode index cache file")
	}

	// BUG FIX: close before renaming. This flushes buffered data and
	// surfaces write errors, and renaming an open file fails on some
	// platforms (e.g. Windows).
	if err := f.Close(); err != nil {
		return errors.Wrap(err, "could not close index cache temporary file")
	}

	if err := os.Rename(tmpFilename, jsonFilename); err != nil {
		return errors.Wrap(err, "could not replace index cache file with temporary file")
	}

	if i.Verbose {
		log.Printf("Saved %d items to index cache", len(booklist))
	}

	return nil
}

func (i *Indexer) Refresh() ([]error, error) {
Expand All @@ -62,8 +136,15 @@ func (i *Indexer) Refresh() ([]error, error) {
return errs, errors.New("no paths to index")
}

booklist := booklist.BookList{}
seen := map[string]bool{}
// seenID may be redundant at this point given that SeenCache does essentially the same thing, but
// seenCache is based on the mtime/size/filename of each book (for performance), whereas seenID is based on
// the file hash
seenID := map[string]bool{}
seen := NewSeenCache()

i.mu.Lock()
bl := i.booklist
i.mu.Unlock()

filenames := []string{}
for _, path := range i.paths {
Expand All @@ -81,29 +162,65 @@ func (i *Indexer) Refresh() ([]error, error) {
}
}

exists := make([]bool, len(bl), len(bl))

for fi, filepath := range filenames {
if i.Verbose {
log.Printf("Indexing %s", filepath)
}

book, err := i.getBook(filepath)
stat, err := os.Stat(filepath)
if err != nil {
errs = append(errs, errors.Wrapf(err, "error reading book '%s'", filepath))
errs = append(errs, errors.Wrapf(err, "cannot stat file '%s'", filepath))
if i.Verbose {
log.Printf("--> Error: %v", errs[len(errs)-1])
}
continue
}
if !seen[book.ID()] {
booklist = append(booklist, book)
seen[book.ID()] = true

var book *booklist.Book
hash := i.seen.Hash(filepath, stat.Size(), stat.ModTime())
haveSeen, blIndex := i.seen.SeenHash(hash)
if haveSeen {
exists[blIndex] = true
seen.AddHash(hash, blIndex)
if i.Verbose {
log.Printf("Already seen; not reindexing")
}
} else {
// TODO: pass stat variable to i.getBook() to avoid a duplicate os.Stat() for each book
book, err = i.getBook(filepath)
if err != nil {
errs = append(errs, errors.Wrapf(err, "error reading book '%s'", filepath))
if i.Verbose {
log.Printf("--> Error: %v", errs[len(errs)-1])
}
continue
}
if !seenID[book.ID()] {
bl = append(bl, book)
seenID[book.ID()] = true
blIndex = len(bl) - 1
seen.AddHash(hash, blIndex)
}
}

i.Progress = float64(fi+1) / float64(len(filenames))
}

// remove any books that have disappeared since our last indexing job
lastEntry := len(bl)-1
for index, stillExists := range exists {
if !stillExists {
bl[index] = bl[lastEntry]
lastEntry--
}
}
bl = bl[0:lastEntry+1]

i.mu.Lock()
i.booklist = booklist
i.booklist = bl
i.seen = seen
i.mu.Unlock()

return errs, nil
Expand Down
48 changes: 48 additions & 0 deletions indexer/seencache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package indexer

import (
"fmt"
"crypto/sha1"
"time"
)

type SeenCache struct {
seen map[string]int
}

func NewSeenCache() *SeenCache {
return &SeenCache{seen: make(map[string]int)}
}

func (c *SeenCache) Hash(filePath string, fileSize int64, modTime time.Time) string {
token := fmt.Sprintf("%08d|%s|%s",fileSize,modTime,filePath)
return fmt.Sprintf("%x", sha1.Sum([]byte(token)))[:10]
}

func (c *SeenCache) Clear() {
c.seen = make(map[string]int)
}

func (c *SeenCache) Add(filePath string, fileSize int64, modTime time.Time, index int) string {
hash := c.Hash(filePath, fileSize, modTime)
c.seen[hash] = index
return hash
}

func (c *SeenCache) AddHash(hash string, index int) {
c.seen[hash] = index
}

func (c *SeenCache) Seen(filePath string, fileSize int64, modTime time.Time) (bool, string, int) {
hash := c.Hash(filePath, fileSize, modTime)
if index, exists := c.seen[hash]; exists {
return true, hash, index
} else {
return false, "", -1
}
}

func (c *SeenCache) SeenHash(hash string) (bool, int) {
index, exists := c.seen[hash]
return exists, index
}
14 changes: 14 additions & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ func (s *Server) printLog(format string, v ...interface{}) {
}
}

// LoadBookIndex loads the cached book index from disk via the indexer,
// returning any error from the underlying Load.
func (s *Server) LoadBookIndex() error {
	return s.Indexer.Load()
}
// SaveBookIndex saves the current book index to disk via the indexer,
// returning any error from the underlying Save.
func (s *Server) SaveBookIndex() error {
	return s.Indexer.Save()
}

// RefreshBookIndex refreshes the book index
func (s *Server) RefreshBookIndex() error {
errs, err := s.Indexer.Refresh()
Expand All @@ -91,6 +98,13 @@ func (s *Server) RefreshBookIndex() error {
}

debug.FreeOSMemory()

err = s.Indexer.Save()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to disable this with a command-line flag.

if err != nil {
log.Printf("Error saving index: %s",err)
return err
}

return nil
}

Expand Down