Skip to content

Commit

Permalink
Add test that validates fingerprint.StartsWith when read from files (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
djaglowski authored May 19, 2021
1 parent 3b22d2e commit cec0fb6
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 0 deletions.
6 changes: 6 additions & 0 deletions operator/builtin/input/file/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ const defaultFingerprintSize = 1000 // bytes
const minFingerprintSize = 16 // bytes

// Fingerprint is used to identify a file.
// A file's fingerprint is the first N bytes of the file,
// where N is the fingerprintSize on the file_input operator.
type Fingerprint struct {
// FirstBytes holds the leading bytes of the file that make up the fingerprint.
FirstBytes []byte
}
Expand Down Expand Up @@ -56,6 +58,10 @@ func (f Fingerprint) Copy() *Fingerprint {

// StartsWith returns true if the fingerprints are the same
// or if the new fingerprint starts with the old one
// This is important functionality for tracking new files,
// since their initial size is typically less than that of
// a fingerprint. As the file grows, its fingerprint is updated
// until it reaches a maximum size, as configured on the operator
func (f Fingerprint) StartsWith(old *Fingerprint) bool {
l0 := len(old.FirstBytes)
if l0 == 0 {
Expand Down
55 changes: 55 additions & 0 deletions operator/builtin/input/file/fingerprint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package file

import (
"fmt"
"io/ioutil"
"math/rand"
"strings"
"testing"

Expand Down Expand Up @@ -212,4 +214,57 @@ func TestFingerprintStartsWith(t *testing.T) {
}
}

// TestFingerprintStartsWith_FromFile generates a file filled with many
// random bytes, then writes the same bytes to a second file, one byte
// at a time. After each byte is written, it validates that fingerprint
// matching would successfully associate the two files.
// The static file can be thought of as the present state of
// the file, while each iteration of the growing file represents
// a possible state of the same file at a previous time.
func TestFingerprintStartsWith_FromFile(t *testing.T) {
	// A fixed seed keeps the generated content identical across runs.
	r := rand.New(rand.NewSource(112358))

	operator, _, tempDir := newTestFileOperator(t, nil, nil)
	operator.fingerprintSize *= 10

	// The file is several times longer than the fingerprint, so the loop
	// below covers sizes below, at, and beyond the fingerprint size.
	fileLength := 12 * operator.fingerprintSize

	// Make a []byte we can write one at a time.
	// (*rand.Rand).Read always fills the slice and returns a nil error,
	// so its results are intentionally not checked.
	content := make([]byte, fileLength)
	r.Read(content)

	// Overwrite some bytes with \n to ensure
	// we are testing a file with multiple lines
	newlineMask := make([]byte, fileLength)
	r.Read(newlineMask)
	for i, b := range newlineMask {
		if b == 0 && i != 0 { // 1/256 chance, but never first byte
			content[i] = byte('\n')
		}
	}

	fullFile, err := ioutil.TempFile(tempDir, "")
	require.NoError(t, err)
	defer func() { require.NoError(t, fullFile.Close()) }()

	_, err = fullFile.Write(content)
	require.NoError(t, err)

	fff, err := operator.NewFingerprint(fullFile)
	require.NoError(t, err)

	partialFile, err := ioutil.TempFile(tempDir, "")
	require.NoError(t, err)
	defer func() { require.NoError(t, partialFile.Close()) }()

	// Write one byte at a time and validate that the
	// full fingerprint still starts with the partial one
	for i := range content {
		// content[i:i+1] is exactly one byte; content[i:i] would be an
		// empty slice and the partial file would never grow.
		_, err = partialFile.Write(content[i : i+1])
		require.NoError(t, err)

		// Fingerprint the growing file, not the static one, so the
		// comparison below is between two different files.
		pff, err := operator.NewFingerprint(partialFile)
		require.NoError(t, err)

		require.True(t, fff.StartsWith(pff), "fingerprint mismatch after %d bytes", i+1)
	}
}

// TODO TestConfig (config_test.go) - sets defaults, errors appropriately, etc

0 comments on commit cec0fb6

Please sign in to comment.