Skip to content

Commit cec0fb6

Browse files
authored
Add test that validates fingerprint.StartsWith when read from files (#145)
1 parent 3b22d2e commit cec0fb6

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

operator/builtin/input/file/fingerprint.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ const defaultFingerprintSize = 1000 // bytes
2525
const minFingerprintSize = 16 // bytes
2626

2727
// Fingerprint is used to identify a file.
28+
// A file's fingerprint is the first N bytes of the file,
29+
// where N is the fingerprintSize on the file_input operator
2830
type Fingerprint struct {
2931
// FirstBytes holds the leading bytes of the file that make up its fingerprint.
FirstBytes []byte
3032
}
@@ -56,6 +58,10 @@ func (f Fingerprint) Copy() *Fingerprint {
5658

5759
// StartsWith returns true if the fingerprints are the same
5860
// or if the new fingerprint starts with the old one.
61+
// This is important functionality for tracking new files,
62+
// since their initial size is typically less than that of
63+
// a fingerprint. As the file grows, its fingerprint is updated
64+
// until it reaches a maximum size, as configured on the operator
5965
func (f Fingerprint) StartsWith(old *Fingerprint) bool {
6066
l0 := len(old.FirstBytes)
6167
if l0 == 0 {

operator/builtin/input/file/fingerprint_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ package file
1616

1717
import (
1818
"fmt"
19+
"io/ioutil"
20+
"math/rand"
1921
"strings"
2022
"testing"
2123

@@ -212,4 +214,57 @@ func TestFingerprintStartsWith(t *testing.T) {
212214
}
213215
}
214216

217+
// Generates a file filled with many random bytes, then
218+
// writes the same bytes to a second file, one byte at a time.
219+
// Validates, after each byte is written, that fingerprint
220+
// matching would successfully associate the two files.
221+
// The static file can be thought of as the present state of
222+
// the file, while each iteration of the growing file represents
223+
// a possible state of the same file at a previous time.
224+
func TestFingerprintStartsWith_FromFile(t *testing.T) {
225+
r := rand.New(rand.NewSource(112358))
226+
227+
operator, _, tempDir := newTestFileOperator(t, nil, nil)
228+
operator.fingerprintSize *= 10
229+
230+
fileLength := 12 * operator.fingerprintSize
231+
232+
// Make a []byte we can write one at a time
233+
content := make([]byte, fileLength)
234+
r.Read(content) // Fill slice with random bytes
235+
236+
// Overwrite some bytes with \n to ensure
237+
// we are testing a file with multiple lines
238+
newlineMask := make([]byte, fileLength)
239+
r.Read(newlineMask) // Fill slice with random bytes
240+
for i, b := range newlineMask {
241+
if b == 0 && i != 0 { // 1/256 chance, but never first byte
242+
content[i] = byte('\n')
243+
}
244+
}
245+
246+
fullFile, err := ioutil.TempFile(tempDir, "")
247+
require.NoError(t, err)
248+
_, err = fullFile.Write(content)
249+
require.NoError(t, err)
250+
251+
fff, err := operator.NewFingerprint(fullFile)
252+
require.NoError(t, err)
253+
254+
partialFile, err := ioutil.TempFile(tempDir, "")
255+
require.NoError(t, err)
256+
257+
// Write one byte at a time and validate that updated
258+
// full fingerprint still starts with partial
259+
for i := range content {
260+
_, err = partialFile.Write(content[i:i])
261+
require.NoError(t, err)
262+
263+
pff, err := operator.NewFingerprint(fullFile)
264+
require.NoError(t, err)
265+
266+
require.True(t, fff.StartsWith(pff))
267+
}
268+
}
269+
215270
// TODO TestConfig (config_test.go) - sets defaults, errors appropriately, etc

0 commit comments

Comments
 (0)