-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtesting_file_system_store.go
More file actions
192 lines (161 loc) · 5.36 KB
/
Copy pathtesting_file_system_store.go
File metadata and controls
192 lines (161 loc) · 5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
package bloomsearch
import (
"context"
"encoding/binary"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// FileSystemDataStore is a store backed by a single directory on the local
// file system. Its methods create, open, list and remove files using plain
// file paths (encoded as bytes) as file pointers.
type FileSystemDataStore struct {
	// rootDir is the directory in which new files are created and which is
	// scanned when listing candidate files.
	rootDir string
}
// FileSystemDataStoreFilePointer identifies a file by a string ID.
//
// NOTE(review): the store methods visible alongside this type pass raw path
// bytes around and never reference it — confirm whether it is still used.
type FileSystemDataStoreFilePointer struct {
	// ID is the string identifier of the file.
	ID string
}
// NewFileSystemDataStore returns a FileSystemDataStore rooted at rootDir,
// first creating the directory (and any missing parents) with mode 0755.
//
// The constructor has no error return, so a failure to create the directory
// is not reported here; it shows up later as an error from the operations
// that touch rootDir (CreateFile, GetMaybeFilesForQuery).
func NewFileSystemDataStore(rootDir string) *FileSystemDataStore {
	// MkdirAll does nothing when the directory already exists, so a separate
	// Stat check beforehand is unnecessary and would only open a window for a
	// race with another creator. The error is deliberately discarded because
	// the signature cannot carry it (see the function comment).
	_ = os.MkdirAll(rootDir, 0755)

	return &FileSystemDataStore{rootDir: rootDir}
}
// OpenFile opens the file whose path is given, verbatim, by filePointerBytes
// and returns it for reading and seeking. The caller must close the result.
//
// ctx is not used. Any error from os.Open is returned unchanged.
func (fs *FileSystemDataStore) OpenFile(ctx context.Context, filePointerBytes []byte) (io.ReadSeekCloser, error) {
	handle, openErr := os.Open(string(filePointerBytes))
	if openErr != nil {
		// Return a literal nil so the interface value itself is nil rather
		// than wrapping a nil *os.File.
		return nil, openErr
	}
	return handle, nil
}
// CreateFile creates a new, uniquely named file in rootDir using the pattern
// "bloom-*.dat" and returns it for writing together with its file pointer,
// which is the created file's path encoded as bytes. The caller must close
// the returned writer.
//
// ctx is not used. Any error from os.CreateTemp is returned unchanged.
func (fs *FileSystemDataStore) CreateFile(ctx context.Context) (io.WriteCloser, []byte, error) {
	created, createErr := os.CreateTemp(fs.rootDir, "bloom-*.dat")
	if createErr != nil {
		return nil, nil, createErr
	}

	pointer := []byte(created.Name())
	return created, pointer, nil
}
// TombstoneFile removes the file whose path is given by filePointerBytes.
//
// A file that does not exist is treated as already removed and yields nil;
// any other error from os.Remove is returned unchanged. ctx is not used.
func (fs *FileSystemDataStore) TombstoneFile(ctx context.Context, filePointerBytes []byte) error {
	removeErr := os.Remove(string(filePointerBytes))
	if removeErr == nil || os.IsNotExist(removeErr) {
		return nil
	}
	return removeErr
}
// readFileMetadata reads and verifies the metadata stored at the end of the
// bloom file at filePath.
//
// The tail of the file is read with this layout (in file order):
//
//	[metadata]         metadataLength bytes
//	[metadata hash]    HashSize bytes
//	[metadata length]  4 bytes, little-endian uint32
//	[file version]     4 bytes, little-endian uint32
//	[magic]            8 bytes
//
// An error is returned when the file cannot be opened, stat-ed or read, is
// smaller than the fixed-size footer, carries the wrong magic or version,
// declares a metadata length larger than the bytes preceding the footer, or
// when FileMetadataFromBytesWithHash rejects the metadata bytes and hash.
func (fs *FileSystemDataStore) readFileMetadata(filePath string) (*FileMetadata, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Errorf("failed to open file %s: %w", filePath, err)
	}
	defer file.Close()

	stat, err := file.Stat()
	if err != nil {
		return nil, fmt.Errorf("failed to stat file %s: %w", filePath, err)
	}
	fileSize := stat.Size()

	// The fixed-size footer is: hash + metadata length + version + magic.
	minFooterSize := int64(8 + 4 + 4 + HashSize)
	if fileSize < minFooterSize {
		return nil, fmt.Errorf("file %s is too small to be a valid bloom file", filePath)
	}

	// Offsets of the footer fields, working backwards from the end of file.
	magicOffset := fileSize - 8
	versionOffset := magicOffset - 4
	lengthOffset := versionOffset - 4
	hashOffset := lengthOffset - int64(HashSize)

	magicBytes := make([]byte, 8)
	if _, err = file.ReadAt(magicBytes, magicOffset); err != nil {
		return nil, fmt.Errorf("failed to read magic bytes from %s: %w", filePath, err)
	}
	if string(magicBytes) != MagicBytes {
		return nil, fmt.Errorf("invalid magic bytes in file %s", filePath)
	}

	versionBytes := make([]byte, 4)
	if _, err = file.ReadAt(versionBytes, versionOffset); err != nil {
		return nil, fmt.Errorf("failed to read version from %s: %w", filePath, err)
	}
	version := binary.LittleEndian.Uint32(versionBytes)
	if version != FileVersion {
		return nil, fmt.Errorf("unsupported file version %d in file %s", version, filePath)
	}

	metadataLengthBytes := make([]byte, 4)
	if _, err = file.ReadAt(metadataLengthBytes, lengthOffset); err != nil {
		return nil, fmt.Errorf("failed to read metadata length from %s: %w", filePath, err)
	}
	metadataLength := binary.LittleEndian.Uint32(metadataLengthBytes)

	// The length comes from the file itself, so validate it before using it
	// to size an allocation: a corrupt value could otherwise request up to
	// 4 GiB and produce a negative read offset. hashOffset equals the number
	// of bytes that precede the footer.
	if int64(metadataLength) > hashOffset {
		return nil, fmt.Errorf("metadata length %d exceeds available size in file %s", metadataLength, filePath)
	}

	metadataHashBytes := make([]byte, HashSize)
	if _, err = file.ReadAt(metadataHashBytes, hashOffset); err != nil {
		return nil, fmt.Errorf("failed to read metadata hash from %s: %w", filePath, err)
	}

	// The metadata sits immediately before the hash.
	metadataBytes := make([]byte, metadataLength)
	metadataOffset := hashOffset - int64(metadataLength)
	if _, err = file.ReadAt(metadataBytes, metadataOffset); err != nil {
		return nil, fmt.Errorf("failed to read metadata from %s: %w", filePath, err)
	}

	// Parse the metadata and verify it against the stored hash.
	metadata, err := FileMetadataFromBytesWithHash(metadataBytes, metadataHashBytes)
	if err != nil {
		return nil, fmt.Errorf("failed to parse metadata from %s: %w", filePath, err)
	}
	return metadata, nil
}
// GetMaybeFilesForQuery lists the ".dat" files directly inside rootDir and
// returns the ones that may hold matches for query.
//
// For every candidate file its metadata is read, its data blocks are narrowed
// with FilterDataBlocks, and its size is taken from os.Stat. A file is
// included when query is nil or at least one data block survives filtering;
// the returned Metadata carries the filtered block list and PointerBytes is
// the file path.
//
// The scan is all-or-nothing: a failure to list the directory, to read any
// file's metadata, or to stat any file aborts the call and returns the error.
// ctx is not used.
func (fs *FileSystemDataStore) GetMaybeFilesForQuery(ctx context.Context, query *QueryPrefilter) ([]MaybeFile, error) {
	entries, err := os.ReadDir(fs.rootDir)
	if err != nil {
		return nil, err
	}

	candidates := make([]MaybeFile, 0, len(entries))
	for _, entry := range entries {
		// Only non-directory entries named *.dat are treated as bloom files.
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if !strings.HasSuffix(name, ".dat") {
			continue
		}
		fullPath := filepath.Join(fs.rootDir, name)

		// An unreadable or invalid bloom file fails the whole listing.
		meta, err := fs.readFileMetadata(fullPath)
		if err != nil {
			return nil, fmt.Errorf("failed to read file metadata from %s: %w", fullPath, err)
		}

		// Keep only the data blocks that can satisfy the query.
		meta.DataBlocks = FilterDataBlocks(meta.DataBlocks, query)

		info, err := os.Stat(fullPath)
		if err != nil {
			return nil, fmt.Errorf("failed to stat file %s: %w", fullPath, err)
		}

		// With a query present, drop files that have no surviving blocks.
		if query != nil && len(meta.DataBlocks) == 0 {
			continue
		}

		candidates = append(candidates, MaybeFile{
			PointerBytes: []byte(fullPath),
			Metadata:     *meta,
			Size:         int(info.Size()),
		})
	}
	return candidates, nil
}
// Update applies a batch of metadata changes.
//
// writes are a no-op: the metadata lives inside the files themselves. For
// each entry in deletes, the file whose path is given by FilePointerBytes is
// removed. A file that no longer exists is not an error, matching
// TombstoneFile. Every delete is attempted even if an earlier one fails; the
// first failure encountered is returned, wrapped with the offending path.
// ctx is not used.
func (fs *FileSystemDataStore) Update(ctx context.Context, writes []WriteOperation, deletes []DeleteOperation) error {
	var firstErr error
	for _, del := range deletes {
		target := string(del.FilePointerBytes)
		err := os.Remove(target)
		if err == nil || os.IsNotExist(err) {
			continue
		}
		// Remember only the first failure but keep going so one bad entry
		// does not prevent the remaining files from being removed.
		if firstErr == nil {
			firstErr = fmt.Errorf("failed to remove file %s: %w", target, err)
		}
	}
	return firstErr
}
// Compile-time checks that *FileSystemDataStore implements both the DataStore
// and MetaStore interfaces. Declared at package level with typed nil pointers
// so they cost nothing at run time and need no init function.
var (
	_ DataStore = (*FileSystemDataStore)(nil)
	_ MetaStore = (*FileSystemDataStore)(nil)
)