Skip to content

Commit 875d819

Browse files
authored
Add SQLite scanner (#28)
1 parent 1e15435 commit 875d819

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

internal/format/header.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ var fileHeaders = []FileHeader{
5454
zipFileHeader,
5555
rarFileHeader,
5656
pdfFileHeader,
57+
// database formats
58+
sqliteFileHeader,
5759
}
5860

5961
func GetFileScanners(ext ...string) ([]FileScanner, error) {

internal/format/sqlite.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright (c) 2025 Stefano Scafiti
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
// THE SOFTWARE.
20+
package format
21+
22+
import (
23+
"bytes"
24+
"encoding/binary"
25+
"fmt"
26+
)
27+
28+
// SQLiteSignature is the 16-byte magic string found at the start of a
// SQLite 3 database header: the text "SQLite format 3" followed by a NUL byte.
const SQLiteSignature = "SQLite format 3\x00"
29+
30+
var sqliteFileHeader = FileHeader{
31+
Ext: "sqlite",
32+
Description: "SQLite Database Format",
33+
Signatures: [][]byte{
34+
[]byte(SQLiteSignature),
35+
},
36+
ScanFile: ScanSQLite,
37+
}
38+
39+
// ScanSqlite tries to carve a single SQLite DB starting at offset 0 in the reader.
40+
func ScanSQLite(r *Reader) (*ScanResult, error) {
41+
// SQLite 3 Database Header Structure: https://www.sqlite.org/fileformat2.html#the_database_header
42+
// -----------------------------------------
43+
// Magic (16 bytes) "SQLite format 3\0" magic string
44+
// PageSize (2 bytes) Big-endian uint16; DB page size in bytes (must be power of 2, 512–65536)
45+
// FFWrite (1 byte) File format write version
46+
// FFRead (1 byte) File format read version
47+
// Reserved (1 byte) Reserved for future use
48+
// MaxEmbPayloadFrac (1 byte) Maximum embedded payload fraction
49+
// MinEmbPayloadFrac (1 byte) Minimum embedded payload fraction
50+
// LeafPayloadFrac (1 byte) Leaf payload fraction
51+
// FileChangeCounter (4 bytes) Big-endian uint32; file change counter
52+
// FileSizeInPage (4 bytes) Big-endian uint32; total DB size in pages
53+
// FirstFreelistPage (4 bytes) Big-endian uint32; first freelist trunk page
54+
// FreelistPages (4 bytes) Big-endian uint32; total number of freelist pages
55+
// SchemaCookie (4 bytes) Big-endian uint32; schema cookie
56+
// SchemaFormat (4 bytes) Big-endian uint32; schema format number
57+
// DefaultPageCacheSize (4 bytes) Big-endian uint32; default page cache size
58+
// LargestRootBtree (4 bytes) Big-endian uint32; root b-tree page number
59+
// TextEncoding (4 bytes) Big-endian uint32; text encoding used
60+
// UserVersion (4 bytes) Big-endian uint32; user version
61+
// IncVacuumMode (4 bytes) Big-endian uint32; incremental vacuum mode flag
62+
// AppID (4 bytes) Big-endian uint32; application ID
63+
// ReservedForExpansion (20 bytes) Reserved space for future expansion
64+
// VersionValidFor (4 bytes) Big-endian uint32; version valid for number
65+
// Version (4 bytes) Big-endian uint32; SQLite version number
66+
67+
var hdr [100]byte
68+
_, err := r.Read(hdr[:])
69+
if err != nil {
70+
return nil, fmt.Errorf("failed to read SQLite header: %w", err)
71+
}
72+
73+
if !bytes.Equal(hdr[:len(SQLiteSignature)], []byte(SQLiteSignature)) {
74+
return nil, fmt.Errorf("invalid SQLite magic header: expected %q, got %q", SQLiteSignature, string(hdr[:16]))
75+
}
76+
77+
// Read PageSize: bytes 16-17 big-endian uint16
78+
pageSize := int(binary.BigEndian.Uint16(hdr[16:18]))
79+
if pageSize == 1 {
80+
pageSize = 65536
81+
}
82+
83+
if !isPowerOfTwo(uint32(pageSize)) || pageSize < 512 || pageSize > 65536 {
84+
return nil, fmt.Errorf("invalid SQLite page size: %d", pageSize)
85+
}
86+
87+
// Read FileChangeCounter: bytes 24-27 big-endian uint32
88+
fileChangeCounter := binary.BigEndian.Uint32(hdr[24:28])
89+
90+
// Read FileSizeInPage: bytes 28-31 big-endian uint32
91+
fileSizeInPage := binary.BigEndian.Uint32(hdr[28:32])
92+
93+
// Read VersionValidFor: bytes 92-95 big-endian uint32
94+
versionValidFor := binary.BigEndian.Uint32(hdr[92:96])
95+
96+
var size uint64 = 0
97+
if fileSizeInPage != 0 && fileChangeCounter == versionValidFor {
98+
size = uint64(fileSizeInPage) * uint64(pageSize)
99+
}
100+
101+
return &ScanResult{
102+
Size: size,
103+
}, nil
104+
}
105+
106+
// isPowerOfTwo reports whether x has exactly one bit set, i.e. x is a power
// of two. Zero is not a power of two.
func isPowerOfTwo(x uint32) bool {
	if x == 0 {
		return false
	}
	// Clearing the lowest set bit leaves zero only when a single bit was set.
	lowestCleared := x & (x - 1)
	return lowestCleared == 0
}

0 commit comments

Comments
 (0)