Skip to content

Commit 3b5bbdf

Browse files
committed
support SQLite
1 parent 90111c7 commit 3b5bbdf

21 files changed

+460
-17
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ Use the `-dbms` flag to specify the database type:
130130
- `mysql` - MySQL
131131
- `oracle` - Oracle
132132
- `snowflake` - Snowflake
133+
- `sqlite` - SQLite
133134

134135
## Testing
135136

cmd/sqllexer/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func main() {
1717
mode = flag.String("mode", "obfuscate_and_normalize", "Operation mode: obfuscate, normalize, tokenize, obfuscate_and_normalize")
1818
inputFile = flag.String("input", "", "Input file (default: stdin)")
1919
outputFile = flag.String("output", "", "Output file (default: stdout)")
20-
dbms = flag.String("dbms", "", "Database type: mssql, postgresql, mysql, oracle, snowflake")
20+
dbms = flag.String("dbms", "", "Database type: mssql, postgresql, mysql, oracle, snowflake, sqlite")
2121
replaceDigits = flag.Bool("replace-digits", true, "Replace digits with placeholders")
2222
replaceBoolean = flag.Bool("replace-boolean", true, "Replace boolean values with placeholders")
2323
replaceNull = flag.Bool("replace-null", true, "Replace null values with placeholders")
@@ -249,7 +249,7 @@ Flags:
249249
-output string
250250
Output file (default: stdout)
251251
-dbms string
252-
Database type: mssql, postgresql, mysql, oracle, snowflake
252+
Database type: mssql, postgresql, mysql, oracle, snowflake, sqlite
253253
-replace-digits
254254
Replace digits with placeholders (default true)
255255
-replace-boolean

dbms_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func TestQueriesPerDBMS(t *testing.T) {
3939
DBMSSQLServer,
4040
DBMSMySQL,
4141
DBMSSnowflake,
42+
DBMSSQLite,
4243
}
4344

4445
for _, dbms := range dbmsTypes {

sqllexer.go

Lines changed: 77 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -142,24 +142,29 @@ func (s *Lexer) Scan() *Token {
142142
case isWildcard(ch):
143143
return s.scanWildcard()
144144
case ch == '$':
145-
if isDigit(s.lookAhead(1)) {
146-
// if the dollar sign is followed by a digit, then it's a numbered parameter
147-
return s.scanPositionalParameter()
145+
nextCh := s.lookAhead(1)
146+
if isDigit(nextCh) {
147+
// Prefix length 2: consume '$' plus the first digit of a numbered parameter (e.g. $1, $12).
148+
// This branch is not gated on the DBMS, so it also covers SQLite's $VVV parameters when V is numeric; see scanNumericParameter
149+
return s.scanNumericParameter(2)
150+
}
151+
if s.config.DBMS == DBMSSQLite && isAlphaNumeric(nextCh) {
152+
return s.scanBindParameter()
148153
}
149-
if s.config.DBMS == DBMSSQLServer && isLetter(s.lookAhead(1)) {
154+
if s.config.DBMS == DBMSSQLServer && isLetter(nextCh) {
150155
return s.scanIdentifier(ch)
151156
}
152157
return s.scanDollarQuotedString()
153158
case ch == ':':
154-
if s.config.DBMS == DBMSOracle && isAlphaNumeric(s.lookAhead(1)) {
159+
if (s.config.DBMS == DBMSOracle || s.config.DBMS == DBMSSQLite) && isAlphaNumeric(s.lookAhead(1)) {
155160
return s.scanBindParameter()
156161
}
157162
return s.scanOperator(ch)
158163
case ch == '`':
159-
if s.config.DBMS == DBMSMySQL {
164+
if s.config.DBMS == DBMSMySQL || s.config.DBMS == DBMSSQLite {
160165
return s.scanDoubleQuotedIdentifier('`')
161166
}
162-
return s.scanUnknown() // backtick is only valid in mysql
167+
return s.scanUnknown() // backtick is only valid in mysql and sqlite
163168
case ch == '#':
164169
if s.config.DBMS == DBMSSQLServer {
165170
return s.scanIdentifier(ch)
@@ -168,6 +173,13 @@ func (s *Lexer) Scan() *Token {
168173
return s.scanSingleLineComment(ch)
169174
}
170175
return s.scanOperator(ch)
176+
case ch == '?':
177+
if s.config.DBMS == DBMSSQLite {
178+
// Prefix length 1: consume '?' before scanning optional digits of SQLite ?NNN parameters
179+
// SQLite treats bare '?' and '?NNN' as positional parameters (see https://sqlite.org/c3ref/bind_blob.html)
180+
return s.scanNumericParameter(1)
181+
}
182+
return s.scanOperator(ch)
171183
case ch == '@':
172184
if s.lookAhead(1) == '@' {
173185
if isAlphaNumeric(s.lookAhead(2)) {
@@ -192,7 +204,7 @@ func (s *Lexer) Scan() *Token {
192204
case isOperator(ch):
193205
return s.scanOperator(ch)
194206
case isPunctuation(ch):
195-
if ch == '[' && s.config.DBMS == DBMSSQLServer {
207+
if ch == '[' && (s.config.DBMS == DBMSSQLServer || s.config.DBMS == DBMSSQLite) {
196208
return s.scanDoubleQuotedIdentifier('[')
197209
}
198210
return s.scanPunctuation()
@@ -595,21 +607,22 @@ func (s *Lexer) scanDollarQuotedString() *Token {
595607
return s.emit(ERROR)
596608
}
597609

598-
func (s *Lexer) scanPositionalParameter() *Token {
610+
// scanNumericParameter scans a numbered parameter starting at the cursor and
// emits it as a POSITIONAL_PARAMETER token.
//
// prefixLen is how many characters are stepped over via nextBy before the
// digit loop starts: Scan passes 2 for '$' plus its first digit and 1 for a
// lone '?'. Any digits that follow the prefix are consumed as part of the token.
func (s *Lexer) scanNumericParameter(prefixLen int) *Token {
599611
s.start = s.cursor
600-
ch := s.nextBy(2) // consume the dollar sign and the number
601-
for {
602-
if !isDigit(ch) {
603-
break
604-
}
612+
ch := s.nextBy(prefixLen)
613+
for isDigit(ch) {
605614
ch = s.next()
606615
}
607616
return s.emit(POSITIONAL_PARAMETER)
608617
}
609618

610619
func (s *Lexer) scanBindParameter() *Token {
611620
s.start = s.cursor
612-
ch := s.nextBy(2) // consume the (colon|at sign) and the char
621+
if s.config.DBMS == DBMSSQLite {
622+
// SQLite allows named bind parameters prefixed with :, @, or $, so use the SQLite-specific scanner
623+
return s.scanSQLiteBindParameter()
624+
}
625+
ch := s.nextBy(2) // consume the (colon|at sign|dollar sign) and the char
613626
for {
614627
if !isAlphaNumeric(ch) {
615628
break
@@ -619,6 +632,55 @@ func (s *Lexer) scanBindParameter() *Token {
619632
return s.emit(BIND_PARAMETER)
620633
}
621634

635+
// https://sqlite.org/c3ref/bind_blob.html
636+
func (s *Lexer) scanSQLiteBindParameter() *Token {
637+
s.next() // consume the prefix character (:, @, or $)
638+
s.consumeSQLiteIdentifier()
639+
640+
for {
641+
if s.peek() == ':' && s.lookAhead(1) == ':' {
642+
s.nextBy(2) // consume '::'
643+
s.consumeSQLiteIdentifier()
644+
continue
645+
}
646+
break
647+
}
648+
649+
if s.peek() == '(' {
650+
s.consumeSQLiteParameterSuffix()
651+
}
652+
653+
return s.emit(BIND_PARAMETER)
654+
}
655+
656+
func (s *Lexer) consumeSQLiteIdentifier() {
657+
for {
658+
ch := s.peek()
659+
if ch == '_' || isAlphaNumeric(ch) {
660+
s.next()
661+
continue
662+
}
663+
break
664+
}
665+
}
666+
667+
func (s *Lexer) consumeSQLiteParameterSuffix() {
668+
s.next() // consume '('
669+
depth := 1
670+
for depth > 0 {
671+
ch := s.peek()
672+
if isEOF(ch) {
673+
break
674+
}
675+
s.next()
676+
if ch == '(' {
677+
depth++
678+
} else if ch == ')' {
679+
depth--
680+
}
681+
}
682+
}
683+
622684
func (s *Lexer) scanSystemVariable() *Token {
623685
s.start = s.cursor
624686
ch := s.nextBy(2) // consume @@

sqllexer_fuzz_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,17 @@ func addComplexTestCases(f *testing.F) {
147147
`SELECT $1, $2 FROM @mystage/file.csv`,
148148
}
149149

150+
// SQLite specific patterns
151+
sqlitePatterns := []string{
152+
`SELECT * FROM pragma_table_info('users')`,
153+
`INSERT OR REPLACE INTO kv_store(key, value) VALUES(:key, json_extract($payload, '$.value'))`,
154+
`INSERT INTO logs VALUES($ns::var, $env(config), $ns::name(sub))`,
155+
"CREATE TABLE IF NOT EXISTS logs (id INTEGER PRIMARY KEY, payload TEXT) WITHOUT ROWID",
156+
"WITH ranked AS (SELECT *, row_number() OVER (PARTITION BY type ORDER BY created_at DESC) AS rn FROM events) SELECT * FROM ranked WHERE rn = 1",
157+
"SELECT [user] FROM [main].[table] WHERE [id] = 1",
158+
"ATTACH DATABASE 'archive.db' AS archive; DETACH DATABASE archive",
159+
}
160+
150161
// Common edge cases across all DBMS
151162
commonEdgeCases := []string{
152163
// Nested subqueries
@@ -181,6 +192,7 @@ func addComplexTestCases(f *testing.F) {
181192
patterns = append(patterns, oraclePatterns...)
182193
patterns = append(patterns, snowflakePatterns...)
183194
patterns = append(patterns, commonEdgeCases...)
195+
patterns = append(patterns, sqlitePatterns...)
184196

185197
// Add each pattern with different DBMS types
186198
dbmsTypes := []string{
@@ -189,6 +201,7 @@ func addComplexTestCases(f *testing.F) {
189201
string(DBMSMySQL),
190202
string(DBMSOracle),
191203
string(DBMSSnowflake),
204+
string(DBMSSQLite),
192205
}
193206

194207
for _, pattern := range patterns {
@@ -259,6 +272,15 @@ func addObfuscationTestCases(f *testing.F) {
259272
`SELECT $1, $2, $3 FROM @mystage`,
260273
}
261274

275+
// SQLite specific obfuscation patterns
276+
sqlitePatterns := []string{
277+
`SELECT * FROM logs WHERE id = ?5 AND tag = @tag`,
278+
`SELECT * FROM users WHERE email = :email OR email = $email`,
279+
`SELECT $ns::var, $env(config), $ns::name(sub)`,
280+
`SELECT [user] FROM [main].[table] WHERE [id] = 1`,
281+
`PRAGMA table_info('users')`,
282+
}
283+
262284
// Common obfuscation patterns for all DBMS
263285
commonPatterns := []string{
264286
// Basic numbers
@@ -331,13 +353,19 @@ func addObfuscationTestCases(f *testing.F) {
331353
f.Add(pattern, string(DBMSSnowflake))
332354
}
333355

356+
// Add SQLite patterns with SQLite DBMS
357+
for _, pattern := range sqlitePatterns {
358+
f.Add(pattern, string(DBMSSQLite))
359+
}
360+
334361
// Add common patterns and quote edge cases with all DBMS types
335362
dbmsTypes := []string{
336363
string(DBMSPostgres),
337364
string(DBMSSQLServer),
338365
string(DBMSMySQL),
339366
string(DBMSOracle),
340367
string(DBMSSnowflake),
368+
string(DBMSSQLite),
341369
}
342370

343371
for _, pattern := range append(commonPatterns, quoteEdgeCases...) {

sqllexer_test.go

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,134 @@ here */`,
637637
{BIND_PARAMETER, "@__my_id"},
638638
},
639639
},
640+
{
641+
name: "sqlite named parameters",
642+
input: "SELECT * FROM users WHERE id = :id AND email = $email AND tag = @tag",
643+
expected: []TokenSpec{
644+
{COMMAND, "SELECT"},
645+
{SPACE, " "},
646+
{WILDCARD, "*"},
647+
{SPACE, " "},
648+
{KEYWORD, "FROM"},
649+
{SPACE, " "},
650+
{IDENT, "users"},
651+
{SPACE, " "},
652+
{KEYWORD, "WHERE"},
653+
{SPACE, " "},
654+
{IDENT, "id"},
655+
{SPACE, " "},
656+
{OPERATOR, "="},
657+
{SPACE, " "},
658+
{BIND_PARAMETER, ":id"},
659+
{SPACE, " "},
660+
{KEYWORD, "AND"},
661+
{SPACE, " "},
662+
{IDENT, "email"},
663+
{SPACE, " "},
664+
{OPERATOR, "="},
665+
{SPACE, " "},
666+
{BIND_PARAMETER, "$email"},
667+
{SPACE, " "},
668+
{KEYWORD, "AND"},
669+
{SPACE, " "},
670+
{IDENT, "tag"},
671+
{SPACE, " "},
672+
{OPERATOR, "="},
673+
{SPACE, " "},
674+
{BIND_PARAMETER, "@tag"},
675+
},
676+
lexerOpts: []lexerOption{WithDBMS(DBMSSQLite)},
677+
},
678+
{
679+
name: "sqlite positional parameters",
680+
input: "SELECT * FROM logs WHERE id = ?5 AND alt = ?",
681+
expected: []TokenSpec{
682+
{COMMAND, "SELECT"},
683+
{SPACE, " "},
684+
{WILDCARD, "*"},
685+
{SPACE, " "},
686+
{KEYWORD, "FROM"},
687+
{SPACE, " "},
688+
{IDENT, "logs"},
689+
{SPACE, " "},
690+
{KEYWORD, "WHERE"},
691+
{SPACE, " "},
692+
{IDENT, "id"},
693+
{SPACE, " "},
694+
{OPERATOR, "="},
695+
{SPACE, " "},
696+
{POSITIONAL_PARAMETER, "?5"},
697+
{SPACE, " "},
698+
{KEYWORD, "AND"},
699+
{SPACE, " "},
700+
{IDENT, "alt"},
701+
{SPACE, " "},
702+
{OPERATOR, "="},
703+
{SPACE, " "},
704+
{POSITIONAL_PARAMETER, "?"},
705+
},
706+
lexerOpts: []lexerOption{WithDBMS(DBMSSQLite)},
707+
},
708+
{
709+
name: "sqlite extended dollar parameters",
710+
input: "SELECT $ns::var, $env(config), $ns::name(sub)",
711+
expected: []TokenSpec{
712+
{COMMAND, "SELECT"},
713+
{SPACE, " "},
714+
{BIND_PARAMETER, "$ns::var"},
715+
{PUNCTUATION, ","},
716+
{SPACE, " "},
717+
{BIND_PARAMETER, "$env(config)"},
718+
{PUNCTUATION, ","},
719+
{SPACE, " "},
720+
{BIND_PARAMETER, "$ns::name(sub)"},
721+
},
722+
lexerOpts: []lexerOption{WithDBMS(DBMSSQLite)},
723+
},
724+
{
725+
name: "sqlite square bracket identifier",
726+
input: "SELECT [user] FROM [main].[table] WHERE [id] = 1",
727+
expected: []TokenSpec{
728+
{COMMAND, "SELECT"},
729+
{SPACE, " "},
730+
{QUOTED_IDENT, "[user]"},
731+
{SPACE, " "},
732+
{KEYWORD, "FROM"},
733+
{SPACE, " "},
734+
{QUOTED_IDENT, "[main].[table]"},
735+
{SPACE, " "},
736+
{KEYWORD, "WHERE"},
737+
{SPACE, " "},
738+
{QUOTED_IDENT, "[id]"},
739+
{SPACE, " "},
740+
{OPERATOR, "="},
741+
{SPACE, " "},
742+
{NUMBER, "1"},
743+
},
744+
lexerOpts: []lexerOption{WithDBMS(DBMSSQLite)},
745+
},
746+
{
747+
name: "sqlite backtick quoted identifier",
748+
input: "SELECT `user` FROM `main`.`table` WHERE `id` = 1",
749+
expected: []TokenSpec{
750+
{COMMAND, "SELECT"},
751+
{SPACE, " "},
752+
{QUOTED_IDENT, "`user`"},
753+
{SPACE, " "},
754+
{KEYWORD, "FROM"},
755+
{SPACE, " "},
756+
{QUOTED_IDENT, "`main`.`table`"},
757+
{SPACE, " "},
758+
{KEYWORD, "WHERE"},
759+
{SPACE, " "},
760+
{QUOTED_IDENT, "`id`"},
761+
{SPACE, " "},
762+
{OPERATOR, "="},
763+
{SPACE, " "},
764+
{NUMBER, "1"},
765+
},
766+
lexerOpts: []lexerOption{WithDBMS(DBMSSQLite)},
767+
},
640768
{
641769
name: "select with system variable",
642770
input: "SELECT @@VERSION AS SqlServerVersion",

0 commit comments

Comments
 (0)