Commit 6fb0ebb

Add tests of lexer dialects (sqlparse, sqlfluff) (#1313)
This PR adds tests of our ports of the sqlparse and sqlfluff lexer dialects. The test files contain SQL queries extracted from the test suites of sqlparse and sqlfluff. The expected outputs (tokens) were collected by running those queries through the original sqlparse and sqlfluff lexers. This gives very good test coverage (>10k SQL queries, including many edge-case/"strange"/"tricky" queries) and lets us be almost certain that our implementation behaves identically to the original sqlparse/sqlfluff implementations. The test files are stored in a separate repository (added to Quesma as a Git submodule) because they are large (~10 MB).
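As a rough illustration of that extraction step (a hypothetical sketch, not the script actually used; the real extraction lives in the testdata repository, and its output format is described by testutils.LoadParsedTestcases below), the expected tokens for a single query can be obtained from sqlparse like this:

    # Hypothetical sketch: dump the tokens produced by the original sqlparse lexer
    import sqlparse

    query = "SELECT * FROM tabela"
    for token in sqlparse.parse(query)[0].flatten():
        # each leaf token has a type (e.g. Token.Keyword.DML) and its raw text
        print(str(token.ttype), repr(token.value))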
1 parent 6b71cac commit 6fb0ebb

File tree

6 files changed: +151 −2 lines changed


.github/workflows/pipeline.yml

Lines changed: 2 additions & 0 deletions
@@ -20,6 +20,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+        with:
+          submodules: 'true'
 
       - name: Tune GitHub-hosted runner network
         uses: smorimoto/tune-github-hosted-runner-network@v1
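Note that submodules: 'true' only affects the CI checkout. For a local checkout the test data still has to be fetched explicitly with the standard Git submodule commands (illustrative, nothing repo-specific):

    git clone --recurse-submodules <repository-url>
    # or, in an existing clone:
    git submodule update --init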

.gitmodules

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+[submodule "platform/parsers/sql/testdata"]
+    path = platform/parsers/sql/testdata
+    url = https://github.com/avelanarius/quesma-testdata-wip.git

platform/parsers/sql/lexer/dialect_sqlparse/rule_test.go

Lines changed: 39 additions & 1 deletion
@@ -4,9 +4,11 @@
 package dialect_sqlparse
 
 import (
-    "github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/core"
     "testing"
 
+    "github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/core"
+    "github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/testutils"
+
     "github.com/stretchr/testify/assert"
 )
 
@@ -38,6 +40,42 @@ func TestSimpleSelect(t *testing.T) {
     assert.Equal(t, "tabela", tokens[6].RawValue)
 }
 
+func TestSqlparsedTestcases(t *testing.T) {
+    testfiles := []string{
+        "../../testdata/testdata/dialect_sqlparse/parsed-sqlparse-testcases.txt",
+        "../../testdata/testdata/dialect_sqlparse/parsed-sqlfluff-all-testcases.txt",
+    }
+
+    for _, testfile := range testfiles {
+        t.Run(testfile, func(t *testing.T) {
+            testcases := testutils.LoadParsedTestcases(testfile)
+            for _, testcase := range testcases {
+                t.Run(testcase.Query, func(t *testing.T) {
+                    tokens := core.Lex(testcase.Query, SqlparseRules)
+                    assert.Equal(t, len(testcase.ExpectedTokens), len(tokens))
+
+                    commonLength := min(len(testcase.ExpectedTokens), len(tokens))
+
+                    for i := 0; i < commonLength; i++ {
+                        assert.Equalf(t, testcase.ExpectedTokens[i].TokenType, tokens[i].Type.Name, "Token type at position %d", i)
+                        assert.Equalf(t, testcase.ExpectedTokens[i].TokenValue, tokens[i].RawValue, "Token value at position %d", i)
+                    }
+
+                    if t.Failed() {
+                        for i := 0; i < commonLength; i++ {
+                            if testcase.ExpectedTokens[i].TokenType != tokens[i].Type.Name || testcase.ExpectedTokens[i].TokenValue != tokens[i].RawValue {
+                                t.Logf("Mismatch token at position %d: %s(%s). Got: %s(%s)", i, testcase.ExpectedTokens[i].TokenType, testcase.ExpectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
+                            } else {
+                                t.Logf("Expected token at position %d: %s(%s). Got: %s(%s)", i, testcase.ExpectedTokens[i].TokenType, testcase.ExpectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
+                            }
+                        }
+                    }
+                })
+            }
+        })
+    }
+}
+
 func FuzzLex(f *testing.F) {
     f.Add("SELECT * FROM tabela")
     f.Add("SELECT id, name, email FROM customers WHERE age > 21")

platform/parsers/sql/lexer/dialects_sqlfluff/ansi/rule_test.go

Lines changed: 41 additions & 1 deletion
@@ -4,11 +4,51 @@
 package ansi
 
 import (
+    "testing"
+
     "github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/core"
+    "github.com/QuesmaOrg/quesma/platform/parsers/sql/lexer/testutils"
     "github.com/stretchr/testify/assert"
-    "testing"
 )
 
+func TestSqlfluffAnsiTestcases(t *testing.T) {
+    testfiles := []string{
+        "../../../testdata/testdata/dialects_sqlfluff/parsed-sqlfluff-ansi-testcases.txt",
+        "../../../testdata/testdata/dialects_sqlfluff/parsed-sqlparse-testcases.txt",
+    }
+
+    for _, testfile := range testfiles {
+        t.Run(testfile, func(t *testing.T) {
+            testcases := testutils.LoadParsedTestcases(testfile)
+            for _, testcase := range testcases {
+                t.Run(testcase.Query, func(t *testing.T) {
+                    expectedTokens := testcase.ExpectedTokens[:len(testcase.ExpectedTokens)-1] // remove the last token, which is an EOF token
+
+                    tokens := core.Lex(testcase.Query, SqlfluffAnsiRules)
+                    assert.Equal(t, len(expectedTokens), len(tokens))
+
+                    commonLength := min(len(expectedTokens), len(tokens))
+
+                    for i := 0; i < commonLength; i++ {
+                        assert.Equalf(t, expectedTokens[i].TokenType, tokens[i].Type.Name, "Token type at position %d", i)
+                        assert.Equalf(t, expectedTokens[i].TokenValue, tokens[i].RawValue, "Token value at position %d", i)
+                    }
+
+                    if t.Failed() {
+                        for i := 0; i < commonLength; i++ {
+                            if expectedTokens[i].TokenType != tokens[i].Type.Name || expectedTokens[i].TokenValue != tokens[i].RawValue {
+                                t.Logf("Mismatch token at position %d: %s(%s). Got: %s(%s)", i, expectedTokens[i].TokenType, expectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
+                            } else {
+                                t.Logf("Expected token at position %d: %s(%s). Got: %s(%s)", i, expectedTokens[i].TokenType, expectedTokens[i].TokenValue, tokens[i].Type.Name, tokens[i].RawValue)
+                            }
+                        }
+                    }
+                })
+            }
+        })
+    }
+}
+
 func FuzzLex(f *testing.F) {
     f.Add("SELECT * FROM tabela")
     f.Add("SELECT id, name, email FROM customers WHERE age > 21")
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
+// Copyright Quesma, licensed under the Elastic License 2.0.
+// SPDX-License-Identifier: Elastic-2.0
+
+package testutils
+
+import (
+    "bytes"
+    "os"
+)
+
+type ParsedTestcase struct {
+    Query          string
+    ExpectedTokens []ExpectedToken
+}
+
+type ExpectedToken struct {
+    TokenType  string
+    TokenValue string
+}
+
+// Loads a list of test queries and their expected tokens (extracted from existing parsers).
+// The structure of the file is as follows:
+//
+// [QUERY1]
+// <end_of_query/>
+// [TOKEN_TYPE_1]
+// [TOKEN_VALUE_1]
+// <end_of_token/>
+// [TOKEN_TYPE_2]
+// [TOKEN_VALUE_2]
+// <end_of_token/>
+// ...
+// <end_of_tokens/>
+// [QUERY2]
+// ...
+func LoadParsedTestcases(filename string) []ParsedTestcase {
+    contents, err := os.ReadFile(filename)
+    if err != nil {
+        panic(err)
+    }
+
+    testcases := bytes.Split(contents, []byte("\n<end_of_tokens/>\n"))
+    testcases = testcases[:len(testcases)-1]
+
+    var parsedTestcases []ParsedTestcase
+    for _, testcase := range testcases {
+        endOfQuerySplit := bytes.Split(testcase, []byte("\n<end_of_query/>\n"))
+
+        query := string(endOfQuerySplit[0])
+
+        tokens := bytes.Split(endOfQuerySplit[1], []byte("\n<end_of_token/>\n"))
+        tokens = tokens[:len(tokens)-1]
+
+        var expectedTokens []ExpectedToken
+        for _, tokenDescription := range tokens {
+            tokenDescriptionSplit := bytes.SplitN(tokenDescription, []byte("\n"), 2)
+            tokenType := string(tokenDescriptionSplit[0])
+            tokenValue := string(tokenDescriptionSplit[1])
+            expectedTokens = append(expectedTokens, ExpectedToken{tokenType, tokenValue})
+        }
+
+        parsedTestcases = append(parsedTestcases, ParsedTestcase{query, expectedTokens})
+    }
+    return parsedTestcases
+}
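For illustration, a single testcase entry following the structure documented in the comment above could look like this (the query and token type names are illustrative, not taken from the actual testdata files; the value of the whitespace token is a single space character):

    SELECT 1
    <end_of_query/>
    Token.Keyword.DML
    SELECT
    <end_of_token/>
    Token.Text.Whitespace
     
    <end_of_token/>
    Token.Literal.Number.Integer
    1
    <end_of_token/>
    <end_of_tokens/>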

platform/parsers/sql/testdata

Submodule testdata added at e7995a7
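With the submodule checked out, the new tests run under the standard Go tooling, for example with go test ./... from the module that contains the lexer packages (the exact package path depends on the checkout layout).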
