Skip to content

Commit 85611e7

Browse files
Ajit Pratap Singh authored and committed
feat(#249): MySQL syntax support (Phase 2)
- LIMIT offset, count syntax (MySQL-style)
- ON DUPLICATE KEY UPDATE clause for INSERT
- Backtick identifiers (already supported, tests added)
- SHOW TABLES/DATABASES/CREATE TABLE statements
- DESCRIBE/EXPLAIN statements
- REPLACE INTO statement
- UPDATE/DELETE with LIMIT (MySQL extension)
- INTERVAL number unit syntax (MySQL-style)
- IF() and REPLACE() as function names
- GROUP_CONCAT with ORDER BY and SEPARATOR
- MATCH AGAINST full-text search
- REGEXP/RLIKE operator
- All 30 testdata/mysql/ files pass
- Comprehensive unit and integration tests
1 parent 353b035 commit 85611e7

File tree

10 files changed

+778
-33
lines changed

10 files changed

+778
-33
lines changed

pkg/models/token_type.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,9 @@ const (
406406
TokenTypePolicy TokenType = 515 // POLICY keyword for CREATE/ALTER POLICY
407407
TokenTypeUntil TokenType = 516 // UNTIL keyword for VALID UNTIL
408408
TokenTypeReset TokenType = 517 // RESET keyword for ALTER ROLE RESET
409+
TokenTypeShow TokenType = 518 // SHOW keyword for MySQL SHOW commands
410+
TokenTypeDescribe TokenType = 519 // DESCRIBE keyword for MySQL DESCRIBE command
411+
TokenTypeExplain TokenType = 520 // EXPLAIN keyword
409412
)
410413

411414
// String returns a string representation of the token type.
@@ -1014,6 +1017,12 @@ func (t TokenType) String() string {
10141017
return "UNTIL"
10151018
case TokenTypeReset:
10161019
return "RESET"
1020+
case TokenTypeShow:
1021+
return "SHOW"
1022+
case TokenTypeDescribe:
1023+
return "DESCRIBE"
1024+
case TokenTypeExplain:
1025+
return "EXPLAIN"
10171026

10181027
default:
10191028
return "TOKEN"

pkg/sql/ast/ast.go

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,13 +1105,14 @@ func (a ArraySliceExpression) Children() []Node {
11051105

11061106
// InsertStatement represents an INSERT SQL statement
11071107
type InsertStatement struct {
1108-
With *WithClause
1109-
TableName string
1110-
Columns []Expression
1111-
Values [][]Expression // Multi-row support: each inner slice is one row of values
1112-
Query QueryExpression // For INSERT ... SELECT (SelectStatement or SetOperation)
1113-
Returning []Expression
1114-
OnConflict *OnConflict
1108+
With *WithClause
1109+
TableName string
1110+
Columns []Expression
1111+
Values [][]Expression // Multi-row support: each inner slice is one row of values
1112+
Query QueryExpression // For INSERT ... SELECT (SelectStatement or SetOperation)
1113+
Returning []Expression
1114+
OnConflict *OnConflict
1115+
OnDuplicateKey *UpsertClause // MySQL: ON DUPLICATE KEY UPDATE
11151116
}
11161117

11171118
func (i *InsertStatement) statementNode() {}
@@ -1134,6 +1135,9 @@ func (i InsertStatement) Children() []Node {
11341135
if i.OnConflict != nil {
11351136
children = append(children, i.OnConflict)
11361137
}
1138+
if i.OnDuplicateKey != nil {
1139+
children = append(children, i.OnDuplicateKey)
1140+
}
11371141
return children
11381142
}
11391143

@@ -1701,3 +1705,41 @@ func (a AST) Children() []Node {
17011705
}
17021706
return children
17031707
}
1708+
1709+
// ShowStatement represents MySQL SHOW commands (SHOW TABLES, SHOW DATABASES, SHOW CREATE TABLE x, etc.)
1710+
type ShowStatement struct {
1711+
ShowType string // TABLES, DATABASES, CREATE TABLE, COLUMNS, INDEX, etc.
1712+
ObjectName string // For SHOW CREATE TABLE x, SHOW COLUMNS FROM x, etc.
1713+
From string // For SHOW ... FROM database
1714+
}
1715+
1716+
func (s *ShowStatement) statementNode() {}
1717+
func (s ShowStatement) TokenLiteral() string { return "SHOW" }
1718+
func (s ShowStatement) Children() []Node { return nil }
1719+
1720+
// DescribeStatement represents MySQL DESCRIBE/DESC/EXPLAIN table commands
1721+
type DescribeStatement struct {
1722+
TableName string
1723+
}
1724+
1725+
func (d *DescribeStatement) statementNode() {}
1726+
func (d DescribeStatement) TokenLiteral() string { return "DESCRIBE" }
1727+
func (d DescribeStatement) Children() []Node { return nil }
1728+
1729+
// ReplaceStatement represents MySQL REPLACE INTO statement
1730+
type ReplaceStatement struct {
1731+
TableName string
1732+
Columns []Expression
1733+
Values [][]Expression
1734+
}
1735+
1736+
func (r *ReplaceStatement) statementNode() {}
1737+
func (r ReplaceStatement) TokenLiteral() string { return "REPLACE" }
1738+
func (r ReplaceStatement) Children() []Node {
1739+
children := make([]Node, 0)
1740+
children = append(children, nodifyExpressions(r.Columns)...)
1741+
for _, row := range r.Values {
1742+
children = append(children, nodifyExpressions(row)...)
1743+
}
1744+
return children
1745+
}

pkg/sql/parser/dml.go

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -120,18 +120,37 @@ func (p *Parser) parseInsertStatement() (ast.Statement, error) {
120120
return nil, p.expectedError("VALUES or SELECT")
121121
}
122122

123-
// Parse ON CONFLICT clause if present (PostgreSQL UPSERT)
123+
// Parse ON CONFLICT clause (PostgreSQL) or ON DUPLICATE KEY UPDATE (MySQL)
124124
var onConflict *ast.OnConflict
125+
var onDuplicateKey *ast.UpsertClause
125126
if p.isType(models.TokenTypeOn) {
126-
// Peek ahead to check for CONFLICT
127-
if p.peekToken().Literal == "CONFLICT" {
127+
nextLit := strings.ToUpper(p.peekToken().Literal)
128+
if nextLit == "CONFLICT" {
128129
p.advance() // Consume ON
129130
p.advance() // Consume CONFLICT
130131
var err error
131132
onConflict, err = p.parseOnConflictClause()
132133
if err != nil {
133134
return nil, err
134135
}
136+
} else if nextLit == "DUPLICATE" {
137+
p.advance() // Consume ON
138+
p.advance() // Consume DUPLICATE
139+
// Expect KEY
140+
if strings.ToUpper(p.currentToken.Literal) != "KEY" && !p.isType(models.TokenTypeKey) {
141+
return nil, p.expectedError("KEY")
142+
}
143+
p.advance() // Consume KEY
144+
// Expect UPDATE
145+
if !p.isType(models.TokenTypeUpdate) {
146+
return nil, p.expectedError("UPDATE")
147+
}
148+
p.advance() // Consume UPDATE
149+
var err error
150+
onDuplicateKey, err = p.parseOnDuplicateKeyUpdateClause()
151+
if err != nil {
152+
return nil, err
153+
}
135154
}
136155
}
137156

@@ -148,12 +167,13 @@ func (p *Parser) parseInsertStatement() (ast.Statement, error) {
148167

149168
// Create INSERT statement
150169
return &ast.InsertStatement{
151-
TableName: tableName,
152-
Columns: columns,
153-
Values: values,
154-
Query: query,
155-
OnConflict: onConflict,
156-
Returning: returning,
170+
TableName: tableName,
171+
Columns: columns,
172+
Values: values,
173+
Query: query,
174+
OnConflict: onConflict,
175+
OnDuplicateKey: onDuplicateKey,
176+
Returning: returning,
157177
}, nil
158178
}
159179

@@ -237,6 +257,14 @@ func (p *Parser) parseUpdateStatement() (ast.Statement, error) {
237257
}
238258
}
239259

260+
// Parse LIMIT clause if present (MySQL)
261+
if p.isType(models.TokenTypeLimit) {
262+
p.advance() // Consume LIMIT
263+
if p.isNumericLiteral() {
264+
p.advance() // Consume limit value (MySQL UPDATE LIMIT)
265+
}
266+
}
267+
240268
// Parse RETURNING clause if present (PostgreSQL)
241269
var returning []ast.Expression
242270
if p.isType(models.TokenTypeReturning) || p.currentToken.Literal == "RETURNING" {
@@ -284,6 +312,14 @@ func (p *Parser) parseDeleteStatement() (ast.Statement, error) {
284312
}
285313
}
286314

315+
// Parse LIMIT clause if present (MySQL)
316+
if p.isType(models.TokenTypeLimit) {
317+
p.advance() // Consume LIMIT
318+
if p.isNumericLiteral() {
319+
p.advance() // Consume limit value
320+
}
321+
}
322+
287323
// Parse RETURNING clause if present (PostgreSQL)
288324
var returning []ast.Expression
289325
if p.isType(models.TokenTypeReturning) || p.currentToken.Literal == "RETURNING" {
@@ -712,5 +748,38 @@ func (p *Parser) parseOnConflictClause() (*ast.OnConflict, error) {
712748
return onConflict, nil
713749
}
714750

751+
// parseOnDuplicateKeyUpdateClause parses the assignments in ON DUPLICATE KEY UPDATE
752+
func (p *Parser) parseOnDuplicateKeyUpdateClause() (*ast.UpsertClause, error) {
753+
upsert := &ast.UpsertClause{}
754+
for {
755+
if !p.isIdentifier() {
756+
return nil, p.expectedError("column name in ON DUPLICATE KEY UPDATE")
757+
}
758+
columnName := p.currentToken.Literal
759+
p.advance()
760+
761+
if !p.isType(models.TokenTypeEq) {
762+
return nil, p.expectedError("=")
763+
}
764+
p.advance()
765+
766+
value, err := p.parseExpression()
767+
if err != nil {
768+
return nil, fmt.Errorf("failed to parse ON DUPLICATE KEY UPDATE value: %w", err)
769+
}
770+
771+
upsert.Updates = append(upsert.Updates, ast.UpdateExpression{
772+
Column: &ast.Identifier{Name: columnName},
773+
Value: value,
774+
})
775+
776+
if !p.isType(models.TokenTypeComma) {
777+
break
778+
}
779+
p.advance() // Consume comma
780+
}
781+
return upsert, nil
782+
}
783+
715784
// parseTableReference parses a simple table reference (table name)
716785
// Returns a TableReference with the Name field populated

pkg/sql/parser/expressions.go

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,26 @@ func (p *Parser) parseComparisonExpression() (ast.Expression, error) {
172172
}, nil
173173
}
174174

175+
// Check for REGEXP/RLIKE operator (MySQL)
176+
if strings.EqualFold(p.currentToken.Literal, "REGEXP") || strings.EqualFold(p.currentToken.Literal, "RLIKE") {
177+
operator := strings.ToUpper(p.currentToken.Literal)
178+
p.advance()
179+
pattern, err := p.parsePrimaryExpression()
180+
if err != nil {
181+
return nil, goerrors.InvalidSyntaxError(
182+
fmt.Sprintf("failed to parse REGEXP pattern: %v", err),
183+
p.currentLocation(),
184+
p.currentToken.Literal,
185+
)
186+
}
187+
return &ast.BinaryExpression{
188+
Left: left,
189+
Operator: operator,
190+
Right: pattern,
191+
Not: notPrefix,
192+
}, nil
193+
}
194+
175195
// Check for IN operator
176196
if p.isType(models.TokenTypeIn) {
177197
p.advance() // Consume IN
@@ -619,6 +639,17 @@ func (p *Parser) parsePrimaryExpression() (ast.Expression, error) {
619639
return p.parseArrayConstructor()
620640
}
621641

642+
// Handle keywords that can be used as function names in MySQL (IF, REPLACE, etc.)
643+
if (p.isType(models.TokenTypeIf) || p.isType(models.TokenTypeReplace)) && p.peekToken().Type == models.TokenTypeLParen {
644+
identName := p.currentToken.Literal
645+
p.advance()
646+
funcCall, err := p.parseFunctionCall(identName)
647+
if err != nil {
648+
return nil, err
649+
}
650+
return funcCall, nil
651+
}
652+
622653
if p.isType(models.TokenTypeIdentifier) || p.isType(models.TokenTypeDoubleQuotedString) {
623654
// Handle identifiers and function calls
624655
// Double-quoted strings are treated as identifiers in SQL (e.g., "column_name")
@@ -632,6 +663,12 @@ func (p *Parser) parsePrimaryExpression() (ast.Expression, error) {
632663
if err != nil {
633664
return nil, err
634665
}
666+
667+
// MySQL MATCH(...) AGAINST(...) full-text search
668+
if strings.EqualFold(identName, "MATCH") && strings.EqualFold(p.currentToken.Literal, "AGAINST") {
669+
return p.parseMatchAgainst(funcCall)
670+
}
671+
635672
return funcCall, nil
636673
}
637674

@@ -1068,21 +1105,29 @@ func (p *Parser) parseIntervalExpression() (*ast.IntervalExpression, error) {
10681105
// Consume INTERVAL keyword
10691106
p.advance()
10701107

1071-
// Expect a string literal for the interval value
1072-
if !p.isStringLiteral() {
1073-
return nil, goerrors.InvalidSyntaxError(
1074-
"expected string literal after INTERVAL keyword",
1075-
p.currentLocation(),
1076-
"Use INTERVAL 'value' syntax (e.g., INTERVAL '1 day')",
1077-
)
1108+
// Support both PostgreSQL style: INTERVAL '1 day'
1109+
// and MySQL style: INTERVAL 30 DAY, INTERVAL 1 HOUR
1110+
if p.isStringLiteral() {
1111+
value := p.currentToken.Literal
1112+
p.advance()
1113+
return &ast.IntervalExpression{Value: value}, nil
10781114
}
10791115

1080-
value := p.currentToken.Literal
1081-
p.advance() // Consume the string literal
1116+
// MySQL style: INTERVAL <number> <unit>
1117+
if p.isNumericLiteral() {
1118+
numStr := p.currentToken.Literal
1119+
p.advance()
1120+
// Expect a unit keyword (DAY, HOUR, MINUTE, SECOND, MONTH, YEAR, WEEK, etc.)
1121+
unit := strings.ToUpper(p.currentToken.Literal)
1122+
p.advance()
1123+
return &ast.IntervalExpression{Value: numStr + " " + unit}, nil
1124+
}
10821125

1083-
return &ast.IntervalExpression{
1084-
Value: value,
1085-
}, nil
1126+
return nil, goerrors.InvalidSyntaxError(
1127+
"expected string literal or number after INTERVAL keyword",
1128+
p.currentLocation(),
1129+
"Use INTERVAL '1 day' or INTERVAL 1 DAY syntax",
1130+
)
10861131
}
10871132

10881133
// parseArrayConstructor parses PostgreSQL ARRAY constructor syntax.

0 commit comments

Comments
 (0)