Skip to content

Commit c7abb54

Browse files
authored
fix: LOAD DATA into enum columns (#348)
1 parent 98d97ec commit c7abb54

File tree

6 files changed

+89
-34
lines changed

6 files changed

+89
-34
lines changed

backend/executor.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func (b *DuckBuilder) Build(ctx *sql.Context, root sql.Node, r sql.Row) (sql.Row
6464
ctx.GetLogger().WithFields(logrus.Fields{
6565
"Query": ctx.Query(),
6666
"NodeType": fmt.Sprintf("%T", n),
67-
}).Trace("Building node:", n)
67+
}).Traceln("Building node:", n)
6868

6969
// TODO: find a better way to fall back to the base builder
7070
switch n.(type) {

backend/loaddata.go

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/dolthub/go-mysql-server/sql"
1515
"github.com/dolthub/go-mysql-server/sql/plan"
1616
"github.com/dolthub/go-mysql-server/sql/types"
17+
"github.com/dolthub/vitess/go/vt/proto/query"
1718
)
1819

1920
const isUnixSystem = runtime.GOOS == "linux" ||
@@ -244,14 +245,8 @@ func columnTypeHints(b *strings.Builder, dst sql.Table, schema sql.Schema, colNa
244245
if i > 0 {
245246
b.WriteString(", ")
246247
}
247-
b.WriteString(catalog.QuoteIdentifierANSI(col.Name))
248-
b.WriteString(": ")
249-
if dt, err := catalog.DuckdbDataType(col.Type); err != nil {
248+
if err := columnTypeHint(b, col); err != nil {
250249
return err
251-
} else {
252-
b.WriteString(`'`)
253-
b.WriteString(dt.Name())
254-
b.WriteString(`'`)
255250
}
256251
}
257252
b.WriteString("}")
@@ -262,25 +257,36 @@ func columnTypeHints(b *strings.Builder, dst sql.Table, schema sql.Schema, colNa
262257
if i > 0 {
263258
b.WriteString(", ")
264259
}
265-
b.WriteString(catalog.QuoteIdentifierANSI(col))
266-
b.WriteString(": ")
267260
idx := schema.IndexOf(col, dst.Name()) // O(n^2) but n := # of columns is usually small
268261
if idx < 0 {
269262
return sql.ErrTableColumnNotFound.New(dst.Name(), col)
270263
}
271-
if dt, err := catalog.DuckdbDataType(schema[idx].Type); err != nil {
264+
if err := columnTypeHint(b, schema[idx]); err != nil {
272265
return err
273-
} else {
274-
b.WriteString(`'`)
275-
b.WriteString(dt.Name())
276-
b.WriteString(`'`)
277266
}
278267
}
279268

280269
b.WriteString("}")
281270
return nil
282271
}
283272

273+
func columnTypeHint(b *strings.Builder, col *sql.Column) error {
274+
b.WriteString(catalog.QuoteIdentifierANSI(col.Name))
275+
b.WriteString(": ")
276+
if dt, err := catalog.DuckdbDataType(col.Type); err != nil {
277+
return err
278+
} else {
279+
b.WriteString(`'`)
280+
if col.Type.Type() == query.Type_ENUM {
281+
b.WriteString(`VARCHAR`)
282+
} else {
283+
b.WriteString(dt.Name())
284+
}
285+
b.WriteString(`'`)
286+
}
287+
return nil
288+
}
289+
284290
// isUnderSecureFileDir ensures that fileStr is under secureFileDir or a subdirectory of secureFileDir, errors otherwise
285291
// Copied from https://github.com/dolthub/go-mysql-server/blob/main/sql/rowexec/rel.go
286292
func isUnderSecureFileDir(secureFileDir interface{}, fileStr string) error {

catalog/type_mapping.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,13 @@ func newDateTimeType(mysqlName string, precision int) AnnotatedDuckType {
9999
}
100100

101101
func newEnumType(typ sql.EnumType) AnnotatedDuckType {
102-
// TODO: `ENUM` allows `,` and `'` in the values. We need to escape `'`.
103-
typeString := `ENUM('` + strings.Join(typ.Values(), `', '`) + `')`
102+
// For ENUM type, we need to escape single quotes in values
103+
escapedValues := make([]string, len(typ.Values()))
104+
for i, v := range typ.Values() {
105+
// Replace each single quote with two single quotes to escape it
106+
escapedValues[i] = strings.ReplaceAll(v, "'", "''")
107+
}
108+
typeString := `ENUM('` + strings.Join(escapedValues, `', '`) + `')`
104109
return AnnotatedDuckType{typeString, MySQLType{Name: "ENUM", Values: typ.Values(), Collation: uint16(typ.Collation())}}
105110
}
106111

test/bats/mysql/helper.bash

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,12 @@ mysql_exec() {
1313

1414
# mysql_exec_stdin runs the mysql client against $MYSQL_HOST:$MYSQL_PORT as
# $MYSQL_USER with --raw --batch --skip-column-names and --local-infile.
# No statement is passed on the command line here, so callers supply SQL on
# stdin (for example through a heredoc); any extra arguments are forwarded
# to mysql unchanged.
mysql_exec_stdin() {
1515
mysql -h "$MYSQL_HOST" -P "$MYSQL_PORT" -u "$MYSQL_USER" --raw --batch --skip-column-names --local-infile "$@"
16+
}
17+
18+
# create_temp_file writes its first argument to a fresh temporary file and
# prints the file's path on stdout.
#
# Arguments:
#   $1 - file content; backslash escapes such as \t and \n are expanded, and a
#        trailing newline is appended.
# Outputs:
#   The path of the created file. The caller is responsible for removing it.
# Returns:
#   Non-zero if mktemp fails or the file cannot be written.
create_temp_file() {
    local content="$1"
    local tempfile
    # Stop here if no file could be created instead of redirecting to an empty path.
    tempfile="$(mktemp)" || return
    # printf '%b' expands the same backslash escapes as `echo -e`, but never
    # treats content such as "-n" or "-e" as an option.
    printf '%b\n' "$content" > "$tempfile" || return
    printf '%s\n' "$tempfile"
}

test/bats/mysql/load_data.bats

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env bats
2+
bats_require_minimum_version 1.5.0
3+
4+
load helper
5+
6+
# setup_file creates the load_data_test database used by the tests in this
# file and sets the global local_infile variable to 1.
# The heredoc delimiter is quoted, so the SQL is passed to mysql without
# shell expansion.
setup_file() {
7+
mysql_exec_stdin <<-'EOF'
8+
CREATE DATABASE load_data_test;
9+
SET GLOBAL local_infile = 1;
10+
EOF
11+
}
12+
13+
teardown_file() {
14+
mysql_exec_stdin <<-'EOF'
15+
DROP DATABASE IF EXISTS load_data_test;
16+
EOF
17+
}
18+
19+
# Currently disabled: the unconditional `skip` on the first line of the body
# prevents the rest from running (the reason is not recorded here).
# Intended scenario: create a table with a JSON column, LOAD DATA LOCAL INFILE
# from testdata/issue329.tsv into it, and expect exactly one row afterwards.
@test "Load a TSV file that contains an escaped JSON column" {
20+
skip
21+
mysql_exec_stdin <<-'EOF'
22+
USE load_data_test;
23+
CREATE TABLE translations (code VARCHAR(100), domain VARCHAR(16), translations JSON);
24+
LOAD DATA LOCAL INFILE 'testdata/issue329.tsv' REPLACE INTO TABLE translations CHARACTER SET 'utf8mb4' FIELDS TERMINATED BY ' ' ESCAPED BY '\\' LINES STARTING BY '' TERMINATED BY '\n' (`code`, `domain`, `translations`);
25+
EOF
26+
run -0 mysql_exec 'SELECT COUNT(*) FROM load_data_test.translations'
27+
[ "${output}" = "1" ]
28+
}
29+
30+
# Loads a single row into a table with three DATE columns and one ENUM column
# and checks that the row reads back unchanged.
# The input row is written to a temporary file by create_temp_file, which
# expands the \t escapes in its argument. The heredoc delimiter is unquoted so
# that ${tempfile} is substituted into the LOAD DATA statement.
# NOTE(review): `rm "$tempfile"` is the last command, so the temporary file is
# presumably left behind when an earlier command fails the test; confirm.
@test "Load a TSV file with date and enum columns" {
31+
local tempfile
32+
tempfile=$(create_temp_file "2025-01-06\t2025-01-06\t2025-01-06\tphprapporten")
33+
34+
mysql_exec_stdin <<-EOF
35+
USE load_data_test;
36+
CREATE TABLE peildatum (
37+
datum date DEFAULT NULL,
38+
vanaf date DEFAULT NULL,
39+
tot date DEFAULT NULL,
40+
doel enum('phprapporten','excelrapporten','opslagkosten') CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL
41+
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
42+
LOAD DATA LOCAL INFILE '${tempfile}' REPLACE INTO TABLE peildatum
43+
CHARACTER SET 'utf8mb4'
44+
FIELDS TERMINATED BY ' ' ESCAPED BY '\\\\'
45+
LINES STARTING BY '' TERMINATED BY '\n'
46+
(datum, vanaf, tot, doel);
47+
EOF
48+
49+
run -0 mysql_exec 'SELECT * FROM load_data_test.peildatum'
50+
[ "${output}" = "2025-01-06 2025-01-06 2025-01-06 phprapporten" ]
51+
52+
rm "$tempfile"
53+
}

test/bats/mysql/load_json_column.bats

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)