Skip to content

Commit 40535e8

Browse files
xhebox authored and ti-chi-bot committed
This is an automated cherry-pick of pingcap#61080
Signed-off-by: ti-chi-bot <[email protected]>
1 parent 5840bb8 commit 40535e8

File tree

3 files changed

+212
-0
lines changed

3 files changed

+212
-0
lines changed

cmd/explaintest/r/new_character_set.result

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,40 @@ set @@character_set_connection=gbk;
9191
select hex('一a'), '一a';
9292
hex('涓?') 涓?
9393
E4B83F 涓?
94+
<<<<<<< HEAD:cmd/explaintest/r/new_character_set.result
95+
=======
96+
set @@sql_mode=default;
97+
set @@character_set_client=default;
98+
set @@character_set_connection=default;
99+
show variables like 'collation_connection';
100+
Variable_name Value
101+
collation_connection utf8mb4_bin
102+
set default_collation_for_utf8mb4 = 'utf8mb4_general_ci';
103+
set names utf8mb4;
104+
show variables like 'collation_connection';
105+
Variable_name Value
106+
collation_connection utf8mb4_general_ci
107+
set default_collation_for_utf8mb4 = 'utf8mb4_0900_ai_ci';
108+
set names utf8mb4;
109+
show variables like 'collation_connection';
110+
Variable_name Value
111+
collation_connection utf8mb4_0900_ai_ci
112+
set default_collation_for_utf8mb4 = default;
113+
set names utf8mb4;
114+
show variables like 'collation_connection';
115+
Variable_name Value
116+
collation_connection utf8mb4_bin
117+
set character_set_results = "gbk";
118+
select cast(0x414141E280A9424242 as char charset utf8mb4);
119+
cast(0x414141E280A9424242 as char charset utf8mb4)
120+
AAA?BBB
121+
SET character_set_results = @undefined_var;
122+
DROP TABLE if exists t61085;
123+
create table t61085 (a char(255) charset gbk);
124+
insert into t61085 values ('AAA');
125+
set SESSION sql_mode = '';
126+
select * from t61085 where a = cast(0x41414180424242 as char charset gbk);
127+
a
128+
AAA
129+
DROP TABLE t61085;
130+
>>>>>>> d5dcbdf3792 (server: replace instead of truncate encoded result (#61080)):tests/integrationtest/r/new_character_set.result

cmd/explaintest/t/new_character_set.test

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,32 @@ set names utf8mb4;
6767
set @@character_set_client=gbk;
6868
set @@character_set_connection=gbk;
6969
select hex('一a'), '一a';
70+
<<<<<<< HEAD:cmd/explaintest/t/new_character_set.test
71+
=======
72+
73+
set @@sql_mode=default;
74+
set @@character_set_client=default;
75+
set @@character_set_connection=default;
76+
77+
show variables like 'collation_connection';
78+
set default_collation_for_utf8mb4 = 'utf8mb4_general_ci';
79+
set names utf8mb4;
80+
show variables like 'collation_connection';
81+
set default_collation_for_utf8mb4 = 'utf8mb4_0900_ai_ci';
82+
set names utf8mb4;
83+
show variables like 'collation_connection';
84+
set default_collation_for_utf8mb4 = default;
85+
set names utf8mb4;
86+
show variables like 'collation_connection';
87+
88+
# Bug#61085 (https://github.com/pingcap/tidb/issues/61085): characters that cannot be encoded in the result charset should be replaced instead of truncating the result.
89+
set character_set_results = "gbk";
90+
select cast(0x414141E280A9424242 as char charset utf8mb4);
91+
SET character_set_results = @undefined_var;
92+
DROP TABLE if exists t61085;
93+
create table t61085 (a char(255) charset gbk);
94+
insert into t61085 values ('AAA');
95+
set SESSION sql_mode = '';
96+
select * from t61085 where a = cast(0x41414180424242 as char charset gbk);
97+
DROP TABLE t61085;
98+
>>>>>>> d5dcbdf3792 (server: replace instead of truncate encoded result (#61080)):tests/integrationtest/t/new_character_set.test
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Copyright 2015 PingCAP, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Copyright 2013 The Go-MySQL-Driver Authors. All rights reserved.
16+
//
17+
// This Source Code Form is subject to the terms of the Mozilla Public
18+
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
19+
// You can obtain one at http://mozilla.org/MPL/2.0/.
20+
21+
// The MIT License (MIT)
22+
//
23+
// Copyright (c) 2014 wandoulabs
24+
// Copyright (c) 2014 siddontang
25+
//
26+
// Permission is hereby granted, free of charge, to any person obtaining a copy of
27+
// this software and associated documentation files (the "Software"), to deal in
28+
// the Software without restriction, including without limitation the rights to
29+
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
30+
// the Software, and to permit persons to whom the Software is furnished to do so,
31+
// subject to the following conditions:
32+
//
33+
// The above copyright notice and this permission notice shall be included in all
34+
// copies or substantial portions of the Software.
35+
36+
package column
37+
38+
import (
39+
"bytes"
40+
41+
"github.com/pingcap/tidb/pkg/parser/charset"
42+
"github.com/pingcap/tidb/pkg/parser/mysql"
43+
"github.com/pingcap/tidb/pkg/util/logutil"
44+
"go.uber.org/zap"
45+
)
46+
47+
// ResultEncoder encodes a column value to a byte slice.
48+
type ResultEncoder struct {
49+
encoding charset.Encoding
50+
51+
// dataEncoding can be updated to match the column data charset.
52+
dataEncoding charset.Encoding
53+
54+
buffer *bytes.Buffer
55+
56+
// chsName and encoding are unchanged after the initialization from
57+
// session variable @@character_set_results.
58+
chsName string
59+
60+
isBinary bool
61+
isNull bool
62+
dataIsBinary bool
63+
}
64+
65+
// NewResultEncoder creates a new ResultEncoder.
66+
func NewResultEncoder(chs string) *ResultEncoder {
67+
return &ResultEncoder{
68+
chsName: chs,
69+
encoding: charset.FindEncodingTakeUTF8AsNoop(chs),
70+
buffer: &bytes.Buffer{},
71+
isBinary: chs == charset.CharsetBin,
72+
isNull: len(chs) == 0,
73+
}
74+
}
75+
76+
// Clean prevent the ResultEncoder from holding too much memory.
77+
func (d *ResultEncoder) Clean() {
78+
d.buffer = nil
79+
}
80+
81+
// UpdateDataEncoding updates the data encoding.
82+
func (d *ResultEncoder) UpdateDataEncoding(chsID uint16) {
83+
chs, _, err := charset.GetCharsetInfoByID(int(chsID))
84+
if err != nil {
85+
logutil.BgLogger().Warn("unknown charset ID", zap.Error(err))
86+
}
87+
d.dataEncoding = charset.FindEncodingTakeUTF8AsNoop(chs)
88+
d.dataIsBinary = chsID == mysql.BinaryDefaultCollationID
89+
}
90+
91+
// ColumnTypeInfoCharsetID returns the charset ID for the column type info.
92+
func (d *ResultEncoder) ColumnTypeInfoCharsetID(info *Info) uint16 {
93+
// Only replace the charset when @@character_set_results is valid and
94+
// the target column is a non-binary string.
95+
charset := info.dumpCharset()
96+
if d.isNull || len(d.chsName) == 0 || !isStringColumnType(info.Type) {
97+
return charset
98+
}
99+
if charset == mysql.BinaryDefaultCollationID {
100+
return mysql.BinaryDefaultCollationID
101+
}
102+
return uint16(mysql.CharsetNameToID(d.chsName))
103+
}
104+
105+
// EncodeMeta encodes bytes for meta info like column names.
106+
// Note that the result should be consumed immediately.
107+
func (d *ResultEncoder) EncodeMeta(src []byte) []byte {
108+
return d.EncodeWith(src, d.encoding)
109+
}
110+
111+
// EncodeData encodes bytes for row data.
112+
// Note that the result should be consumed immediately.
113+
func (d *ResultEncoder) EncodeData(src []byte) []byte {
114+
// For the following cases, TiDB encodes the results with column charset
115+
// instead of @@character_set_results:
116+
// - @@character_set_result = null.
117+
// - @@character_set_result = binary.
118+
// - The column is binary type like blob, binary char/varchar.
119+
if d.isNull || d.isBinary || d.dataIsBinary {
120+
// Use the column charset to encode.
121+
return d.EncodeWith(src, d.dataEncoding)
122+
}
123+
return d.EncodeWith(src, d.encoding)
124+
}
125+
126+
// EncodeWith encodes bytes with the given encoding.
127+
func (d *ResultEncoder) EncodeWith(src []byte, enc charset.Encoding) []byte {
128+
data, err := enc.Transform(d.buffer, src, charset.OpEncodeReplace)
129+
if err != nil {
130+
logutil.BgLogger().Debug("encode error", zap.Error(err))
131+
}
132+
return data
133+
}
134+
135+
func isStringColumnType(tp byte) bool {
136+
switch tp {
137+
case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeBit,
138+
mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob,
139+
mysql.TypeEnum, mysql.TypeSet, mysql.TypeJSON:
140+
return true
141+
case mysql.TypeTiDBVectorFloat32:
142+
// When passing Vector column to the SQL Client, pretend to be a non-binary String.
143+
return true
144+
}
145+
return false
146+
}

0 commit comments

Comments
 (0)