expression: refine invalid char error msg for string conversion (#32199)

close pingcap/tidb#30444
This commit is contained in:
tangenta
2022-02-09 19:25:36 +08:00
committed by GitHub
parent 21e49193be
commit 4e33a0f2eb
4 changed files with 40 additions and 12 deletions

View File

@ -398,13 +398,17 @@ a like 0xe4b880 b like 0xd2bb
1 1
1 1
select a = 0xb6fe from t;
Error 3854: Cannot convert string 'B6FE' from binary to utf8mb4
Error 3854: Cannot convert string '\xB6\xFE' from binary to utf8mb4
select b = 0xe4ba8c from t;
Error 3854: Cannot convert string 'E4BA8C' from binary to gbk
Error 3854: Cannot convert string '\xE4\xBA\x8C' from binary to gbk
select concat(a, 0xb6fe) from t;
Error 3854: Cannot convert string 'B6FE' from binary to utf8mb4
Error 3854: Cannot convert string '\xB6\xFE' from binary to utf8mb4
select concat(b, 0xe4ba8c) from t;
Error 3854: Cannot convert string 'E4BA8C' from binary to gbk
Error 3854: Cannot convert string '\xE4\xBA\x8C' from binary to gbk
select concat(convert('a' using gbk), 0x3fff) from t;
Error 3854: Cannot convert string '?\xFF' from binary to gbk
select concat(convert('a' using gbk), 0x3fffffffffffffff) from t;
Error 3854: Cannot convert string '?\xFF\xFF\xFF\xFF\xFF...' from binary to gbk
set @@tidb_enable_vectorized_expression = false;
select hex(concat(a, c)), hex(concat(b, c)) from t;
hex(concat(a, c)) hex(concat(b, c))
@ -497,13 +501,13 @@ a like 0xe4b880 b like 0xd2bb
1 1
1 1
select a = 0xb6fe from t;
Error 3854: Cannot convert string 'B6FE' from binary to utf8mb4
Error 3854: Cannot convert string '\xB6\xFE' from binary to utf8mb4
select b = 0xe4ba8c from t;
Error 3854: Cannot convert string 'E4BA8C' from binary to gbk
Error 3854: Cannot convert string '\xE4\xBA\x8C' from binary to gbk
select concat(a, 0xb6fe) from t;
Error 3854: Cannot convert string 'B6FE' from binary to utf8mb4
Error 3854: Cannot convert string '\xB6\xFE' from binary to utf8mb4
select concat(b, 0xe4ba8c) from t;
Error 3854: Cannot convert string 'E4BA8C' from binary to gbk
Error 3854: Cannot convert string '\xE4\xBA\x8C' from binary to gbk
drop table if exists t;
create table t (a char(20) charset utf8mb4, b char(20) charset gbk, c binary(20));
insert into t values ('一二三', '一二三', '一二三');

View File

@ -498,4 +498,4 @@ a b c d
create table t3(a char(10), primary key (a));
insert into t3 values ('a');
select * from t3 where a > 0x80;
Error 1105: Cannot convert string '80' from binary to utf8mb4
Error 1105: Cannot convert string '\x80' from binary to utf8mb4

View File

@ -200,6 +200,10 @@ select b = 0xe4ba8c from t;
select concat(a, 0xb6fe) from t;
--error 3854
select concat(b, 0xe4ba8c) from t;
--error 3854
select concat(convert('a' using gbk), 0x3fff) from t;
--error 3854
select concat(convert('a' using gbk), 0x3fffffffffffffff) from t;
set @@tidb_enable_vectorized_expression = false;
select hex(concat(a, c)), hex(concat(b, c)) from t;

View File

@ -17,6 +17,8 @@ package expression
import (
"bytes"
"fmt"
"strings"
"unicode"
"github.com/pingcap/tidb/errno"
"github.com/pingcap/tidb/parser/ast"
@ -172,9 +174,10 @@ func (b *builtinInternalFromBinarySig) evalString(row chunk.Row) (res string, is
return val, isNull, err
}
enc := charset.FindEncoding(b.tp.Charset)
ret, err := enc.Transform(nil, hack.Slice(val), charset.OpDecode)
valBytes := hack.Slice(val)
ret, err := enc.Transform(nil, valBytes, charset.OpDecode)
if err != nil {
strHex := fmt.Sprintf("%X", val)
strHex := formatInvalidChars(valBytes)
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.Charset)
}
return string(ret), false, err
@ -205,7 +208,7 @@ func (b *builtinInternalFromBinarySig) vecEvalString(input *chunk.Chunk, result
str := buf.GetBytes(i)
val, err := enc.Transform(encodedBuf, str, charset.OpDecode)
if err != nil {
strHex := fmt.Sprintf("%X", str)
strHex := formatInvalidChars(str)
return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.Charset)
}
result.AppendBytes(val)
@ -334,3 +337,20 @@ func isLegacyCharset(chs string) bool {
}
return false
}
// formatInvalidChars renders src for use in a "cannot convert string" error
// message. ASCII bytes are copied through unchanged, and every byte above
// unicode.MaxASCII is written as a backslash-x escape with uppercase hex
// digits (for example \xFF). Only the bytes at indexes 0 through 5 are
// rendered; when src is longer than that, "..." is appended to mark the
// truncation.
func formatInvalidChars(src []byte) string {
	// Highest index that is still rendered, so at most lastShownIdx+1 bytes
	// appear before the ellipsis.
	const lastShownIdx = 5

	var out strings.Builder
	for idx, c := range src {
		if idx > lastShownIdx {
			out.WriteString("...")
			break
		}
		if c <= unicode.MaxASCII {
			out.WriteByte(c)
			continue
		}
		fmt.Fprintf(&out, "\\x%X", c)
	}
	return out.String()
}