[FEAT MERGE] Support gb18030_2022

This commit is contained in:
xianyu-w
2023-04-26 15:13:03 +00:00
committed by ob-robot
parent ef51ca80f8
commit 3efcefc29e
27 changed files with 32100 additions and 855 deletions

View File

@ -188,7 +188,7 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
// added here to avoid the next judgment whether it is utf8 char or gbk char
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos);
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
} else if (ObCharset::is_gb_charset(charset_type_)) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);
@ -943,7 +943,7 @@ char *ObFastParserBase::parse_strdup_with_replace_multi_byte_char(
} else {
out_str[len++] = str[i];
}
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
} else if (ObCharset::is_gb_charset(charset_type_)) {
if (i + 1 < dup_len) {
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
out_str[len++] = ' ';
@ -1162,7 +1162,7 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
// added here to avoid the next judgment whether it is utf8 char or gbk char
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos);
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
} else if (ObCharset::is_gb_charset(charset_type_)) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);

View File

@ -214,7 +214,7 @@ protected:
byte_len = 3; \
} \
} else if (is_oracle_mode_ \
&& (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_)) { \
&& (ObCharset::is_gb_charset(charset_type_))) { \
if (pos + 2 < len && -1 != is_gbk_multi_byte_##CHARACTER_NAME(str, pos)) { \
bool_ret = true; \
byte_len = 2; \

View File

@ -207,6 +207,13 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
switch (*connection_collation_) {
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
case 87/*CS_TYPE_GBK_BIN*/:
case 216/*CS_TYPE_GB18030_2022_BIN*/:
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
case 249/*CS_TYPE_GB18030_BIN*/: {
if (i + 1 < dup_len) {

View File

@ -112,6 +112,7 @@ _UTF8 { REPUT_TOKEN_NEG_SIGN(_UTF8); }
_UTF8MB4 { REPUT_TOKEN_NEG_SIGN(_UTF8MB4); }
_GBK { REPUT_TOKEN_NEG_SIGN(_GBK); }
_GB18030 { REPUT_TOKEN_NEG_SIGN(_GB18030); }
_GB18030_2022 { REPUT_TOKEN_NEG_SIGN(_GB18030_2022); }
_LATIN1 { REPUT_TOKEN_NEG_SIGN(_LATIN1); }
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }

View File

@ -189,7 +189,7 @@ APPEND NO_GATHER_OPTIMIZER_STATISTICS GATHER_OPTIMIZER_STATISTICS DBMS_STATS
NEG_SIGN
%token /*can not be relation name*/
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _LATIN1 CNNOP
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 CNNOP
SELECT_HINT_BEGIN UPDATE_HINT_BEGIN DELETE_HINT_BEGIN INSERT_HINT_BEGIN REPLACE_HINT_BEGIN HINT_HINT_BEGIN HINT_END
LOAD_DATA_HINT_BEGIN CREATE_HINT_BEGIN
END_P SET_VAR DELIMITER
@ -976,6 +976,15 @@ _UTF8
YYABORT_NO_MEMORY;
}
}
| _GB18030_2022
{
/* Alternative for the _GB18030_2022 token: produce a T_CHARSET terminal node
 * whose string value is the literal charset name "gb18030_2022". */
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
/* parse_strdup receives the literal name, the parse result's malloc_pool_ and
 * the address of the node's str_len_; its return value becomes str_value_. */
$$->str_value_ = parse_strdup("gb18030_2022", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
/* A NULL string is reported as an allocation failure and the parse is aborted. */
yyerror(NULL, result, "No more space for mallocing string");
YYABORT_NO_MEMORY;
}
}
| _UTF16
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);