[FEAT MERGE] Support gb18030_2022

This commit is contained in:
xianyu-w 2023-04-26 15:13:03 +00:00 committed by ob-robot
parent ef51ca80f8
commit 3efcefc29e
27 changed files with 32100 additions and 855 deletions

View File

@ -80,6 +80,9 @@
http://www.evertype.com/alphabets/icelandic.pdf
http://cldr.unicode.org/)
http://unicode.org/Public/cldr/24/core.zip)
http://unicode.org/Public/cldr/42/)
https://www.unicode.org/reports/tr38/tr38-33.html#kRSUnicode)
https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip)
www.doxygen.org)
www.tcx.se
www.google.com

View File

@ -281,6 +281,7 @@ const ObCharsetWrapper ObCharset::charset_wrap_arr_[ObCharset::VALID_CHARSET_TYP
{CHARSET_UTF16, "UTF-16 Unicode", CS_TYPE_UTF16_GENERAL_CI, 2},
{CHARSET_GB18030, "GB18030 charset", CS_TYPE_GB18030_CHINESE_CI, 4},
{CHARSET_LATIN1, "cp1252 West European", CS_TYPE_LATIN1_SWEDISH_CI, 1},
{CHARSET_GB18030_2022, "GB18030-2022 charset", CS_TYPE_GB18030_2022_PINYIN_CI, 4},
};
const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATION_TYPES] =
@ -298,6 +299,13 @@ const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATI
{CS_TYPE_GB18030_BIN, CHARSET_GB18030, CS_TYPE_GB18030_BIN, false, true, 1},
{CS_TYPE_LATIN1_SWEDISH_CI, CHARSET_LATIN1, CS_TYPE_LATIN1_SWEDISH_CI,true, true, 1},
{CS_TYPE_LATIN1_BIN, CHARSET_LATIN1, CS_TYPE_LATIN1_BIN,false, true, 1},
{CS_TYPE_GB18030_2022_BIN, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_BIN, false, true, 1},
{CS_TYPE_GB18030_2022_PINYIN_CI, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_PINYIN_CI, true, true, 1},
{CS_TYPE_GB18030_2022_PINYIN_CS, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_PINYIN_CS, false, true, 1},
{CS_TYPE_GB18030_2022_RADICAL_CI, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_RADICAL_CI, false, true, 1},
{CS_TYPE_GB18030_2022_RADICAL_CS, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_RADICAL_CS, false, true, 1},
{CS_TYPE_GB18030_2022_STROKE_CI, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_STROKE_CI, false, true, 1},
{CS_TYPE_GB18030_2022_STROKE_CS, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_STROKE_CS, false, true, 1},
};
ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
@ -338,7 +346,10 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 192
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 200
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 208
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 216
&ob_charset_gb18030_2022_bin, &ob_charset_gb18030_2022_pinyin_ci, // 216
&ob_charset_gb18030_2022_pinyin_cs, &ob_charset_gb18030_2022_radical_ci,// 218
&ob_charset_gb18030_2022_radical_cs, &ob_charset_gb18030_2022_stroke_ci, // 220
&ob_charset_gb18030_2022_stroke_cs, NULL, // 222
NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 225
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 232
@ -723,7 +734,8 @@ int ObCharset::caseup(const ObCollationType collation_type,
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to allocate memory", K(ret));
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030) {
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030 ||
charset_type_by_coll(collation_type) == CHARSET_GB18030_2022) {
size_t dst_len = caseup(collation_type, (char*)src.ptr(), src.length(), buf, buf_len);
dst.assign_ptr(buf, static_cast<int32_t>(dst_len));
} else {
@ -763,7 +775,8 @@ int ObCharset::casedn(const ObCollationType collation_type,
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to allocate memory", K(ret));
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030) {
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030 ||
charset_type_by_coll(collation_type) == CHARSET_GB18030_2022) {
size_t dst_len = casedn(collation_type, (char*)src.ptr(), src.length(), buf, buf_len);
dst.assign_ptr(buf, static_cast<int32_t>(dst_len));
} else {
@ -1353,6 +1366,10 @@ const char *ObCharset::charset_name(ObCharsetType charset_type)
ret_name = "latin1";
break;
}
case CHARSET_GB18030_2022: {
ret_name = "gb18030_2022";
break;
}
default: {
break;
}
@ -1452,6 +1469,8 @@ ObCharsetType ObCharset::charset_type(const ObString &cs_name)
charset_type = CHARSET_GB18030;
} else if (0 == cs_name.case_compare(ob_charset_latin1.csname)) {
charset_type = CHARSET_LATIN1;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_bin.csname)) {
charset_type = CHARSET_GB18030_2022;
}
return charset_type;
}
@ -1470,6 +1489,8 @@ ObCharsetType ObCharset::charset_type_by_name_oracle(const ObString &cs_name)
charset_type = CHARSET_GB18030;
} else if (0 == cs_name.case_compare("WE8MSWIN1252")) {
charset_type = CHARSET_LATIN1;
} else if (0 == cs_name.case_compare("ZHS32GB18030_2022")) {
charset_type = CHARSET_GB18030_2022;
}
return charset_type;
}
@ -1520,6 +1541,20 @@ ObCollationType ObCharset::collation_type(const ObString &cs_name)
collation_type = CS_TYPE_GB18030_CHINESE_CS;
} else if (0 == cs_name.case_compare("any_cs")) {
collation_type = CS_TYPE_ANY;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_bin.name)) {
collation_type = CS_TYPE_GB18030_2022_BIN;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_pinyin_ci.name)) {
collation_type = CS_TYPE_GB18030_2022_PINYIN_CI;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_pinyin_cs.name)) {
collation_type = CS_TYPE_GB18030_2022_PINYIN_CS;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_radical_ci.name)) {
collation_type = CS_TYPE_GB18030_2022_RADICAL_CI;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_radical_cs.name)) {
collation_type = CS_TYPE_GB18030_2022_RADICAL_CS;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_stroke_ci.name)) {
collation_type = CS_TYPE_GB18030_2022_STROKE_CI;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_stroke_cs.name)) {
collation_type = CS_TYPE_GB18030_2022_STROKE_CS;
}
return collation_type;
}
@ -1561,6 +1596,8 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
if (CS_TYPE_LATIN1_SWEDISH_CI == collation_type || CS_TYPE_LATIN1_BIN == collation_type) {
ret = true;
}
} else if (CHARSET_GB18030_2022 == charset_type) {
ret = is_gb18030_2022(collation_type);
}
return ret;
}
@ -1576,7 +1613,8 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
CS_TYPE_GBK_BIN,
CS_TYPE_UTF16_BIN,
CS_TYPE_GB18030_BIN,
CS_TYPE_LATIN1_BIN
CS_TYPE_LATIN1_BIN,
CS_TYPE_GB18030_2022_BIN,
};
static ObCollationType non_bin_coll_marks[NLS_COLLATION_MAX] = {
CS_TYPE_INVALID,
@ -1586,6 +1624,12 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
};
if (0 == nlssort_param.case_compare("SCHINESE_PINYIN_M")) {
nls_coll_type = NLS_COLLATION_SCHINESE_PINYIN_M;
} else if (0 == nlssort_param.case_compare("SCHINESE_PINYIN2_M")) {
nls_coll_type = NLS_COLLATION_SCHINESE_PINYIN2_M;
} else if (0 == nlssort_param.case_compare("SCHINESE_RADICAL2_M")) {
nls_coll_type = NLS_COLLATION_SCHINESE_RADICAL2_M;
} else if (0 == nlssort_param.case_compare("SCHINESE_STROKE2_M")) {
nls_coll_type = NLS_COLLATION_SCHINESE_STROKE2_M;
} else if (0 == nlssort_param.case_compare("UCA0900_SCHINESE_PINYIN")) {
nls_coll_type = NLS_COLLATION_SCHINESE_PINYIN_900;
} else if (0 == nlssort_param.case_compare("UCA0900_SCHINESE_RADICAL")) {
@ -1600,6 +1644,12 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
coll_type = bin_coll_map[charset_type];
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_PINYIN_M) {
coll_type = CS_TYPE_GB18030_CHINESE_CS;
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_PINYIN2_M) {
coll_type = CS_TYPE_GB18030_2022_PINYIN_CS;
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_RADICAL2_M) {
coll_type = CS_TYPE_GB18030_2022_RADICAL_CS;
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_STROKE2_M) {
coll_type = CS_TYPE_GB18030_2022_STROKE_CS;
} else {
if (charset_type != CHARSET_LATIN1) {
coll_type = static_cast<ObCollationType>(
@ -1625,6 +1675,7 @@ bool ObCharset::is_valid_collation(int64_t collation_type_int)
|| CS_TYPE_GB18030_CHINESE_CS == collation_type
|| CS_TYPE_LATIN1_SWEDISH_CI == collation_type
|| CS_TYPE_LATIN1_BIN == collation_type
|| is_gb18030_2022(collation_type)
;
}
@ -1677,6 +1728,19 @@ ObCharsetType ObCharset::charset_type_by_coll(ObCollationType collation_type)
charset_type = CHARSET_LATIN1;
break;
}
case CS_TYPE_GB18030_2022_BIN:
case CS_TYPE_GB18030_2022_PINYIN_CI:
case CS_TYPE_GB18030_2022_PINYIN_CS:
case CS_TYPE_GB18030_2022_RADICAL_CI:
case CS_TYPE_GB18030_2022_RADICAL_CS:
case CS_TYPE_GB18030_2022_STROKE_CI:
case CS_TYPE_GB18030_2022_STROKE_CS:
case CS_TYPE_GB18030_2022_ZH_0900_AS_CS:
case CS_TYPE_GB18030_2022_ZH2_0900_AS_CS:
case CS_TYPE_GB18030_2022_ZH3_0900_AS_CS: {
charset_type = CHARSET_GB18030_2022;
break;
}
default: {
break;
}
@ -1704,6 +1768,9 @@ ObNlsCharsetId ObCharset::charset_type_to_ora_charset_id(ObCharsetType cs_type)
case CHARSET_LATIN1:
cs_id = CHARSET_WE8MSWIN1252_ID;
break;
case CHARSET_GB18030_2022:
cs_id = CHARSET_ZHS32GB18030_2022_ID;
break;
default:
break;
}
@ -1729,6 +1796,9 @@ ObCharsetType ObCharset::ora_charset_type_to_charset_type(ObNlsCharsetId charset
break;
case CHARSET_WE8MSWIN1252_ID:
  cs_type = CHARSET_LATIN1;
  // Terminate the latin1 case explicitly: without this break control falls
  // through into the GB18030-2022 case below and cs_type is overwritten with
  // CHARSET_GB18030_2022.
  break;
case CHARSET_ZHS32GB18030_2022_ID:
  cs_type = CHARSET_GB18030_2022;
  break;
default:
break;
}
@ -1848,6 +1918,8 @@ int ObCharset::aggregate_collation(
* binary和string比较binary比较
* string
*/
ObCharsetType cs1 = charset_type_by_coll(collation_type1);
ObCharsetType cs2 = charset_type_by_coll(collation_type2);
if (collation_level1 < collation_level2) {
res_type = collation_type1;
res_level = collation_level1;
@ -1860,7 +1932,7 @@ int ObCharset::aggregate_collation(
} else if (CS_TYPE_BINARY == collation_type2) {
res_level = collation_level2;
res_type = collation_type2;
} else if (charset_type_by_coll(collation_type1) != charset_type_by_coll(collation_type2)) {
} else if (cs1 != cs2) {
/**
*
*
@ -1870,10 +1942,11 @@ int ObCharset::aggregate_collation(
* utf8mb4和gb18030使utf8mb4
* utf16和gb18030使utf16
* gbk和gb18030使gb18030
* gb18030_2022 gb18030 AGGREGATE
* X与latin1的组合结果都为Xlatin1目前地位最低
*/
int res = AGGREGATE_2CHARSET[charset_type_by_coll(collation_type1)][charset_type_by_coll(collation_type2)];
int res = AGGREGATE_2CHARSET[cs1][cs2];
if (res == 1) {
res_type = collation_type1;
res_level = collation_level1;
@ -1884,50 +1957,67 @@ int ObCharset::aggregate_collation(
// 所有不能转换的情况都到这里
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else {
//处理相同字符集的情况,每种字符集单独考虑
if (collation_type1 == collation_type2) {
res_type = collation_type1;
res_level = collation_level1;
} else if (CS_LEVEL_EXPLICIT == collation_level1) {
ret = OB_CANT_AGGREGATE_2COLLATIONS;
// ERROR 1267 (HY000): Illegal mix of collations (utf8_general_ci,EXPLICIT) and (utf8_bin,EXPLICIT) for operation '='
// LOG_USER_ERROR(ret);
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
if (collation_type1 == CS_TYPE_UTF8MB4_BIN || collation_type2 == CS_TYPE_UTF8MB4_BIN) {
res_type = CS_TYPE_UTF8MB4_BIN;
res_level = (CS_TYPE_UTF8MB4_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
// utf8mb4_unicode_ci和utf8mb4_general_ci的情况报错,和mysql兼容
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else if (charset_type_by_coll(collation_type1) == CHARSET_GBK) {
res_type = CS_TYPE_GBK_BIN;
res_level = (CS_TYPE_GBK_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
if (collation_type1 == CS_TYPE_UTF16_BIN || collation_type2 == CS_TYPE_UTF16_BIN) {
res_type = CS_TYPE_UTF16_BIN;
res_level = (CS_TYPE_UTF16_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
// utf16_unicode_ci和utf16_general_ci直接报错,不应该出现这种情况
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030) {
res_type = CS_TYPE_GB18030_BIN;
res_level = (CS_TYPE_GB18030_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_LATIN1) {
if (collation_type1 == CS_TYPE_LATIN1_BIN || collation_type2 == CS_TYPE_LATIN1_BIN) {
res_type = CS_TYPE_LATIN1_BIN;
res_level = (CS_TYPE_LATIN1_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
//未来可能支持latin1_german,与latin1_swedish不兼容
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else {
//处理相同字符集的情况,每种字符集单独考虑
if (collation_type1 == collation_type2) {
res_type = collation_type1;
res_level = collation_level1;
} else if (CS_LEVEL_EXPLICIT == collation_level1) {
ret = OB_CANT_AGGREGATE_2COLLATIONS;
// ERROR 1267 (HY000): Illegal mix of collations (utf8_general_ci,EXPLICIT) and (utf8_bin,EXPLICIT) for operation '='
// LOG_USER_ERROR(ret);
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
if (collation_type1 == CS_TYPE_UTF8MB4_BIN || collation_type2 == CS_TYPE_UTF8MB4_BIN) {
res_type = CS_TYPE_UTF8MB4_BIN;
res_level = (CS_TYPE_UTF8MB4_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
// utf8mb4_unicode_ci和utf8mb4_general_ci的情况报错,和mysql兼容
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else if (charset_type_by_coll(collation_type1) == CHARSET_GBK) {
res_type = CS_TYPE_GBK_BIN;
res_level = (CS_TYPE_GBK_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
if (collation_type1 == CS_TYPE_UTF16_BIN || collation_type2 == CS_TYPE_UTF16_BIN) {
res_type = CS_TYPE_UTF16_BIN;
res_level = (CS_TYPE_UTF16_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
// utf16_unicode_ci和utf16_general_ci直接报错,不应该出现这种情况
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030) {
res_type = CS_TYPE_GB18030_BIN;
res_level = (CS_TYPE_GB18030_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_LATIN1) {
if (collation_type1 == CS_TYPE_LATIN1_BIN || collation_type2 == CS_TYPE_LATIN1_BIN) {
res_type = CS_TYPE_LATIN1_BIN;
res_level = (CS_TYPE_LATIN1_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
//未来可能支持latin1_german,与latin1_swedish不兼容
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030_2022) {
res_type = CS_TYPE_GB18030_2022_BIN;
res_level = (CS_TYPE_GB18030_2022_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("Unexpected charset", K(collation_type1), K(collation_type2), KCSTRING(lbt()));
}
}
if (OB_SUCC(ret)) {
ObCharsetType res_cs = charset_type_by_coll(res_type);
if (CHARSET_GB18030 == res_cs) {
if (CHARSET_GB18030_2022 == cs1 || CHARSET_GB18030_2022 == cs2) {
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else if (CHARSET_GB18030_2022 == res_cs) {
if (CHARSET_GB18030 == cs1 || CHARSET_GB18030 == cs2) {
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
}
}
if (OB_FAIL(ret)) {
LOG_WARN("Illegal mix of collations", K(ret),
"type1", ObCharset::collation_name(collation_type1),
@ -1990,6 +2080,10 @@ ObCollationType ObCharset::get_default_collation(ObCharsetType charset_type)
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
break;
}
case CHARSET_GB18030_2022: {
collation_type = CS_TYPE_GB18030_2022_PINYIN_CI;
break;
}
default: {
break;
}
@ -2032,6 +2126,10 @@ ObCollationType ObCharset::get_default_collation_oracle(ObCharsetType charset_ty
collation_type = CS_TYPE_LATIN1_BIN;
break;
}
case CHARSET_GB18030_2022: {
collation_type = CS_TYPE_GB18030_2022_BIN;
break;
}
default: {
break;
}
@ -2067,6 +2165,10 @@ int ObCharset::get_default_collation(ObCharsetType charset_type, ObCollationType
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
break;
}
case CHARSET_GB18030_2022: {
collation_type = CS_TYPE_GB18030_2022_PINYIN_CI;
break;
}
default: {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid charset type", K(ret), K(charset_type));
@ -2104,6 +2206,10 @@ ObCollationType ObCharset::get_bin_collation(ObCharsetType charset_type)
collation_type = CS_TYPE_LATIN1_BIN;
break;
}
case CHARSET_GB18030_2022: {
collation_type = CS_TYPE_GB18030_2022_BIN;
break;
}
default: {
break;
}
@ -2234,6 +2340,7 @@ bool ObCharset::is_default_collation(ObCollationType collation_type)
case CS_TYPE_UTF16_GENERAL_CI:
case CS_TYPE_GB18030_CHINESE_CI:
case CS_TYPE_LATIN1_SWEDISH_CI:
case CS_TYPE_GB18030_2022_PINYIN_CI:
case CS_TYPE_BINARY: {
ret = true;
break;
@ -2662,7 +2769,8 @@ int ObCharset::get_aggregate_len_unit(const ObCollationType collation_type, bool
|| CHARSET_LATIN1 == res_charset
|| CHARSET_UTF16 == res_charset
|| CHARSET_GBK == res_charset
|| CHARSET_GB18030 == res_charset) {
|| CHARSET_GB18030 == res_charset
|| CHARSET_GB18030_2022 == res_charset) {
len_in_byte = false;
} else if (CHARSET_BINARY == res_charset) {
len_in_byte = true;
@ -2873,7 +2981,9 @@ bool ObCharset::is_cs_nonascii(ObCollationType collation_type)
// Reports whether the charset that collation_type belongs to (as resolved by
// charset_type_by_coll) is one of the CJK charsets tested in the body.
// NOTE(review): two initializers for is_cjk_charset appear back to back below
// (a single-line one, and a three-line one that additionally accepts
// CHARSET_GB18030_2022); presumably they are the before/after sides of a diff
// hunk -- confirm that only the three-line form is live.
bool ObCharset::is_cjk_charset(ObCollationType collation_type)
{
ObCharsetType cs_type = ObCharset::charset_type_by_coll(collation_type);
bool is_cjk_charset = (cs_type == CHARSET_GBK || cs_type == CHARSET_GB18030);
bool is_cjk_charset = (cs_type == CHARSET_GBK ||
cs_type == CHARSET_GB18030 ||
cs_type == CHARSET_GB18030_2022);
return is_cjk_charset;
}
@ -2884,6 +2994,7 @@ bool ObCharset::is_valid_connection_collation(ObCollationType collation_type)
|| cs_type == CHARSET_LATIN1
|| cs_type == CHARSET_GBK
|| cs_type == CHARSET_GB18030
|| cs_type == CHARSET_GB18030_2022
|| cs_type == CHARSET_BINARY;
}
@ -2903,6 +3014,9 @@ const char *ObCharset::get_oracle_charset_name_by_charset_type(ObCharsetType cha
case CHARSET_GB18030:
ret = "ZHS32GB18030";
break;
case CHARSET_GB18030_2022:
ret = "ZHS32GB18030_2022";
break;
case CHARSET_LATIN1:
ret = "WE8MSWIN1252";
break;
@ -2931,6 +3045,9 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
case CHARSET_LATIN1:
ret_id = ObNlsCharsetId::CHARSET_WE8MSWIN1252_ID;
break;
case CHARSET_GB18030_2022:
ret_id = ObNlsCharsetId::CHARSET_ZHS32GB18030_2022_ID;
break;
default:
break;
}
@ -2941,6 +3058,9 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
// Runs charset initialisation; the only step performed here is
// init_gb18030_2022(). Returns that call's status and logs a warning when it
// is not OB_SUCCESS.
int ObCharset::init_charset()
{
  int ret = OB_SUCCESS;
  ret = init_gb18030_2022();
  if (OB_SUCCESS != ret) {
    LOG_WARN("failed to init gb18030 2022", K(ret));
  }
  return ret;
}

View File

@ -37,25 +37,27 @@ enum ObCharsetType
CHARSET_UTF16 = 4,
CHARSET_GB18030 = 5,
CHARSET_LATIN1 = 6,
CHARSET_GB18030_2022 = 7,
CHARSET_MAX,
};
/*
*AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_GBK]=1CHARSET_UTF8MB4
*AGGREGATE_2CHARSET[CHARSET_GBK][CHARSET_UTF8MB4]=2CHARSET_UTF8MB4
*AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_GBK]=1CHARSET_UTF8MB4
*AGGREGATE_2CHARSET[CHARSET_GBK][CHARSET_UTF8MB4]=2CHARSET_UTF8MB4
*1&2,0
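*the stored value is the index (1 or 2) of the operand whose charset becomes the result type; 0 means OB_CANT_AGGREGATE_2COLLATIONS
*diagonal entries such as AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_UTF8MB4] are never consulted, because the table is only used when the two charsets differ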
*/
static const int AGGREGATE_2CHARSET[CHARSET_MAX][CHARSET_MAX] = {
//CHARSET_INVALI,CHARSET_UTF8MB4...
{0,0,0,0,0,0,0},//CHARSET_INVALI
{0,0,0,0,0,0,0},//CHARSET_BINARY
{0,0,0,1,2,1,1},//CHARSET_UTF8MB4
{0,0,2,0,2,2,1},//CHARSET_GBK
{0,0,1,1,0,1,1},//CHARSET_UTF16
{0,0,2,1,2,0,1},//CHARSET_GB18030
{0,0,2,2,2,2,0},//CHARSET_LATIN1
{0,0,0,0,0,0,0,0},//CHARSET_INVALI
{0,0,0,0,0,0,0,0},//CHARSET_BINARY
{0,0,0,1,2,1,1,1},//CHARSET_UTF8MB4
{0,0,2,0,2,2,1,2},//CHARSET_GBK
{0,0,1,1,0,1,1,1},//CHARSET_UTF16
{0,0,2,1,2,0,1,0},//CHARSET_GB18030
{0,0,2,2,2,2,0,2},//CHARSET_LATIN1
{0,0,2,1,2,0,1,0} //CHARSET_GB18030_2022
};
enum ObCollationType
@ -73,6 +75,13 @@ enum ObCollationType
CS_TYPE_COLLATION_FREE = 100, // mysql中间没有使用这个
CS_TYPE_UTF16_UNICODE_CI = 101,
CS_TYPE_ANY = 125, // unused in mysql
CS_TYPE_GB18030_2022_BIN = 216, // unused in mysql
CS_TYPE_GB18030_2022_PINYIN_CI = 217, // unused in mysql
CS_TYPE_GB18030_2022_PINYIN_CS = 218, // unused in mysql
CS_TYPE_GB18030_2022_RADICAL_CI = 219, // unused in mysql
CS_TYPE_GB18030_2022_RADICAL_CS = 220, // unused in mysql
CS_TYPE_GB18030_2022_STROKE_CI = 221, // unused in mysql
CS_TYPE_GB18030_2022_STROKE_CS = 222, // unused in mysql
CS_TYPE_UTF8MB4_UNICODE_CI = 224,
CS_TYPE_GB18030_CHINESE_CI = 248,
CS_TYPE_GB18030_BIN = 249,
@ -88,6 +97,7 @@ enum ObCollationType
CS_TYPE_UTF16_ZH_0900_AS_CS,
CS_TYPE_GB18030_ZH_0900_AS_CS,
CS_TYPE_latin1_ZH_0900_AS_CS, //invaid, not really used
CS_TYPE_GB18030_2022_ZH_0900_AS_CS,
//radical-stroke order
CS_TYPE_RADICAL_BEGIN_MARK,
CS_TYPE_UTF8MB4_ZH2_0900_AS_CS,
@ -95,6 +105,7 @@ enum ObCollationType
CS_TYPE_UTF16_ZH2_0900_AS_CS,
CS_TYPE_GB18030_ZH2_0900_AS_CS,
CS_TYPE_latin1_ZH2_0900_AS_CS ,//invaid
CS_TYPE_GB18030_2022_ZH2_0900_AS_CS,
//stroke order
CS_TYPE_STROKE_BEGIN_MARK,
CS_TYPE_UTF8MB4_ZH3_0900_AS_CS,
@ -102,6 +113,7 @@ enum ObCollationType
CS_TYPE_UTF16_ZH3_0900_AS_CS,
CS_TYPE_GB18030_ZH3_0900_AS_CS,
CS_TYPE_latin1_ZH3_0900_AS_CS, //invaid
CS_TYPE_GB18030_2022_ZH3_0900_AS_CS,
CS_TYPE_MAX
};
@ -113,6 +125,7 @@ enum ObNlsCharsetId
CHARSET_WE8MSWIN1252_ID=31,
CHARSET_ZHS16GBK_ID = 852,
CHARSET_ZHS32GB18030_ID = 854,
CHARSET_ZHS32GB18030_2022_ID = 859, // not used in oracle
CHARSET_UTF8_ID = 871,
CHARSET_AL32UTF8_ID = 873,
CHARSET_AL16UTF16_ID = 2000,
@ -174,6 +187,9 @@ enum ObNLSCollation
NLS_COLLATION_SCHINESE_RADICAL_900,
NLS_COLLATION_SCHINESE_STROKE_900,
NLS_COLLATION_SCHINESE_PINYIN_M,
NLS_COLLATION_SCHINESE_PINYIN2_M,
NLS_COLLATION_SCHINESE_RADICAL2_M,
NLS_COLLATION_SCHINESE_STROKE2_M,
NLS_COLLATION_MAX
};
@ -195,8 +211,8 @@ public:
//比如latin1 1byte ,utf8mb4 4byte,转换因子为4,也可以理解为最多使用4字节存储一个字符
static const int32_t CharConvertFactorNum = 4;
static const int64_t VALID_CHARSET_TYPES = 6;
static const int64_t VALID_COLLATION_TYPES = 13;
static const int64_t VALID_CHARSET_TYPES = 7;
static const int64_t VALID_COLLATION_TYPES = 20;
static int init_charset();
// strntodv2 is an enhanced version of strntod,
@ -376,8 +392,20 @@ public:
|| CHARSET_GBK == charset_type
|| CHARSET_UTF16 == charset_type
|| CHARSET_GB18030 == charset_type
|| CHARSET_GB18030_2022 == charset_type
|| CHARSET_LATIN1 == charset_type;
}
static bool is_gb18030_2022(int64_t coll_type_int) {
ObCollationType coll_type = static_cast<ObCollationType>(coll_type_int);
return CS_TYPE_GB18030_2022_BIN <= coll_type && coll_type <= CS_TYPE_GB18030_2022_STROKE_CS;
}
static bool is_gb_charset(int64_t cs_type_int)
{
ObCharsetType charset_type = static_cast<ObCharsetType>(cs_type_int);
return CHARSET_GBK == charset_type
|| CHARSET_GB18030 == charset_type
|| CHARSET_GB18030_2022 == charset_type;
}
static ObCharsetType charset_type_by_coll(ObCollationType coll_type);
static int charset_name_by_coll(const ObString &coll_name, common::ObString &cs_name);
static int charset_name_by_coll(ObCollationType coll_type, common::ObString &cs_name);

View File

@ -161,6 +161,8 @@ typedef int (*ob_charset_conv_wc_mb)(const struct ObCharsetInfo *, ob_wc_t,
typedef size_t (*ob_charset_conv_case)(const struct ObCharsetInfo *,
char *, size_t, char *, size_t);
int init_gb18030_2022();
extern ObUCAInfo ob_uca_v400;
extern uchar ob_uca520_length[4352];
extern uint16 *ob_uca520_weight[4352];
@ -429,6 +431,13 @@ extern ObCharsetInfo ob_charset_utf16_bin;
extern ObCharsetInfo ob_charset_gb18030_chinese_ci;
extern ObCharsetInfo ob_charset_gb18030_chinese_cs;
extern ObCharsetInfo ob_charset_gb18030_bin;
extern ObCharsetInfo ob_charset_gb18030_2022_pinyin_ci;
extern ObCharsetInfo ob_charset_gb18030_2022_pinyin_cs;
extern ObCharsetInfo ob_charset_gb18030_2022_radical_ci;
extern ObCharsetInfo ob_charset_gb18030_2022_radical_cs;
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_ci;
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_cs;
extern ObCharsetInfo ob_charset_gb18030_2022_bin;
extern ObCollationHandler ob_collation_mb_bin_handler;
extern ObCharsetHandler ob_charset_utf8mb4_handler;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -200,6 +200,13 @@ TEST_F(TestCharset, case_insensitive_equal)
ASSERT_FALSE(yy);
yy = ObCharset::case_insensitive_equal(y3, y4, CS_TYPE_UTF8MB4_GENERAL_CI);
ASSERT_TRUE(yy);
yy = ObCharset::case_insensitive_equal(y1, y2, CS_TYPE_GB18030_2022_PINYIN_CI);
ASSERT_TRUE(yy);
yy = ObCharset::case_insensitive_equal(y2, y3, CS_TYPE_GB18030_2022_PINYIN_CI);
ASSERT_FALSE(yy);
yy = ObCharset::case_insensitive_equal(y3, y4, CS_TYPE_GB18030_2022_PINYIN_CI);
ASSERT_TRUE(yy);
}
TEST_F(TestCharset, hash_sort)
@ -369,6 +376,23 @@ TEST_F(TestCharset, test_find_gb18030_case_prob)
}
}
}
cs_type = CS_TYPE_GB18030_2022_BIN;
for (int i = 0; i < 256; i++) {
const ObUnicaseInfoChar *info = ObCharset::get_charset(cs_type)->caseinfo->page[i];
if (NULL != info) {
for (int j = 0; j < 256; j++) {
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].tolower, buf1, buf_len, length1));
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].toupper, buf2, buf_len, length2));
buf1[length1] = '\0';
buf2[length2] = '\0';
if (length1 != length2) {
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf1, length1, hex_buf1, buf_len));
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf2, length2, hex_buf2, buf_len));
std::cout<< info[j].tolower <<"," << info[j].toupper << "," << hex_buf1 << "," << hex_buf2 << std::endl;
}
}
}
}
}
/*
@ -417,7 +441,8 @@ TEST_F(TestCharset, test_zh_0900_as_cs)
};
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CS_TYPE_GBK_ZH_0900_AS_CS,
CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS};
CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS,
CS_TYPE_GB18030_2022_ZH_0900_AS_CS};
for (int i = 0; i < array_elements(coll_types); i++) {
ObCollationType coll_type = coll_types[i];
@ -472,7 +497,7 @@ TEST_F(TestCharset, test_zh2_0900_as_cs)
return output;
};
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS};
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS, CS_TYPE_GB18030_2022_ZH2_0900_AS_CS};
for (int i = 0; i < array_elements(coll_types); i++) {
ObCollationType coll_type = coll_types[i];
@ -567,6 +592,170 @@ TEST_F(TestCharset, toupper)
}
}
// Test helper: maps a packed GB18030 code (2-byte or 4-byte sequence folded
// into one integer) to the code point this test expects from the GB18030-2022
// decoder for the special-cased entries. Returns 0 when `code` is not one of
// those entries.
static uint get_magic_gb18030_2022_uni(uint code)
{
  struct CodePair {
    uint gb;
    uint uni;
  };
  static const CodePair kSpecialCases[] = {
    // two-byte codes
    {0xFE59, 0x9FB4}, {0xFE61, 0x9FB5}, {0xFE66, 0x9FB6}, {0xFE67, 0x9FB7},
    {0xFE6D, 0x9FB8}, {0xFE7E, 0x9FB9}, {0xFE90, 0x9FBA}, {0xFEA0, 0x9FBB},
    {0xA6D9, 0xFE10}, {0xA6DA, 0xFE12}, {0xA6DB, 0xFE11}, {0xA6DC, 0xFE13},
    {0xA6DD, 0xFE14}, {0xA6DE, 0xFE15}, {0xA6DF, 0xFE16}, {0xA6EC, 0xFE17},
    {0xA6ED, 0xFE18}, {0xA6F3, 0xFE19},
    // four-byte codes
    {0x82359037, 0xE81E}, {0x82359038, 0xE826}, {0x82359039, 0xE82B},
    {0x82359130, 0xE82C}, {0x82359131, 0xE832}, {0x82359132, 0xE843},
    {0x82359133, 0xE854}, {0x82359134, 0xE864},
    {0x84318236, 0xE78D}, {0x84318238, 0xE78E}, {0x84318237, 0xE78F},
    {0x84318239, 0xE790}, {0x84318330, 0xE791}, {0x84318331, 0xE792},
    {0x84318332, 0xE793}, {0x84318333, 0xE794}, {0x84318334, 0xE795},
    {0x84318335, 0xE796},
  };
  uint mapped = 0;
  for (const CodePair &entry : kSpecialCases) {
    if (entry.gb == code) {
      mapped = entry.uni;
      break;
    }
  }
  return mapped;
}
// Test helper: maps a code point to the packed GB18030 code this test expects
// from the GB18030-2022 encoder for the special-cased entries. Returns 0 when
// `code` is not one of those entries.
static uint get_magic_uni_gb18030_2022(uint code)
{
  struct CodePair {
    uint uni;
    uint gb;
  };
  static const CodePair kSpecialCases[] = {
    // code points encoded as two bytes
    {0x9FB4, 0xFE59}, {0x9FB5, 0xFE61}, {0x9FB6, 0xFE66}, {0x9FB7, 0xFE67},
    {0x9FB8, 0xFE6D}, {0x9FB9, 0xFE7E}, {0x9FBA, 0xFE90}, {0x9FBB, 0xFEA0},
    {0xFE10, 0xA6D9}, {0xFE12, 0xA6DA}, {0xFE11, 0xA6DB}, {0xFE13, 0xA6DC},
    {0xFE14, 0xA6DD}, {0xFE15, 0xA6DE}, {0xFE16, 0xA6DF}, {0xFE17, 0xA6EC},
    {0xFE18, 0xA6ED}, {0xFE19, 0xA6F3},
    // code points encoded as four bytes
    {0xE81E, 0x82359037}, {0xE826, 0x82359038}, {0xE82B, 0x82359039},
    {0xE82C, 0x82359130}, {0xE832, 0x82359131}, {0xE843, 0x82359132},
    {0xE854, 0x82359133}, {0xE864, 0x82359134},
    {0xE78D, 0x84318236}, {0xE78E, 0x84318238}, {0xE78F, 0x84318237},
    {0xE790, 0x84318239}, {0xE791, 0x84318330}, {0xE792, 0x84318331},
    {0xE793, 0x84318332}, {0xE794, 0x84318333}, {0xE795, 0x84318334},
    {0xE796, 0x84318335},
  };
  uint mapped = 0;
  for (const CodePair &entry : kSpecialCases) {
    if (entry.uni == code) {
      mapped = entry.gb;
      break;
    }
  }
  return mapped;
}
// Packs a GB18030 byte sequence of length 1, 2 or 4 into a single integer,
// with the first byte in the most significant position.
//   src    - pointer to the first byte of the sequence
//   srclen - number of bytes to pack; must be 1, 2 or 4 (asserted)
// Each byte is widened to uint before shifting so that a lead byte >= 0x80
// shifted by 24 never goes through a signed int.
static inline uint gb18030_chs_to_code(const uchar *src, size_t srclen) {
  uint r = 0;
  ob_charset_assert(srclen == 1 || srclen == 2 || srclen == 4);
  switch (srclen) {
    case 1:
      r = src[0];
      break;
    case 2:
      r = (static_cast<uint>(src[0]) << 8) + src[1];
      break;
    case 4:
      r = (static_cast<uint>(src[0]) << 24) + (static_cast<uint>(src[1]) << 16)
          + (static_cast<uint>(src[2]) << 8) + src[3];
      break;
    default:
      ob_charset_assert(0);
  }
  return r;
}
// Cross-checks the GB18030-2022 converters against the GB18030 converters:
// for every two-byte and four-byte sequence enumerated below, and for every
// code point up to 0x10FFFF, the GB18030-2022 result must equal either the
// special-case value from get_magic_gb18030_2022_uni() /
// get_magic_uni_gb18030_2022(), or, when there is no special case, whatever
// the GB18030 converter produces.
TEST_F(TestCharset, check_gb18030_2022)
{
  // Zero-filled so the two-byte pass never hands indeterminate trailing bytes
  // to the decoders (the end pointer passed is always s + 4).
  uchar s[4] = {0, 0, 0, 0};
  ob_charset_conv_mb_wc ob_mb_wc_gb18030_2022 = ob_charset_gb18030_2022_pinyin_ci.cset->mb_wc;
  ob_charset_conv_mb_wc ob_mb_wc_gb18030 = ob_charset_gb18030_chinese_ci.cset->mb_wc;
  ob_charset_conv_wc_mb ob_wc_mb_gb18030_2022 = ob_charset_gb18030_2022_pinyin_ci.cset->wc_mb;
  ob_charset_conv_wc_mb ob_wc_mb_gb18030 = ob_charset_gb18030_chinese_ci.cset->wc_mb;

  // Two-byte sequences: first byte 0x81..0xFE, second byte 0x40..0xFE except 0x7F.
  for (s[0] = 0x81; s[0] <= 0xFE; s[0]++) {
    for (s[1] = 0x40; s[1] <= 0xFE; s[1]++) {
      if (s[1] == 0x7F) {
        continue;
      }
      uint gb_code = gb18030_chs_to_code(s, 2);
      // Start from 0 in case the decoder leaves the output untouched when it
      // rejects the sequence; both sides then compare as "no mapping".
      ob_wc_t uni_gb18030_2022 = 0;
      ob_mb_wc_gb18030_2022(NULL, &uni_gb18030_2022, s, s + 4);
      ulong target = get_magic_gb18030_2022_uni(gb_code);
      if (target == 0) {
        ob_mb_wc_gb18030(NULL, &target, s, s + 4);
      }
      // Compare, do not assign: `target = uni` would pass for any non-zero value.
      ASSERT_EQ(target, uni_gb18030_2022);
    }
  }

  // Four-byte sequences: bytes 1 and 3 in 0x81..0xFE, bytes 2 and 4 in 0x30..0x39.
  for (s[0] = 0x81; s[0] <= 0xFE; s[0]++) {
    for (s[1] = 0x30; s[1] <= 0x39; s[1]++) {
      for (s[2] = 0x81; s[2] <= 0xFE; s[2]++) {
        for (s[3] = 0x30; s[3] <= 0x39; s[3]++) {
          uint gb_code = gb18030_chs_to_code(s, 4);
          ob_wc_t uni_gb18030_2022 = 0;
          ob_mb_wc_gb18030_2022(NULL, &uni_gb18030_2022, s, s + 4);
          ulong target = get_magic_gb18030_2022_uni(gb_code);
          if (target == 0) {
            ob_mb_wc_gb18030(NULL, &target, s, s + 4);
          }
          ASSERT_EQ(target, uni_gb18030_2022);
        }
      }
    }
  }

  // Encoding direction: every code point from 0 to 0x10FFFF.
  for (uint i = 0; i <= 0x10FFFF; i++) {
    uchar s_gb18030[4];
    uchar s_gb18030_2022[4];
    uint target = get_magic_uni_gb18030_2022(i);
    if (target == 0) {
      int len_gb18030 = ob_wc_mb_gb18030(NULL, i, s_gb18030, s_gb18030 + 4);
      // A non-positive length means nothing was encoded; treat it as code 0
      // instead of passing it on to gb18030_chs_to_code().
      target = (len_gb18030 <= 0) ? 0 : gb18030_chs_to_code(s_gb18030, len_gb18030);
    }
    int len_gb18030_2022 = ob_wc_mb_gb18030_2022(NULL, i, s_gb18030_2022, s_gb18030_2022 + 4);
    uint code_gb18030_2022 = (len_gb18030_2022 <= 0) ? 0 : gb18030_chs_to_code(s_gb18030_2022, len_gb18030_2022);
    ASSERT_EQ(target, code_gb18030_2022);
  }
}
int main(int argc, char **argv)
{
OB_LOGGER.set_log_level("INFO");

View File

@ -30526,7 +30526,8 @@ int ObDDLService::update_oracle_tenant_sys_var(
|| CS_TYPE_LATIN1_BIN == tenant_schema.get_collation_type()
|| CS_TYPE_GBK_BIN == tenant_schema.get_collation_type()
|| CS_TYPE_UTF16_BIN == tenant_schema.get_collation_type()
|| CS_TYPE_GB18030_BIN == tenant_schema.get_collation_type()) {
|| CS_TYPE_GB18030_BIN == tenant_schema.get_collation_type()
|| CS_TYPE_GB18030_2022_BIN == tenant_schema.get_collation_type()) {
VAR_INT_TO_STRING(val_buf, tenant_schema.get_collation_type());
SET_TENANT_VARIABLE(SYS_VAR_CHARACTER_SET_SERVER, val_buf);
SET_TENANT_VARIABLE(SYS_VAR_CHARACTER_SET_DATABASE, val_buf);
@ -30539,6 +30540,9 @@ int ObDDLService::update_oracle_tenant_sys_var(
} else if (CHARSET_GB18030 ==
ObCharset::charset_type_by_coll(tenant_schema.get_collation_type())) {
OZ(databuff_printf(val_buf, OB_MAX_SYS_PARAM_VALUE_LENGTH, "%s", "ZHS32GB18030"));
} else if (CHARSET_GB18030_2022 ==
ObCharset::charset_type_by_coll(tenant_schema.get_collation_type())) {
OZ(databuff_printf(val_buf, OB_MAX_SYS_PARAM_VALUE_LENGTH, "%s", "ZHS32GB18030_2022"));
} else if (CHARSET_LATIN1 ==
ObCharset::charset_type_by_coll(tenant_schema.get_collation_type())) {
OZ(databuff_printf(val_buf, OB_MAX_SYS_PARAM_VALUE_LENGTH, "%s", "WE8MSWIN1252"));

View File

@ -377,7 +377,14 @@ typedef ObConstIntMapping<0,
CS_TYPE_GB18030_CHINESE_CI, 1,
CS_TYPE_GB18030_BIN, 1,
CS_TYPE_LATIN1_SWEDISH_CI,1,
CS_TYPE_LATIN1_BIN,1 > SupportedCollections;
CS_TYPE_LATIN1_BIN,1,
CS_TYPE_GB18030_2022_BIN, 1,
CS_TYPE_GB18030_2022_PINYIN_CI, 1,
CS_TYPE_GB18030_2022_PINYIN_CS, 1,
CS_TYPE_GB18030_2022_RADICAL_CI, 1,
CS_TYPE_GB18030_2022_RADICAL_CS, 1,
CS_TYPE_GB18030_2022_STROKE_CI, 1,
CS_TYPE_GB18030_2022_STROKE_CS, 1 > SupportedCollections;
// bool is_calc_with_end_space(ObObjType type1, ObObjType type2,
// bool is_oracle_mode,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -12566,6 +12566,8 @@ def_table_schema(
WHEN 3 THEN "gbk"
WHEN 4 THEN "utf16"
WHEN 5 THEN "gb18030"
WHEN 6 THEN "latin1"
WHEN 7 THEN "gb18030_2022"
ELSE NULL
END
AS CHAR(64)
@ -24809,6 +24811,8 @@ def_table_schema(
WHEN 3 THEN "gbk"
WHEN 4 THEN "utf16"
WHEN 5 THEN "gb18030"
WHEN 6 THEN "latin1"
WHEN 7 THEN "gb18030_2022"
ELSE NULL
END AS CHAR(64)) AS CHARACTER_SET_NAME,
CAST(CASE rp.param_coll_type

View File

@ -488,7 +488,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
}
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY) {
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN) {
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN
|| cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
if (is_mem) {
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
} else {
@ -496,7 +497,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
}
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI) {
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
max_buf_len, is_mem, is_valid_uni);
if (res_len < 0) {
@ -529,7 +531,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
}
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY) {
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN) {
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN ||
cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
if (param.is_memcmp_) {
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
} else {
@ -537,7 +540,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
}
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI) {
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
max_buf_len, param.is_memcmp_, param.is_valid_uni_);
if (!param.is_valid_uni_) {
@ -562,7 +566,7 @@ int ObOrderPerservingEncoder::encode_from_string_fixlen(
ret = OB_BUF_NOT_ENOUGH;
LOG_TRACE("no enough memory to do encoding for fixed string", K(ret));
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY || cs == CS_TYPE_UTF8MB4_BIN
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN) {
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
MEMCPY(to, str.ptr(), str.length());
to_len += str.length();
} else {
@ -770,6 +774,7 @@ int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_le
to_len += 2;
} else if (cs == CS_TYPE_UTF8MB4_BIN
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN
|| cs == CS_TYPE_GB18030_2022_BIN
|| cs == CS_TYPE_UTF8MB4_GENERAL_CI) {
if (with_empty_str) {
*to = 0x00;
@ -799,7 +804,9 @@ int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_le
*(to+3) = 0x20;
}
to_len += 4;
} else if (cs == CS_TYPE_UTF16_BIN || cs == CS_TYPE_GB18030_CHINESE_CI) {
} else if (cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
if (with_empty_str) {
MEMSET(to, 0x00, 4);
to += 4;

View File

@ -136,7 +136,7 @@ public:
|| cs == CS_TYPE_GBK_CHINESE_CI
// utf 16 will be open later
//|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI);
|| cs == CS_TYPE_GB18030_CHINESE_CI || ObCharset::is_gb18030_2022(cs));
}
private:

View File

@ -12844,14 +12844,12 @@ int ObObjCaster::is_order_consistent(const ObObjMeta &from,
int64_t idx_from = get_idx_of_collate(from_cs_type);
int64_t idx_to = get_idx_of_collate(to_cs_type);
int64_t idx_res = get_idx_of_collate(res_cs_type);
if (OB_UNLIKELY(idx_from < 0 || idx_to < 0 || idx_res < 0
||idx_from >= ObCharset::VALID_COLLATION_TYPES
||idx_to >= ObCharset::VALID_COLLATION_TYPES
||idx_res >= ObCharset::VALID_COLLATION_TYPES)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected collation type", K(ret), K(from), K(to));
} else {
if (idx_from >= 0 && idx_from < VALID_OC_COLLATION_TYPES &&
idx_to >= 0 && idx_to < VALID_OC_COLLATION_TYPES &&
idx_res >= 0 && idx_res < VALID_OC_COLLATION_TYPES) {
result = ORDER_CONSISTENT_WITH_BOTH_STRING[idx_from][idx_to][idx_res];
} else {
result = (from_cs_type == to_cs_type) && (from_cs_type == res_cs_type);
}
}
} else {
@ -12860,54 +12858,6 @@ int ObObjCaster::is_order_consistent(const ObObjMeta &from,
return ret;
}
/* make sure that you have read the doc before you call these functions !
*
* doc:
*/
int ObObjCaster::is_injection(const ObObjMeta &from,
const ObObjMeta &to,
bool &result)
{
int ret = OB_SUCCESS;
result = false;
ObObjTypeClass tc1 = from.get_type_class();
ObObjTypeClass tc2 = to.get_type_class();
if (OB_UNLIKELY(ob_is_invalid_obj_tc(tc1) || ob_is_invalid_obj_tc(tc2))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected obj type class", K(ret), K(from), K(to));
} else if (from.is_string_or_lob_locator_type() && to.is_string_or_lob_locator_type()) {
ObCollationType res_cs_type = CS_TYPE_INVALID;
ObCollationLevel res_cs_level = CS_LEVEL_INVALID;
ObCollationType from_cs_type = from.get_collation_type();
ObCollationType to_cs_type = to.get_collation_type();
if (OB_FAIL(ObCharset::aggregate_collation(from.get_collation_level(),
from_cs_type,
to.get_collation_level(),
to_cs_type,
res_cs_level,
res_cs_type))) {
LOG_WARN("fail to aggregate collation", K(ret), K(from), K(to));
} else {
int64_t idx_from = get_idx_of_collate(from_cs_type);
int64_t idx_to = get_idx_of_collate(to_cs_type);
int64_t idx_res = get_idx_of_collate(res_cs_type);
if (OB_UNLIKELY(idx_from < 0 || idx_to < 0 || idx_res < 0
||idx_from >= ObCharset::VALID_COLLATION_TYPES
||idx_to >= ObCharset::VALID_COLLATION_TYPES
||idx_res >= ObCharset::VALID_COLLATION_TYPES)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected collation type", K(ret), K(from), K(to));
} else {
result = INJECTION_WITH_BOTH_STRING[idx_from][idx_to][idx_res];
}
}
} else {
result = INJECTION[tc1][tc2];
}
return ret;
}
/**
* @brief ObObjCaster::oracle_number_to_char
* Convert a number value into an Oracle-compatible string.
@ -14019,658 +13969,29 @@ const bool ObObjCaster::ORDER_CONSISTENT[ObMaxTC][ObMaxTC] =
},
};
const bool ObObjCaster::ORDER_CONSISTENT_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES] =
const bool ObObjCaster::ORDER_CONSISTENT_WITH_BOTH_STRING[ObObjCaster::VALID_OC_COLLATION_TYPES][ObObjCaster::VALID_OC_COLLATION_TYPES][ObObjCaster::VALID_OC_COLLATION_TYPES] =
{
//CS_TYPE_UTF8MB4_GENERAL_CI
{
//ci //utf8bin //bin
{true, true, true, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{true, true, true},
{false, false, false},
{false, false, false},
},
//CS_TYPE_UTF8MB4_BIN
{
//ci //utf8bin //bin
{true, true , true, false, false, false, false, false, false, false, false, false, false},
{false, true , true, false, false, false, false, false, false, false, false, false, false},
{false, true , true, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{true, true , true},
{false, true , true},
{false, true , true},
},
//CS_TYPE_BINARY
{
//ci //utf8bin //bin
{true, true , true, false, false, false, false, false, false, false, false, false, false},
{false, true , true, false, false, false, false, false, false, false, false, false, false},
{false, true , true, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{true, true , true},
{false, true , true},
{false, true , true},
},
//CS_TYPE_GBKBIN
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
//CS_TYPE_CHINESE_CI
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
{
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
{false, false , false, false, false, false, false, false, false, false, false, false, false},
},
};
// Lookup table read by ObObjCaster::is_injection() when the two sides are not
// both string / lob-locator types: INJECTION[from type class][to type class].
// Each outer entry below is one "from" type class (named in the comment above
// it); each value inside is one "to" type class (named in its trailing comment).
// NOTE(review): "injection" presumably means the cast is one-to-one (distinct
// source values stay distinct after the cast) -- confirm against the design doc.
const bool ObObjCaster::INJECTION[ObMaxTC][ObMaxTC] =
{
// null
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number
false, // datetime
false, // date
false, // time
false, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, // enumset
false, // enumsetInner
false, // OTimestamp
false, // raw
},
// int
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// uint
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// float
{
false, // null
false, // int
false, // uint
true, // float
true, // double
false, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// double
{
false, // null
false, // int
false, // uint
true, // float
true, // double
false, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// number
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// datetime
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number //2010-01-01 12:34:56.12345 = 20100101123456.1234520 and 2010-01-01 12:34:56.12345 = 20100101123456.1234530
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// date
{
false, // null
false, // int //think about 0000-00-00
false, // uint
false, // float
false, // double
false, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// time
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number //think about time(5) = decimal(40,7)
true, // datetime
true, // date
true, // time
true, // year
false, // string //00:12:34 = "00:12:34" and 00:12:34 = "00:12:34.000"
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// year //0000-9999
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime //1999 = 1999-00-00 00:00:00
true, // date //1999 = 1999-00-00
true, // time
true , // year
false, // string //1999 = "99" and 1999 = "1999"
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// string
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// extend
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number
false, // datetime
false, // date
false, // time
false, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
false, //OTimestamp
false, // raw
},
// unknown
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number
false, // datetime
false, // date
false, // time
false, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
false, //OTimestamp
false, // raw
},
// lob
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// bit
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
//setenum
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
false, // datetime
false, // date
false, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
false, //OTimestamp
false, // raw
},
//setenumInner
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number
false, // datetime
false, // date
false, // time
false, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
false, //OTimestamp
false, // raw
},
// OTimestamp
{
false, // null
false, // int
false, // uint
false, // float
false, // double
false, // number //2010-01-01 12:34:56.12345 = 20100101123456.1234520 and 2010-01-01 12:34:56.12345 = 20100101123456.1234530
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
false, // bit
false, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
// raw
{
false, // null
true, // int
true, // uint
true, // float
true, // double
true, // number
true, // datetime
true, // date
true, // time
true, // year
false, // string
false, // extend
false, // unknown
false, // lob
true, // bit
true, //enumset
false, //enumsetInner
true, //OTimestamp
false, // raw
},
};
// Lookup table read by ObObjCaster::is_injection() when both sides are
// string / lob-locator types. It is indexed as
// [idx of from collation][idx of to collation][idx of aggregated collation],
// where each idx comes from get_idx_of_collate().
// Only the 3x3x3 corner for utf8mb4_general_ci / utf8mb4_bin / binary is
// spelled out; if the declared dimensions are larger, the remaining entries
// are value-initialized to false by aggregate initialization.
const bool ObObjCaster::INJECTION_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES] =
{
//CS_TYPE_UTF8MB4_GENERAL_CI
{
//ci //utf8bin //bin
{true, true, true},//CS_TYPE_UTF8MB4_GENERAL_CI
{false, true , true},//CS_TYPE_UTF8MB4_BIN
{false, true , true},//CS_TYPE_BINARY
},
//CS_TYPE_UTF8MB4_BIN
{
//ci //utf8bin //bin
{true, true , true},//CS_TYPE_UTF8MB4_GENERAL_CI
{false, true , true},//CS_TYPE_UTF8MB4_BIN
{false, true , true},//CS_TYPE_BINARY
},
//CS_TYPE_BINARY
{
//ci //utf8bin //bin
{true, true , true},//CS_TYPE_UTF8MB4_GENERAL_CI
{false, true , true},//CS_TYPE_UTF8MB4_BIN
{false, true , true},//CS_TYPE_BINARY
}
};
int ObObjEvaluator::is_true(const ObObj &obj, ObCastMode cast_mode, bool &result)

View File

@ -431,47 +431,16 @@ private:
case CS_TYPE_BINARY:
idx = 2;
break;
case CS_TYPE_GBK_BIN:
idx = 3;
break;
case CS_TYPE_GBK_CHINESE_CI:
idx = 4;
break;
case CS_TYPE_UTF16_GENERAL_CI:
idx = 5;
break;
case CS_TYPE_UTF16_BIN:
idx = 6;
break;
case CS_TYPE_UTF8MB4_UNICODE_CI:
idx = 7;
break;
case CS_TYPE_UTF16_UNICODE_CI:
idx = 8;
break;
case CS_TYPE_GB18030_BIN:
idx = 9;
break;
case CS_TYPE_GB18030_CHINESE_CI:
idx = 10;
break;
case CS_TYPE_LATIN1_BIN:
idx = 11;
break;
case CS_TYPE_LATIN1_SWEDISH_CI:
idx = 12;
break;
default:
idx = -1;
}
return idx;
}
private:
static const int64_t VALID_OC_COLLATION_TYPES = 3;
static const bool CAST_MONOTONIC[ObMaxTC][ObMaxTC];
static const bool ORDER_CONSISTENT[ObMaxTC][ObMaxTC];
static const bool ORDER_CONSISTENT_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES];
static const bool INJECTION[ObMaxTC][ObMaxTC];
static const bool INJECTION_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES];
static const bool ORDER_CONSISTENT_WITH_BOTH_STRING[VALID_OC_COLLATION_TYPES][VALID_OC_COLLATION_TYPES][VALID_OC_COLLATION_TYPES];
};
class ObObjEvaluator

View File

@ -114,6 +114,7 @@ public:
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
break;
case common::CHARSET_GB18030:
case common::CHARSET_GB18030_2022:
ret = scan_proto<common::CHARSET_GB18030, handle_func, DO_ESCAPE>(
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
break;

View File

@ -11757,12 +11757,19 @@ int ObDatumCast::is_trivial_cast(const ObObjType in_type,
is_trivial_cast = false;
int ret = OB_SUCCESS;
ObCharsetType in_cs = ObCharset::charset_type_by_coll(in_cs_type);
ObCharsetType out_cs = ObCharset::charset_type_by_coll(out_cs_type);
ObObjTypeClass in_tc = ob_obj_type_class(in_type);
ObObjTypeClass out_tc = ob_obj_type_class(out_type);
const bool is_same_charset = (ob_is_string_type(in_type) &&
ob_is_string_type(out_type) &&
ObCharset::charset_type_by_coll(in_cs_type) ==
ObCharset::charset_type_by_coll(out_cs_type));
(in_cs == out_cs ||
/** GB18030 and GB18030_2022 have the same code points,
* but they have different mapping to unicode.
* So, we do not do charset_convert for them in cast*/
(in_cs == CHARSET_GB18030 && out_cs == CHARSET_GB18030_2022) ||
(in_cs == CHARSET_GB18030_2022 && out_cs == CHARSET_GB18030)));
const bool is_clob_to_nonclob = (ob_is_clob(in_type, in_cs_type)
&& !ob_is_clob(out_type, out_cs_type));
const bool is_nonblob_to_blob = ((false == ob_is_blob(in_type, in_cs_type)) &&

View File

@ -235,10 +235,17 @@ int ObExprConvertOracle::calc_convert_oracle_expr(const ObExpr &expr,
ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum);
int64_t src_byte_len = 0;
int64_t buf_size = 0;
ObCharsetType src_cs = ObCharset::charset_type_by_coll(src_cs_type);
ObCharsetType dst_cs = ObCharset::charset_type_by_coll(dst_cs_type);
if (ob_is_string_tc(expr.datum_meta_.type_)
&& (src.length() == 0
|| ObCharset::charset_type_by_coll(src_cs_type) == ObCharset::charset_type_by_coll(dst_cs_type)
|| ObCharset::charset_type_by_coll(dst_cs_type) == CHARSET_BINARY)) {
|| src_cs == dst_cs
|| dst_cs == CHARSET_BINARY
/** GB18030 and GB18030_2022 have the same code points,
* but they have different mapping to unicode.
* So, we do not do charset_convert for them in convert*/
|| (src_cs == CHARSET_GB18030 && dst_cs == CHARSET_GB18030_2022)
|| (src_cs == CHARSET_GB18030_2022 && dst_cs == CHARSET_GB18030))) {
dst = src; // no need convert
} else if (OB_FAIL(src_iter.init(0, NULL, &calc_alloc))) {
LOG_WARN("init src_iter failed ", K(ret), K(src_iter));

View File

@ -83,7 +83,10 @@ int ObExprNLSSort::convert_to_coll_code(ObEvalCtx &ctx,
ObString &to_str)
{
int ret = OB_SUCCESS;
if (to_type == CS_TYPE_GB18030_CHINESE_CS) {
if (to_type == CS_TYPE_GB18030_CHINESE_CS ||
to_type == CS_TYPE_GB18030_2022_PINYIN_CS ||
to_type == CS_TYPE_GB18030_2022_RADICAL_CS ||
to_type == CS_TYPE_GB18030_2022_STROKE_CS) {
char *conv_buf = NULL;
const int32_t MostBytes = 4; //most 4 bytes
size_t conv_buf_len = from_str.length() * MostBytes;

View File

@ -549,6 +549,10 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "latin1 not supported when data_version < 4_1_0_0", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.1, charset latin1 is");
} else if (CHARSET_GB18030_2022 == charset_type && data_version < DATA_VERSION_4_2_0_0 ) {
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "GB18030_2022 not supported when data_version < 4_2_0_0", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2, charset GB18030_2022 is");
}
return ret;
}

View File

@ -188,7 +188,7 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
// added here to avoid the next judgment whether it is utf8 char or gbk char
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos);
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
} else if (ObCharset::is_gb_charset(charset_type_)) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);
@ -943,7 +943,7 @@ char *ObFastParserBase::parse_strdup_with_replace_multi_byte_char(
} else {
out_str[len++] = str[i];
}
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
} else if (ObCharset::is_gb_charset(charset_type_)) {
if (i + 1 < dup_len) {
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
out_str[len++] = ' ';
@ -1162,7 +1162,7 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
// added here to avoid the next judgment whether it is utf8 char or gbk char
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
idf_pos = is_utf8_char(pos);
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
} else if (ObCharset::is_gb_charset(charset_type_)) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);

View File

@ -214,7 +214,7 @@ protected:
byte_len = 3; \
} \
} else if (is_oracle_mode_ \
&& (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_)) { \
&& (ObCharset::is_gb_charset(charset_type_))) { \
if (pos + 2 < len && -1 != is_gbk_multi_byte_##CHARACTER_NAME(str, pos)) { \
bool_ret = true; \
byte_len = 2; \

View File

@ -207,6 +207,13 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
switch (*connection_collation_) {
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
case 87/*CS_TYPE_GBK_BIN*/:
case 216/*CS_TYPE_GB18030_2022_BIN*/:
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
case 249/*CS_TYPE_GB18030_BIN*/: {
if (i + 1 < dup_len) {

View File

@ -112,6 +112,7 @@ _UTF8 { REPUT_TOKEN_NEG_SIGN(_UTF8); }
_UTF8MB4 { REPUT_TOKEN_NEG_SIGN(_UTF8MB4); }
_GBK { REPUT_TOKEN_NEG_SIGN(_GBK); }
_GB18030 { REPUT_TOKEN_NEG_SIGN(_GB18030); }
_GB18030_2022 { REPUT_TOKEN_NEG_SIGN(_GB18030_2022); }
_LATIN1 { REPUT_TOKEN_NEG_SIGN(_LATIN1); }
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }

View File

@ -189,7 +189,7 @@ APPEND NO_GATHER_OPTIMIZER_STATISTICS GATHER_OPTIMIZER_STATISTICS DBMS_STATS
NEG_SIGN
%token /*can not be relation name*/
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _LATIN1 CNNOP
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 CNNOP
SELECT_HINT_BEGIN UPDATE_HINT_BEGIN DELETE_HINT_BEGIN INSERT_HINT_BEGIN REPLACE_HINT_BEGIN HINT_HINT_BEGIN HINT_END
LOAD_DATA_HINT_BEGIN CREATE_HINT_BEGIN
END_P SET_VAR DELIMITER
@ -976,6 +976,15 @@ _UTF8
YYABORT_NO_MEMORY;
}
}
| _GB18030_2022
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("gb18030_2022", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string");
YYABORT_NO_MEMORY;
}
}
| _UTF16
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);

View File

@ -7,6 +7,7 @@ gbk gbk_chinese_ci GBK charset 2
utf16 utf16_general_ci UTF-16 Unicode 2
gb18030 gb18030_chinese_ci GB18030 charset 4
latin1 latin1_swedish_ci cp1252 West European 1
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
select character_set_name, default_collate_name, description, maxlen from character_sets;
character_set_name default_collate_name description maxlen
binary binary Binary pseudo charset 1
@ -15,6 +16,7 @@ gbk gbk_chinese_ci GBK charset 2
utf16 utf16_general_ci UTF-16 Unicode 2
gb18030 gb18030_chinese_ci GB18030 charset 4
latin1 latin1_swedish_ci cp1252 West European 1
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
select maxlen from character_sets;
maxlen
1
@ -23,6 +25,7 @@ maxlen
2
4
1
4
select * from character_sets where character_set_name like '%binary%';
CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN
binary binary Binary pseudo charset 1