[FEAT MERGE] Support gb18030_2022
This commit is contained in:
parent
ef51ca80f8
commit
3efcefc29e
@ -80,6 +80,9 @@
|
||||
http://www.evertype.com/alphabets/icelandic.pdf
|
||||
http://cldr.unicode.org/)
|
||||
http://unicode.org/Public/cldr/24/core.zip)
|
||||
http://unicode.org/Public/cldr/42/)
|
||||
https://www.unicode.org/reports/tr38/tr38-33.html#kRSUnicode)
|
||||
https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip)
|
||||
www.doxygen.org)
|
||||
www.tcx.se
|
||||
www.google.com
|
||||
|
214
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
214
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
@ -281,6 +281,7 @@ const ObCharsetWrapper ObCharset::charset_wrap_arr_[ObCharset::VALID_CHARSET_TYP
|
||||
{CHARSET_UTF16, "UTF-16 Unicode", CS_TYPE_UTF16_GENERAL_CI, 2},
|
||||
{CHARSET_GB18030, "GB18030 charset", CS_TYPE_GB18030_CHINESE_CI, 4},
|
||||
{CHARSET_LATIN1, "cp1252 West European", CS_TYPE_LATIN1_SWEDISH_CI, 1},
|
||||
{CHARSET_GB18030_2022, "GB18030-2022 charset", CS_TYPE_GB18030_2022_PINYIN_CI, 4},
|
||||
};
|
||||
|
||||
const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATION_TYPES] =
|
||||
@ -298,6 +299,13 @@ const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATI
|
||||
{CS_TYPE_GB18030_BIN, CHARSET_GB18030, CS_TYPE_GB18030_BIN, false, true, 1},
|
||||
{CS_TYPE_LATIN1_SWEDISH_CI, CHARSET_LATIN1, CS_TYPE_LATIN1_SWEDISH_CI,true, true, 1},
|
||||
{CS_TYPE_LATIN1_BIN, CHARSET_LATIN1, CS_TYPE_LATIN1_BIN,false, true, 1},
|
||||
{CS_TYPE_GB18030_2022_BIN, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_BIN, false, true, 1},
|
||||
{CS_TYPE_GB18030_2022_PINYIN_CI, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_PINYIN_CI, true, true, 1},
|
||||
{CS_TYPE_GB18030_2022_PINYIN_CS, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_PINYIN_CS, false, true, 1},
|
||||
{CS_TYPE_GB18030_2022_RADICAL_CI, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_RADICAL_CI, false, true, 1},
|
||||
{CS_TYPE_GB18030_2022_RADICAL_CS, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_RADICAL_CS, false, true, 1},
|
||||
{CS_TYPE_GB18030_2022_STROKE_CI, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_STROKE_CI, false, true, 1},
|
||||
{CS_TYPE_GB18030_2022_STROKE_CS, CHARSET_GB18030_2022, CS_TYPE_GB18030_2022_STROKE_CS, false, true, 1},
|
||||
};
|
||||
|
||||
ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
|
||||
@ -338,7 +346,10 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 192
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 200
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 208
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 216
|
||||
&ob_charset_gb18030_2022_bin, &ob_charset_gb18030_2022_pinyin_ci, // 216
|
||||
&ob_charset_gb18030_2022_pinyin_cs, &ob_charset_gb18030_2022_radical_ci,// 218
|
||||
&ob_charset_gb18030_2022_radical_cs, &ob_charset_gb18030_2022_stroke_ci, // 220
|
||||
&ob_charset_gb18030_2022_stroke_cs, NULL, // 222
|
||||
NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 225
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 232
|
||||
@ -723,7 +734,8 @@ int ObCharset::caseup(const ObCollationType collation_type,
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fail to allocate memory", K(ret));
|
||||
|
||||
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030) {
|
||||
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030 ||
|
||||
charset_type_by_coll(collation_type) == CHARSET_GB18030_2022) {
|
||||
size_t dst_len = caseup(collation_type, (char*)src.ptr(), src.length(), buf, buf_len);
|
||||
dst.assign_ptr(buf, static_cast<int32_t>(dst_len));
|
||||
} else {
|
||||
@ -763,7 +775,8 @@ int ObCharset::casedn(const ObCollationType collation_type,
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fail to allocate memory", K(ret));
|
||||
|
||||
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030) {
|
||||
} else if (charset_type_by_coll(collation_type) == CHARSET_GB18030 ||
|
||||
charset_type_by_coll(collation_type) == CHARSET_GB18030_2022) {
|
||||
size_t dst_len = casedn(collation_type, (char*)src.ptr(), src.length(), buf, buf_len);
|
||||
dst.assign_ptr(buf, static_cast<int32_t>(dst_len));
|
||||
} else {
|
||||
@ -1353,6 +1366,10 @@ const char *ObCharset::charset_name(ObCharsetType charset_type)
|
||||
ret_name = "latin1";
|
||||
break;
|
||||
}
|
||||
case CHARSET_GB18030_2022: {
|
||||
ret_name = "gb18030_2022";
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
@ -1452,6 +1469,8 @@ ObCharsetType ObCharset::charset_type(const ObString &cs_name)
|
||||
charset_type = CHARSET_GB18030;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_latin1.csname)) {
|
||||
charset_type = CHARSET_LATIN1;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_bin.csname)) {
|
||||
charset_type = CHARSET_GB18030_2022;
|
||||
}
|
||||
return charset_type;
|
||||
}
|
||||
@ -1470,6 +1489,8 @@ ObCharsetType ObCharset::charset_type_by_name_oracle(const ObString &cs_name)
|
||||
charset_type = CHARSET_GB18030;
|
||||
} else if (0 == cs_name.case_compare("WE8MSWIN1252")) {
|
||||
charset_type = CHARSET_LATIN1;
|
||||
} else if (0 == cs_name.case_compare("ZHS32GB18030_2022")) {
|
||||
charset_type = CHARSET_GB18030_2022;
|
||||
}
|
||||
return charset_type;
|
||||
}
|
||||
@ -1520,6 +1541,20 @@ ObCollationType ObCharset::collation_type(const ObString &cs_name)
|
||||
collation_type = CS_TYPE_GB18030_CHINESE_CS;
|
||||
} else if (0 == cs_name.case_compare("any_cs")) {
|
||||
collation_type = CS_TYPE_ANY;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_bin.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_BIN;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_pinyin_ci.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_PINYIN_CI;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_pinyin_cs.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_PINYIN_CS;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_radical_ci.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_RADICAL_CI;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_radical_cs.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_RADICAL_CS;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_stroke_ci.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_STROKE_CI;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_2022_stroke_cs.name)) {
|
||||
collation_type = CS_TYPE_GB18030_2022_STROKE_CS;
|
||||
}
|
||||
return collation_type;
|
||||
}
|
||||
@ -1561,6 +1596,8 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
|
||||
if (CS_TYPE_LATIN1_SWEDISH_CI == collation_type || CS_TYPE_LATIN1_BIN == collation_type) {
|
||||
ret = true;
|
||||
}
|
||||
} else if (CHARSET_GB18030_2022 == charset_type) {
|
||||
ret = is_gb18030_2022(collation_type);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1576,7 +1613,8 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
|
||||
CS_TYPE_GBK_BIN,
|
||||
CS_TYPE_UTF16_BIN,
|
||||
CS_TYPE_GB18030_BIN,
|
||||
CS_TYPE_LATIN1_BIN
|
||||
CS_TYPE_LATIN1_BIN,
|
||||
CS_TYPE_GB18030_2022_BIN,
|
||||
};
|
||||
static ObCollationType non_bin_coll_marks[NLS_COLLATION_MAX] = {
|
||||
CS_TYPE_INVALID,
|
||||
@ -1586,6 +1624,12 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
|
||||
};
|
||||
if (0 == nlssort_param.case_compare("SCHINESE_PINYIN_M")) {
|
||||
nls_coll_type = NLS_COLLATION_SCHINESE_PINYIN_M;
|
||||
} else if (0 == nlssort_param.case_compare("SCHINESE_PINYIN2_M")) {
|
||||
nls_coll_type = NLS_COLLATION_SCHINESE_PINYIN2_M;
|
||||
} else if (0 == nlssort_param.case_compare("SCHINESE_RADICAL2_M")) {
|
||||
nls_coll_type = NLS_COLLATION_SCHINESE_RADICAL2_M;
|
||||
} else if (0 == nlssort_param.case_compare("SCHINESE_STROKE2_M")) {
|
||||
nls_coll_type = NLS_COLLATION_SCHINESE_STROKE2_M;
|
||||
} else if (0 == nlssort_param.case_compare("UCA0900_SCHINESE_PINYIN")) {
|
||||
nls_coll_type = NLS_COLLATION_SCHINESE_PINYIN_900;
|
||||
} else if (0 == nlssort_param.case_compare("UCA0900_SCHINESE_RADICAL")) {
|
||||
@ -1600,6 +1644,12 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
|
||||
coll_type = bin_coll_map[charset_type];
|
||||
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_PINYIN_M) {
|
||||
coll_type = CS_TYPE_GB18030_CHINESE_CS;
|
||||
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_PINYIN2_M) {
|
||||
coll_type = CS_TYPE_GB18030_2022_PINYIN_CS;
|
||||
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_RADICAL2_M) {
|
||||
coll_type = CS_TYPE_GB18030_2022_RADICAL_CS;
|
||||
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_STROKE2_M) {
|
||||
coll_type = CS_TYPE_GB18030_2022_STROKE_CS;
|
||||
} else {
|
||||
if (charset_type != CHARSET_LATIN1) {
|
||||
coll_type = static_cast<ObCollationType>(
|
||||
@ -1625,6 +1675,7 @@ bool ObCharset::is_valid_collation(int64_t collation_type_int)
|
||||
|| CS_TYPE_GB18030_CHINESE_CS == collation_type
|
||||
|| CS_TYPE_LATIN1_SWEDISH_CI == collation_type
|
||||
|| CS_TYPE_LATIN1_BIN == collation_type
|
||||
|| is_gb18030_2022(collation_type)
|
||||
;
|
||||
}
|
||||
|
||||
@ -1677,6 +1728,19 @@ ObCharsetType ObCharset::charset_type_by_coll(ObCollationType collation_type)
|
||||
charset_type = CHARSET_LATIN1;
|
||||
break;
|
||||
}
|
||||
case CS_TYPE_GB18030_2022_BIN:
|
||||
case CS_TYPE_GB18030_2022_PINYIN_CI:
|
||||
case CS_TYPE_GB18030_2022_PINYIN_CS:
|
||||
case CS_TYPE_GB18030_2022_RADICAL_CI:
|
||||
case CS_TYPE_GB18030_2022_RADICAL_CS:
|
||||
case CS_TYPE_GB18030_2022_STROKE_CI:
|
||||
case CS_TYPE_GB18030_2022_STROKE_CS:
|
||||
case CS_TYPE_GB18030_2022_ZH_0900_AS_CS:
|
||||
case CS_TYPE_GB18030_2022_ZH2_0900_AS_CS:
|
||||
case CS_TYPE_GB18030_2022_ZH3_0900_AS_CS: {
|
||||
charset_type = CHARSET_GB18030_2022;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
@ -1704,6 +1768,9 @@ ObNlsCharsetId ObCharset::charset_type_to_ora_charset_id(ObCharsetType cs_type)
|
||||
case CHARSET_LATIN1:
|
||||
cs_id = CHARSET_WE8MSWIN1252_ID;
|
||||
break;
|
||||
case CHARSET_GB18030_2022:
|
||||
cs_id = CHARSET_ZHS32GB18030_2022_ID;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1729,6 +1796,9 @@ ObCharsetType ObCharset::ora_charset_type_to_charset_type(ObNlsCharsetId charset
|
||||
break;
|
||||
case CHARSET_WE8MSWIN1252_ID:
|
||||
cs_type = CHARSET_LATIN1;
|
||||
case CHARSET_ZHS32GB18030_2022_ID:
|
||||
cs_type = CHARSET_GB18030_2022;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1848,6 +1918,8 @@ int ObCharset::aggregate_collation(
|
||||
* 如果优先级相同,binary和string比较,统一用binary比较
|
||||
* 如果都是string,按照规则进行处理
|
||||
*/
|
||||
ObCharsetType cs1 = charset_type_by_coll(collation_type1);
|
||||
ObCharsetType cs2 = charset_type_by_coll(collation_type2);
|
||||
if (collation_level1 < collation_level2) {
|
||||
res_type = collation_type1;
|
||||
res_level = collation_level1;
|
||||
@ -1860,7 +1932,7 @@ int ObCharset::aggregate_collation(
|
||||
} else if (CS_TYPE_BINARY == collation_type2) {
|
||||
res_level = collation_level2;
|
||||
res_type = collation_type2;
|
||||
} else if (charset_type_by_coll(collation_type1) != charset_type_by_coll(collation_type2)) {
|
||||
} else if (cs1 != cs2) {
|
||||
/**
|
||||
* 左右字符集不相同的情况
|
||||
* 主要以下情况
|
||||
@ -1870,10 +1942,11 @@ int ObCharset::aggregate_collation(
|
||||
* utf8mb4和gb18030:使用utf8mb4
|
||||
* utf16和gb18030:使用utf16
|
||||
* gbk和gb18030:使用gb18030
|
||||
* gb18030_2022 与 gb18030 的 AGGREGATE 暂定禁止
|
||||
* 以上任一字符集X与latin1的组合结果都为X,latin1目前地位最低
|
||||
*/
|
||||
|
||||
int res = AGGREGATE_2CHARSET[charset_type_by_coll(collation_type1)][charset_type_by_coll(collation_type2)];
|
||||
int res = AGGREGATE_2CHARSET[cs1][cs2];
|
||||
if (res == 1) {
|
||||
res_type = collation_type1;
|
||||
res_level = collation_level1;
|
||||
@ -1884,50 +1957,67 @@ int ObCharset::aggregate_collation(
|
||||
// 所有不能转换的情况都到这里
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else {
|
||||
//处理相同字符集的情况,每种字符集单独考虑
|
||||
if (collation_type1 == collation_type2) {
|
||||
res_type = collation_type1;
|
||||
res_level = collation_level1;
|
||||
} else if (CS_LEVEL_EXPLICIT == collation_level1) {
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
// ERROR 1267 (HY000): Illegal mix of collations (utf8_general_ci,EXPLICIT) and (utf8_bin,EXPLICIT) for operation '='
|
||||
// LOG_USER_ERROR(ret);
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
|
||||
if (collation_type1 == CS_TYPE_UTF8MB4_BIN || collation_type2 == CS_TYPE_UTF8MB4_BIN) {
|
||||
res_type = CS_TYPE_UTF8MB4_BIN;
|
||||
res_level = (CS_TYPE_UTF8MB4_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
// utf8mb4_unicode_ci和utf8mb4_general_ci的情况报错,和mysql兼容
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_GBK) {
|
||||
res_type = CS_TYPE_GBK_BIN;
|
||||
res_level = (CS_TYPE_GBK_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
|
||||
if (collation_type1 == CS_TYPE_UTF16_BIN || collation_type2 == CS_TYPE_UTF16_BIN) {
|
||||
res_type = CS_TYPE_UTF16_BIN;
|
||||
res_level = (CS_TYPE_UTF16_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
// utf16_unicode_ci和utf16_general_ci直接报错,不应该出现这种情况
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030) {
|
||||
res_type = CS_TYPE_GB18030_BIN;
|
||||
res_level = (CS_TYPE_GB18030_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_LATIN1) {
|
||||
if (collation_type1 == CS_TYPE_LATIN1_BIN || collation_type2 == CS_TYPE_LATIN1_BIN) {
|
||||
res_type = CS_TYPE_LATIN1_BIN;
|
||||
res_level = (CS_TYPE_LATIN1_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
//未来可能支持latin1_german,与latin1_swedish不兼容
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else {
|
||||
//处理相同字符集的情况,每种字符集单独考虑
|
||||
if (collation_type1 == collation_type2) {
|
||||
res_type = collation_type1;
|
||||
res_level = collation_level1;
|
||||
} else if (CS_LEVEL_EXPLICIT == collation_level1) {
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
// ERROR 1267 (HY000): Illegal mix of collations (utf8_general_ci,EXPLICIT) and (utf8_bin,EXPLICIT) for operation '='
|
||||
// LOG_USER_ERROR(ret);
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
|
||||
if (collation_type1 == CS_TYPE_UTF8MB4_BIN || collation_type2 == CS_TYPE_UTF8MB4_BIN) {
|
||||
res_type = CS_TYPE_UTF8MB4_BIN;
|
||||
res_level = (CS_TYPE_UTF8MB4_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
// utf8mb4_unicode_ci和utf8mb4_general_ci的情况报错,和mysql兼容
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_GBK) {
|
||||
res_type = CS_TYPE_GBK_BIN;
|
||||
res_level = (CS_TYPE_GBK_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
|
||||
if (collation_type1 == CS_TYPE_UTF16_BIN || collation_type2 == CS_TYPE_UTF16_BIN) {
|
||||
res_type = CS_TYPE_UTF16_BIN;
|
||||
res_level = (CS_TYPE_UTF16_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
// utf16_unicode_ci和utf16_general_ci直接报错,不应该出现这种情况
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030) {
|
||||
res_type = CS_TYPE_GB18030_BIN;
|
||||
res_level = (CS_TYPE_GB18030_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_LATIN1) {
|
||||
if (collation_type1 == CS_TYPE_LATIN1_BIN || collation_type2 == CS_TYPE_LATIN1_BIN) {
|
||||
res_type = CS_TYPE_LATIN1_BIN;
|
||||
res_level = (CS_TYPE_LATIN1_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
//未来可能支持latin1_german,与latin1_swedish不兼容
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030_2022) {
|
||||
res_type = CS_TYPE_GB18030_2022_BIN;
|
||||
res_level = (CS_TYPE_GB18030_2022_BIN == collation_type1) ? collation_level1 : collation_level2;
|
||||
} else {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("Unexpected charset", K(collation_type1), K(collation_type2), KCSTRING(lbt()));
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
ObCharsetType res_cs = charset_type_by_coll(res_type);
|
||||
if (CHARSET_GB18030 == res_cs) {
|
||||
if (CHARSET_GB18030_2022 == cs1 || CHARSET_GB18030_2022 == cs2) {
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
} else if (CHARSET_GB18030_2022 == res_cs) {
|
||||
if (CHARSET_GB18030 == cs1 || CHARSET_GB18030 == cs2) {
|
||||
ret = OB_CANT_AGGREGATE_2COLLATIONS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_FAIL(ret)) {
|
||||
LOG_WARN("Illegal mix of collations", K(ret),
|
||||
"type1", ObCharset::collation_name(collation_type1),
|
||||
@ -1990,6 +2080,10 @@ ObCollationType ObCharset::get_default_collation(ObCharsetType charset_type)
|
||||
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
|
||||
break;
|
||||
}
|
||||
case CHARSET_GB18030_2022: {
|
||||
collation_type = CS_TYPE_GB18030_2022_PINYIN_CI;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
@ -2032,6 +2126,10 @@ ObCollationType ObCharset::get_default_collation_oracle(ObCharsetType charset_ty
|
||||
collation_type = CS_TYPE_LATIN1_BIN;
|
||||
break;
|
||||
}
|
||||
case CHARSET_GB18030_2022: {
|
||||
collation_type = CS_TYPE_GB18030_2022_BIN;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
@ -2067,6 +2165,10 @@ int ObCharset::get_default_collation(ObCharsetType charset_type, ObCollationType
|
||||
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
|
||||
break;
|
||||
}
|
||||
case CHARSET_GB18030_2022: {
|
||||
collation_type = CS_TYPE_GB18030_2022_PINYIN_CI;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid charset type", K(ret), K(charset_type));
|
||||
@ -2104,6 +2206,10 @@ ObCollationType ObCharset::get_bin_collation(ObCharsetType charset_type)
|
||||
collation_type = CS_TYPE_LATIN1_BIN;
|
||||
break;
|
||||
}
|
||||
case CHARSET_GB18030_2022: {
|
||||
collation_type = CS_TYPE_GB18030_2022_BIN;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
@ -2234,6 +2340,7 @@ bool ObCharset::is_default_collation(ObCollationType collation_type)
|
||||
case CS_TYPE_UTF16_GENERAL_CI:
|
||||
case CS_TYPE_GB18030_CHINESE_CI:
|
||||
case CS_TYPE_LATIN1_SWEDISH_CI:
|
||||
case CS_TYPE_GB18030_2022_PINYIN_CI:
|
||||
case CS_TYPE_BINARY: {
|
||||
ret = true;
|
||||
break;
|
||||
@ -2662,7 +2769,8 @@ int ObCharset::get_aggregate_len_unit(const ObCollationType collation_type, bool
|
||||
|| CHARSET_LATIN1 == res_charset
|
||||
|| CHARSET_UTF16 == res_charset
|
||||
|| CHARSET_GBK == res_charset
|
||||
|| CHARSET_GB18030 == res_charset) {
|
||||
|| CHARSET_GB18030 == res_charset
|
||||
|| CHARSET_GB18030_2022 == res_charset) {
|
||||
len_in_byte = false;
|
||||
} else if (CHARSET_BINARY == res_charset) {
|
||||
len_in_byte = true;
|
||||
@ -2873,7 +2981,9 @@ bool ObCharset::is_cs_nonascii(ObCollationType collation_type)
|
||||
/**
 * Reports whether the charset that collation_type belongs to is one of the
 * charsets this function treats as CJK: GBK, GB18030 or GB18030-2022.
 *
 * @param collation_type collation whose owning charset is examined
 * @return true for CHARSET_GBK, CHARSET_GB18030 and CHARSET_GB18030_2022,
 *         false for every other charset
 */
bool ObCharset::is_cjk_charset(ObCollationType collation_type)
{
  const ObCharsetType cs_type = ObCharset::charset_type_by_coll(collation_type);
  // Single definition of the result: the pre-GB18030-2022 two-charset
  // declaration is dropped so the local is not declared twice.
  const bool is_cjk_charset = (cs_type == CHARSET_GBK ||
                               cs_type == CHARSET_GB18030 ||
                               cs_type == CHARSET_GB18030_2022);
  return is_cjk_charset;
}
|
||||
|
||||
@ -2884,6 +2994,7 @@ bool ObCharset::is_valid_connection_collation(ObCollationType collation_type)
|
||||
|| cs_type == CHARSET_LATIN1
|
||||
|| cs_type == CHARSET_GBK
|
||||
|| cs_type == CHARSET_GB18030
|
||||
|| cs_type == CHARSET_GB18030_2022
|
||||
|| cs_type == CHARSET_BINARY;
|
||||
}
|
||||
|
||||
@ -2903,6 +3014,9 @@ const char *ObCharset::get_oracle_charset_name_by_charset_type(ObCharsetType cha
|
||||
case CHARSET_GB18030:
|
||||
ret = "ZHS32GB18030";
|
||||
break;
|
||||
case CHARSET_GB18030_2022:
|
||||
ret = "ZHS32GB18030_2022";
|
||||
break;
|
||||
case CHARSET_LATIN1:
|
||||
ret = "WE8MSWIN1252";
|
||||
break;
|
||||
@ -2931,6 +3045,9 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
|
||||
case CHARSET_LATIN1:
|
||||
ret_id = ObNlsCharsetId::CHARSET_WE8MSWIN1252_ID;
|
||||
break;
|
||||
case CHARSET_GB18030_2022:
|
||||
ret_id = ObNlsCharsetId::CHARSET_ZHS32GB18030_2022_ID;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -2941,6 +3058,9 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
|
||||
int ObCharset::init_charset()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_FAIL(init_gb18030_2022())) {
|
||||
LOG_WARN("failed to init gb18030 2022", K(ret));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
50
deps/oblib/src/lib/charset/ob_charset.h
vendored
50
deps/oblib/src/lib/charset/ob_charset.h
vendored
@ -37,25 +37,27 @@ enum ObCharsetType
|
||||
CHARSET_UTF16 = 4,
|
||||
CHARSET_GB18030 = 5,
|
||||
CHARSET_LATIN1 = 6,
|
||||
CHARSET_GB18030_2022 = 7,
|
||||
CHARSET_MAX,
|
||||
};
|
||||
|
||||
/*
|
||||
*AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_GBK]=1表示结果为CHARSET_UTF8MB4
|
||||
*AGGREGATE_2CHARSET[CHARSET_GBK][CHARSET_UTF8MB4]=2表示结果为CHARSET_UTF8MB4
|
||||
*AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_GBK]=1表示结果为第一个参数CHARSET_UTF8MB4
|
||||
*AGGREGATE_2CHARSET[CHARSET_GBK][CHARSET_UTF8MB4]=2表示结果为第二个参数CHARSET_UTF8MB4
|
||||
*矩阵中只对当前需要考虑的情况填值1&2,其余补0
|
||||
*return value means idx of the resule type, 0 means OB_CANT_AGGREGATE_2COLLATIONS
|
||||
*there is no possibly to reach AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_UTF8MB4] and so on
|
||||
*/
|
||||
static const int AGGREGATE_2CHARSET[CHARSET_MAX][CHARSET_MAX] = {
|
||||
//CHARSET_INVALI,CHARSET_UTF8MB4...
|
||||
{0,0,0,0,0,0,0},//CHARSET_INVALI
|
||||
{0,0,0,0,0,0,0},//CHARSET_BINARY
|
||||
{0,0,0,1,2,1,1},//CHARSET_UTF8MB4
|
||||
{0,0,2,0,2,2,1},//CHARSET_GBK
|
||||
{0,0,1,1,0,1,1},//CHARSET_UTF16
|
||||
{0,0,2,1,2,0,1},//CHARSET_GB18030
|
||||
{0,0,2,2,2,2,0},//CHARSET_LATIN1
|
||||
{0,0,0,0,0,0,0,0},//CHARSET_INVALI
|
||||
{0,0,0,0,0,0,0,0},//CHARSET_BINARY
|
||||
{0,0,0,1,2,1,1,1},//CHARSET_UTF8MB4
|
||||
{0,0,2,0,2,2,1,2},//CHARSET_GBK
|
||||
{0,0,1,1,0,1,1,1},//CHARSET_UTF16
|
||||
{0,0,2,1,2,0,1,0},//CHARSET_GB18030
|
||||
{0,0,2,2,2,2,0,2},//CHARSET_LATIN1
|
||||
{0,0,2,1,2,0,1,0} //CHARSET_GB18030_2022
|
||||
};
|
||||
|
||||
enum ObCollationType
|
||||
@ -73,6 +75,13 @@ enum ObCollationType
|
||||
CS_TYPE_COLLATION_FREE = 100, // mysql中间没有使用这个
|
||||
CS_TYPE_UTF16_UNICODE_CI = 101,
|
||||
CS_TYPE_ANY = 125, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_BIN = 216, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_PINYIN_CI = 217, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_PINYIN_CS = 218, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_RADICAL_CI = 219, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_RADICAL_CS = 220, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_STROKE_CI = 221, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_STROKE_CS = 222, // unused in mysql
|
||||
CS_TYPE_UTF8MB4_UNICODE_CI = 224,
|
||||
CS_TYPE_GB18030_CHINESE_CI = 248,
|
||||
CS_TYPE_GB18030_BIN = 249,
|
||||
@ -88,6 +97,7 @@ enum ObCollationType
|
||||
CS_TYPE_UTF16_ZH_0900_AS_CS,
|
||||
CS_TYPE_GB18030_ZH_0900_AS_CS,
|
||||
CS_TYPE_latin1_ZH_0900_AS_CS, //invaid, not really used
|
||||
CS_TYPE_GB18030_2022_ZH_0900_AS_CS,
|
||||
//radical-stroke order
|
||||
CS_TYPE_RADICAL_BEGIN_MARK,
|
||||
CS_TYPE_UTF8MB4_ZH2_0900_AS_CS,
|
||||
@ -95,6 +105,7 @@ enum ObCollationType
|
||||
CS_TYPE_UTF16_ZH2_0900_AS_CS,
|
||||
CS_TYPE_GB18030_ZH2_0900_AS_CS,
|
||||
CS_TYPE_latin1_ZH2_0900_AS_CS ,//invaid
|
||||
CS_TYPE_GB18030_2022_ZH2_0900_AS_CS,
|
||||
//stroke order
|
||||
CS_TYPE_STROKE_BEGIN_MARK,
|
||||
CS_TYPE_UTF8MB4_ZH3_0900_AS_CS,
|
||||
@ -102,6 +113,7 @@ enum ObCollationType
|
||||
CS_TYPE_UTF16_ZH3_0900_AS_CS,
|
||||
CS_TYPE_GB18030_ZH3_0900_AS_CS,
|
||||
CS_TYPE_latin1_ZH3_0900_AS_CS, //invaid
|
||||
CS_TYPE_GB18030_2022_ZH3_0900_AS_CS,
|
||||
CS_TYPE_MAX
|
||||
};
|
||||
|
||||
@ -113,6 +125,7 @@ enum ObNlsCharsetId
|
||||
CHARSET_WE8MSWIN1252_ID=31,
|
||||
CHARSET_ZHS16GBK_ID = 852,
|
||||
CHARSET_ZHS32GB18030_ID = 854,
|
||||
CHARSET_ZHS32GB18030_2022_ID = 859, // not used in oracle
|
||||
CHARSET_UTF8_ID = 871,
|
||||
CHARSET_AL32UTF8_ID = 873,
|
||||
CHARSET_AL16UTF16_ID = 2000,
|
||||
@ -174,6 +187,9 @@ enum ObNLSCollation
|
||||
NLS_COLLATION_SCHINESE_RADICAL_900,
|
||||
NLS_COLLATION_SCHINESE_STROKE_900,
|
||||
NLS_COLLATION_SCHINESE_PINYIN_M,
|
||||
NLS_COLLATION_SCHINESE_PINYIN2_M,
|
||||
NLS_COLLATION_SCHINESE_RADICAL2_M,
|
||||
NLS_COLLATION_SCHINESE_STROKE2_M,
|
||||
NLS_COLLATION_MAX
|
||||
};
|
||||
|
||||
@ -195,8 +211,8 @@ public:
|
||||
//比如latin1 1byte ,utf8mb4 4byte,转换因子为4,也可以理解为最多使用4字节存储一个字符
|
||||
static const int32_t CharConvertFactorNum = 4;
|
||||
|
||||
static const int64_t VALID_CHARSET_TYPES = 6;
|
||||
static const int64_t VALID_COLLATION_TYPES = 13;
|
||||
static const int64_t VALID_CHARSET_TYPES = 7;
|
||||
static const int64_t VALID_COLLATION_TYPES = 20;
|
||||
|
||||
static int init_charset();
|
||||
// strntodv2 is an enhanced version of strntod,
|
||||
@ -376,8 +392,20 @@ public:
|
||||
|| CHARSET_GBK == charset_type
|
||||
|| CHARSET_UTF16 == charset_type
|
||||
|| CHARSET_GB18030 == charset_type
|
||||
|| CHARSET_GB18030_2022 == charset_type
|
||||
|| CHARSET_LATIN1 == charset_type;
|
||||
}
|
||||
static bool is_gb18030_2022(int64_t coll_type_int) {
|
||||
ObCollationType coll_type = static_cast<ObCollationType>(coll_type_int);
|
||||
return CS_TYPE_GB18030_2022_BIN <= coll_type && coll_type <= CS_TYPE_GB18030_2022_STROKE_CS;
|
||||
}
|
||||
static bool is_gb_charset(int64_t cs_type_int)
|
||||
{
|
||||
ObCharsetType charset_type = static_cast<ObCharsetType>(cs_type_int);
|
||||
return CHARSET_GBK == charset_type
|
||||
|| CHARSET_GB18030 == charset_type
|
||||
|| CHARSET_GB18030_2022 == charset_type;
|
||||
}
|
||||
static ObCharsetType charset_type_by_coll(ObCollationType coll_type);
|
||||
static int charset_name_by_coll(const ObString &coll_name, common::ObString &cs_name);
|
||||
static int charset_name_by_coll(ObCollationType coll_type, common::ObString &cs_name);
|
||||
|
9
deps/oblib/src/lib/charset/ob_ctype.h
vendored
9
deps/oblib/src/lib/charset/ob_ctype.h
vendored
@ -161,6 +161,8 @@ typedef int (*ob_charset_conv_wc_mb)(const struct ObCharsetInfo *, ob_wc_t,
|
||||
typedef size_t (*ob_charset_conv_case)(const struct ObCharsetInfo *,
|
||||
char *, size_t, char *, size_t);
|
||||
|
||||
int init_gb18030_2022();
|
||||
|
||||
extern ObUCAInfo ob_uca_v400;
|
||||
extern uchar ob_uca520_length[4352];
|
||||
extern uint16 *ob_uca520_weight[4352];
|
||||
@ -429,6 +431,13 @@ extern ObCharsetInfo ob_charset_utf16_bin;
|
||||
extern ObCharsetInfo ob_charset_gb18030_chinese_ci;
|
||||
extern ObCharsetInfo ob_charset_gb18030_chinese_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_bin;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_pinyin_ci;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_pinyin_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_radical_ci;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_radical_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_ci;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_bin;
|
||||
|
||||
extern ObCollationHandler ob_collation_mb_bin_handler;
|
||||
extern ObCharsetHandler ob_charset_utf8mb4_handler;
|
||||
|
872
deps/oblib/src/lib/charset/ob_ctype_gb18030_os.cc
vendored
872
deps/oblib/src/lib/charset/ob_ctype_gb18030_os.cc
vendored
File diff suppressed because it is too large
Load Diff
30764
deps/oblib/src/lib/charset/ob_gb18030_2022_tab.h
vendored
Normal file
30764
deps/oblib/src/lib/charset/ob_gb18030_2022_tab.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
193
deps/oblib/unittest/lib/charset/test_charset.cpp
vendored
193
deps/oblib/unittest/lib/charset/test_charset.cpp
vendored
@ -200,6 +200,13 @@ TEST_F(TestCharset, case_insensitive_equal)
|
||||
ASSERT_FALSE(yy);
|
||||
yy = ObCharset::case_insensitive_equal(y3, y4, CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
ASSERT_TRUE(yy);
|
||||
|
||||
yy = ObCharset::case_insensitive_equal(y1, y2, CS_TYPE_GB18030_2022_PINYIN_CI);
|
||||
ASSERT_TRUE(yy);
|
||||
yy = ObCharset::case_insensitive_equal(y2, y3, CS_TYPE_GB18030_2022_PINYIN_CI);
|
||||
ASSERT_FALSE(yy);
|
||||
yy = ObCharset::case_insensitive_equal(y3, y4, CS_TYPE_GB18030_2022_PINYIN_CI);
|
||||
ASSERT_TRUE(yy);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, hash_sort)
|
||||
@ -369,6 +376,23 @@ TEST_F(TestCharset, test_find_gb18030_case_prob)
|
||||
}
|
||||
}
|
||||
}
|
||||
cs_type = CS_TYPE_GB18030_2022_BIN;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
const ObUnicaseInfoChar *info = ObCharset::get_charset(cs_type)->caseinfo->page[i];
|
||||
if (NULL != info) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].tolower, buf1, buf_len, length1));
|
||||
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].toupper, buf2, buf_len, length2));
|
||||
buf1[length1] = '\0';
|
||||
buf2[length2] = '\0';
|
||||
if (length1 != length2) {
|
||||
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf1, length1, hex_buf1, buf_len));
|
||||
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf2, length2, hex_buf2, buf_len));
|
||||
std::cout<< info[j].tolower <<"," << info[j].toupper << "," << hex_buf1 << "," << hex_buf2 << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -417,7 +441,8 @@ TEST_F(TestCharset, test_zh_0900_as_cs)
|
||||
};
|
||||
|
||||
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CS_TYPE_GBK_ZH_0900_AS_CS,
|
||||
CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS};
|
||||
CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS,
|
||||
CS_TYPE_GB18030_2022_ZH_0900_AS_CS};
|
||||
|
||||
for (int i = 0; i < array_elements(coll_types); i++) {
|
||||
ObCollationType coll_type = coll_types[i];
|
||||
@ -472,7 +497,7 @@ TEST_F(TestCharset, test_zh2_0900_as_cs)
|
||||
return output;
|
||||
};
|
||||
|
||||
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS};
|
||||
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS, CS_TYPE_GB18030_2022_ZH2_0900_AS_CS};
|
||||
|
||||
for (int i = 0; i < array_elements(coll_types); i++) {
|
||||
ObCollationType coll_type = coll_types[i];
|
||||
@ -567,6 +592,170 @@ TEST_F(TestCharset, toupper)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test oracle: GB18030 code -> expected Unicode code point for the codes
 * that check_gb18030_2022 treats as special cases of the GB18030-2022 charset.
 *
 * @param code  GB18030 code packed big-endian into an integer (two-byte codes
 *              such as 0xFE59, four-byte codes such as 0x82359037).
 * @return the expected Unicode code point, or 0 when `code` is not one of the
 *         listed special cases (the caller then falls back to the older
 *         gb18030 charset as the reference).
 */
static uint get_magic_gb18030_2022_uni(uint code)
{
  struct GbToUni {
    uint gb;
    uint uni;
  };
  static const GbToUni SPECIAL_CASES[] = {
    // two-byte codes in the 0xFExx rows
    {0xFE59, 0x9FB4},
    {0xFE61, 0x9FB5},
    {0xFE66, 0x9FB6},
    {0xFE67, 0x9FB7},
    {0xFE6D, 0x9FB8},
    {0xFE7E, 0x9FB9},
    {0xFE90, 0x9FBA},
    {0xFEA0, 0x9FBB},
    // two-byte codes in the 0xA6xx row (note: 0xA6DA/0xA6DB are not in
    // ascending Unicode order)
    {0xA6D9, 0xFE10},
    {0xA6DA, 0xFE12},
    {0xA6DB, 0xFE11},
    {0xA6DC, 0xFE13},
    {0xA6DD, 0xFE14},
    {0xA6DE, 0xFE15},
    {0xA6DF, 0xFE16},
    {0xA6EC, 0xFE17},
    {0xA6ED, 0xFE18},
    {0xA6F3, 0xFE19},
    // four-byte codes 0x82359037 .. 0x82359134
    {0x82359037, 0xE81E},
    {0x82359038, 0xE826},
    {0x82359039, 0xE82B},
    {0x82359130, 0xE82C},
    {0x82359131, 0xE832},
    {0x82359132, 0xE843},
    {0x82359133, 0xE854},
    {0x82359134, 0xE864},
    // four-byte codes 0x84318236 .. 0x84318335 (0x84318238/0x84318237 are
    // intentionally swapped, mirroring the 0xA6DA/0xA6DB pair above)
    {0x84318236, 0xE78D},
    {0x84318238, 0xE78E},
    {0x84318237, 0xE78F},
    {0x84318239, 0xE790},
    {0x84318330, 0xE791},
    {0x84318331, 0xE792},
    {0x84318332, 0xE793},
    {0x84318333, 0xE794},
    {0x84318334, 0xE795},
    {0x84318335, 0xE796},
  };

  uint expected_uni = 0;
  const uint entry_cnt = sizeof(SPECIAL_CASES) / sizeof(SPECIAL_CASES[0]);
  for (uint idx = 0; idx < entry_cnt; ++idx) {
    if (SPECIAL_CASES[idx].gb == code) {
      expected_uni = SPECIAL_CASES[idx].uni;
      break;
    }
  }
  return expected_uni;
}
|
||||
|
||||
/**
 * Test oracle: Unicode code point -> expected GB18030 code for the code
 * points that check_gb18030_2022 treats as special cases of the GB18030-2022
 * charset.
 *
 * @param code  Unicode code point.
 * @return the expected GB18030 code packed big-endian into an integer
 *         (two-byte or four-byte), or 0 when `code` is not one of the listed
 *         special cases (the caller then falls back to the older gb18030
 *         charset as the reference).
 */
static uint get_magic_uni_gb18030_2022(uint code)
{
  struct UniToGb {
    uint uni;
    uint gb;
  };
  static const UniToGb SPECIAL_CASES[] = {
    // U+9FB4 .. U+9FBB encode to two-byte codes in the 0xFExx rows
    {0x9FB4, 0xFE59},
    {0x9FB5, 0xFE61},
    {0x9FB6, 0xFE66},
    {0x9FB7, 0xFE67},
    {0x9FB8, 0xFE6D},
    {0x9FB9, 0xFE7E},
    {0x9FBA, 0xFE90},
    {0x9FBB, 0xFEA0},
    // U+FE10 .. U+FE19 encode to two-byte codes in the 0xA6xx row
    // (U+FE12/U+FE11 are not in ascending GB order)
    {0xFE10, 0xA6D9},
    {0xFE12, 0xA6DA},
    {0xFE11, 0xA6DB},
    {0xFE13, 0xA6DC},
    {0xFE14, 0xA6DD},
    {0xFE15, 0xA6DE},
    {0xFE16, 0xA6DF},
    {0xFE17, 0xA6EC},
    {0xFE18, 0xA6ED},
    {0xFE19, 0xA6F3},
    // code points that encode to four-byte codes 0x82359037 .. 0x82359134
    {0xE81E, 0x82359037},
    {0xE826, 0x82359038},
    {0xE82B, 0x82359039},
    {0xE82C, 0x82359130},
    {0xE832, 0x82359131},
    {0xE843, 0x82359132},
    {0xE854, 0x82359133},
    {0xE864, 0x82359134},
    // code points that encode to four-byte codes 0x84318236 .. 0x84318335
    // (U+E78E/U+E78F are swapped, mirroring the U+FE12/U+FE11 pair above)
    {0xE78D, 0x84318236},
    {0xE78E, 0x84318238},
    {0xE78F, 0x84318237},
    {0xE790, 0x84318239},
    {0xE791, 0x84318330},
    {0xE792, 0x84318331},
    {0xE793, 0x84318332},
    {0xE794, 0x84318333},
    {0xE795, 0x84318334},
    {0xE796, 0x84318335},
  };

  uint expected_gb = 0;
  const uint entry_cnt = sizeof(SPECIAL_CASES) / sizeof(SPECIAL_CASES[0]);
  for (uint idx = 0; idx < entry_cnt; ++idx) {
    if (SPECIAL_CASES[idx].uni == code) {
      expected_gb = SPECIAL_CASES[idx].gb;
      break;
    }
  }
  return expected_gb;
}
|
||||
|
||||
/**
 * Pack a GB18030 byte sequence into a single integer, most significant byte
 * first (e.g. {0x82, 0x35, 0x90, 0x37} -> 0x82359037).
 *
 * @param src     pointer to the first byte of the encoded character
 * @param srclen  number of bytes to pack; must be 1, 2 or 4
 * @return the packed code; 0 if srclen is not 1, 2 or 4 and
 *         ob_charset_assert does not abort
 */
static inline uint gb18030_chs_to_code(const uchar *src, size_t srclen) {
  uint r = 0;

  ob_charset_assert(srclen == 1 || srclen == 2 || srclen == 4);

  // Each byte is widened to uint before shifting: a uchar would otherwise be
  // promoted to signed int, and shifting a lead byte >= 0x80 left by 24
  // overflows into the sign bit.
  switch (srclen) {
    case 1:
      r = src[0];
      break;
    case 2:
      r = (static_cast<uint>(src[0]) << 8) + static_cast<uint>(src[1]);
      break;
    case 4:
      r = (static_cast<uint>(src[0]) << 24) + (static_cast<uint>(src[1]) << 16)
          + (static_cast<uint>(src[2]) << 8) + static_cast<uint>(src[3]);
      break;
    default:
      ob_charset_assert(0);
  }

  return r;
}
|
||||
|
||||
// Cross-checks the gb18030_2022 conversion functions against the older
// gb18030 ones: every code / code point must convert identically in both
// charsets, except for the special cases listed in
// get_magic_gb18030_2022_uni() / get_magic_uni_gb18030_2022(), which must
// convert to the value given there.
TEST_F(TestCharset, check_gb18030_2022)
{
  // Zero-initialized so the decoders never see indeterminate trailing bytes
  // while only s[0] and s[1] are being driven by the two-byte loop.
  uchar s[4] = {0, 0, 0, 0};

  ob_charset_conv_mb_wc ob_mb_wc_gb18030_2022 = ob_charset_gb18030_2022_pinyin_ci.cset->mb_wc;
  ob_charset_conv_mb_wc ob_mb_wc_gb18030 = ob_charset_gb18030_chinese_ci.cset->mb_wc;
  ob_charset_conv_wc_mb ob_wc_mb_gb18030_2022 = ob_charset_gb18030_2022_pinyin_ci.cset->wc_mb;
  ob_charset_conv_wc_mb ob_wc_mb_gb18030 = ob_charset_gb18030_chinese_ci.cset->wc_mb;

  // 1. Decode every two-byte code (lead 0x81..0xFE, trail 0x40..0xFE except 0x7F).
  for (s[0] = 0x81; s[0] <= 0xFE; s[0]++) {
    for (s[1] = 0x40; s[1] <= 0xFE; s[1]++) {
      if (s[1] == 0x7F) {
        continue;
      }
      uint gb_code = gb18030_chs_to_code(s, 2);
      // Start from 0 so a failed conversion leaves a deterministic value.
      ob_wc_t uni_gb18030_2022 = 0;
      ob_mb_wc_gb18030_2022(NULL, &uni_gb18030_2022, s, s + 4);
      ulong target = get_magic_gb18030_2022_uni(gb_code);
      if (target == 0) {
        ob_mb_wc_gb18030(NULL, &target, s, s + 4);
      }
      // Must be a comparison: an assignment here would make the check vacuous.
      ASSERT_TRUE(target == uni_gb18030_2022);
    }
  }

  // 2. Decode every four-byte code (bytes 1/3 in 0x81..0xFE, bytes 2/4 in 0x30..0x39).
  for (s[0] = 0x81; s[0] <= 0xFE; s[0]++) {
    for (s[1] = 0x30; s[1] <= 0x39; s[1]++) {
      for (s[2] = 0x81; s[2] <= 0xFE; s[2]++) {
        for (s[3] = 0x30; s[3] <= 0x39; s[3]++) {
          uint gb_code = gb18030_chs_to_code(s, 4);
          ob_wc_t uni_gb18030_2022 = 0;
          ob_mb_wc_gb18030_2022(NULL, &uni_gb18030_2022, s, s + 4);
          ulong target = get_magic_gb18030_2022_uni(gb_code);
          if (target == 0) {
            ob_mb_wc_gb18030(NULL, &target, s, s + 4);
          }
          ASSERT_TRUE(target == uni_gb18030_2022);
        }
      }
    }
  }

  // 3. Encode every Unicode code point 0..0x10FFFF.
  for (uint i = 0; i <= 0x10FFFF; i++) {
    uchar s_gb18030[4];
    uchar s_gb18030_2022[4];
    uint target = get_magic_uni_gb18030_2022(i);
    if (target == 0) {
      int len_gb18030 = ob_wc_mb_gb18030(NULL, i, s_gb18030, s_gb18030 + 4);
      // A non-positive length means nothing was encoded; treat it as code 0
      // rather than handing a negative length to the packing helper.
      target = (len_gb18030 > 0) ? gb18030_chs_to_code(s_gb18030, len_gb18030) : 0;
    }
    int len_gb18030_2022 = ob_wc_mb_gb18030_2022(NULL, i, s_gb18030_2022, s_gb18030_2022 + 4);
    uint code_gb18030_2022 =
        (len_gb18030_2022 > 0) ? gb18030_chs_to_code(s_gb18030_2022, len_gb18030_2022) : 0;
    ASSERT_TRUE(target == code_gb18030_2022);
  }
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
OB_LOGGER.set_log_level("INFO");
|
||||
|
@ -30526,7 +30526,8 @@ int ObDDLService::update_oracle_tenant_sys_var(
|
||||
|| CS_TYPE_LATIN1_BIN == tenant_schema.get_collation_type()
|
||||
|| CS_TYPE_GBK_BIN == tenant_schema.get_collation_type()
|
||||
|| CS_TYPE_UTF16_BIN == tenant_schema.get_collation_type()
|
||||
|| CS_TYPE_GB18030_BIN == tenant_schema.get_collation_type()) {
|
||||
|| CS_TYPE_GB18030_BIN == tenant_schema.get_collation_type()
|
||||
|| CS_TYPE_GB18030_2022_BIN == tenant_schema.get_collation_type()) {
|
||||
VAR_INT_TO_STRING(val_buf, tenant_schema.get_collation_type());
|
||||
SET_TENANT_VARIABLE(SYS_VAR_CHARACTER_SET_SERVER, val_buf);
|
||||
SET_TENANT_VARIABLE(SYS_VAR_CHARACTER_SET_DATABASE, val_buf);
|
||||
@ -30539,6 +30540,9 @@ int ObDDLService::update_oracle_tenant_sys_var(
|
||||
} else if (CHARSET_GB18030 ==
|
||||
ObCharset::charset_type_by_coll(tenant_schema.get_collation_type())) {
|
||||
OZ(databuff_printf(val_buf, OB_MAX_SYS_PARAM_VALUE_LENGTH, "%s", "ZHS32GB18030"));
|
||||
} else if (CHARSET_GB18030_2022 ==
|
||||
ObCharset::charset_type_by_coll(tenant_schema.get_collation_type())) {
|
||||
OZ(databuff_printf(val_buf, OB_MAX_SYS_PARAM_VALUE_LENGTH, "%s", "ZHS32GB18030_2022"));
|
||||
} else if (CHARSET_LATIN1 ==
|
||||
ObCharset::charset_type_by_coll(tenant_schema.get_collation_type())) {
|
||||
OZ(databuff_printf(val_buf, OB_MAX_SYS_PARAM_VALUE_LENGTH, "%s", "WE8MSWIN1252"));
|
||||
|
@ -377,7 +377,14 @@ typedef ObConstIntMapping<0,
|
||||
CS_TYPE_GB18030_CHINESE_CI, 1,
|
||||
CS_TYPE_GB18030_BIN, 1,
|
||||
CS_TYPE_LATIN1_SWEDISH_CI,1,
|
||||
CS_TYPE_LATIN1_BIN,1 > SupportedCollections;
|
||||
CS_TYPE_LATIN1_BIN,1,
|
||||
CS_TYPE_GB18030_2022_BIN, 1,
|
||||
CS_TYPE_GB18030_2022_PINYIN_CI, 1,
|
||||
CS_TYPE_GB18030_2022_PINYIN_CS, 1,
|
||||
CS_TYPE_GB18030_2022_RADICAL_CI, 1,
|
||||
CS_TYPE_GB18030_2022_RADICAL_CS, 1,
|
||||
CS_TYPE_GB18030_2022_STROKE_CI, 1,
|
||||
CS_TYPE_GB18030_2022_STROKE_CS, 1 > SupportedCollections;
|
||||
|
||||
// bool is_calc_with_end_space(ObObjType type1, ObObjType type2,
|
||||
// bool is_oracle_mode,
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -12566,6 +12566,8 @@ def_table_schema(
|
||||
WHEN 3 THEN "gbk"
|
||||
WHEN 4 THEN "utf16"
|
||||
WHEN 5 THEN "gb18030"
|
||||
WHEN 6 THEN "latin1"
|
||||
WHEN 7 THEN "gb18030_2022"
|
||||
ELSE NULL
|
||||
END
|
||||
AS CHAR(64)
|
||||
@ -24809,6 +24811,8 @@ def_table_schema(
|
||||
WHEN 3 THEN "gbk"
|
||||
WHEN 4 THEN "utf16"
|
||||
WHEN 5 THEN "gb18030"
|
||||
WHEN 6 THEN "latin1"
|
||||
WHEN 7 THEN "gb18030_2022"
|
||||
ELSE NULL
|
||||
END AS CHAR(64)) AS CHARACTER_SET_NAME,
|
||||
CAST(CASE rp.param_coll_type
|
||||
|
@ -488,7 +488,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
|
||||
}
|
||||
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY) {
|
||||
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
|
||||
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN) {
|
||||
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN
|
||||
|| cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
|
||||
if (is_mem) {
|
||||
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
|
||||
} else {
|
||||
@ -496,7 +497,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
|
||||
}
|
||||
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|
||||
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI) {
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
|
||||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
|
||||
max_buf_len, is_mem, is_valid_uni);
|
||||
if (res_len < 0) {
|
||||
@ -529,7 +531,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
|
||||
}
|
||||
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY) {
|
||||
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
|
||||
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN) {
|
||||
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN ||
|
||||
cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
|
||||
if (param.is_memcmp_) {
|
||||
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
|
||||
} else {
|
||||
@ -537,7 +540,8 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
|
||||
}
|
||||
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|
||||
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI) {
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
|
||||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
|
||||
max_buf_len, param.is_memcmp_, param.is_valid_uni_);
|
||||
if (!param.is_valid_uni_) {
|
||||
@ -562,7 +566,7 @@ int ObOrderPerservingEncoder::encode_from_string_fixlen(
|
||||
ret = OB_BUF_NOT_ENOUGH;
|
||||
LOG_TRACE("no enough memory to do encoding for fixed string", K(ret));
|
||||
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY || cs == CS_TYPE_UTF8MB4_BIN
|
||||
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN) {
|
||||
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
|
||||
MEMCPY(to, str.ptr(), str.length());
|
||||
to_len += str.length();
|
||||
} else {
|
||||
@ -770,6 +774,7 @@ int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_le
|
||||
to_len += 2;
|
||||
} else if (cs == CS_TYPE_UTF8MB4_BIN
|
||||
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN
|
||||
|| cs == CS_TYPE_GB18030_2022_BIN
|
||||
|| cs == CS_TYPE_UTF8MB4_GENERAL_CI) {
|
||||
if (with_empty_str) {
|
||||
*to = 0x00;
|
||||
@ -799,7 +804,9 @@ int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_le
|
||||
*(to+3) = 0x20;
|
||||
}
|
||||
to_len += 4;
|
||||
} else if (cs == CS_TYPE_UTF16_BIN || cs == CS_TYPE_GB18030_CHINESE_CI) {
|
||||
} else if (cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI
|
||||
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
if (with_empty_str) {
|
||||
MEMSET(to, 0x00, 4);
|
||||
to += 4;
|
||||
|
@ -136,7 +136,7 @@ public:
|
||||
|| cs == CS_TYPE_GBK_CHINESE_CI
|
||||
// utf 16 will be open later
|
||||
//|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI);
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI || ObCharset::is_gb18030_2022(cs));
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -12844,14 +12844,12 @@ int ObObjCaster::is_order_consistent(const ObObjMeta &from,
|
||||
int64_t idx_from = get_idx_of_collate(from_cs_type);
|
||||
int64_t idx_to = get_idx_of_collate(to_cs_type);
|
||||
int64_t idx_res = get_idx_of_collate(res_cs_type);
|
||||
if (OB_UNLIKELY(idx_from < 0 || idx_to < 0 || idx_res < 0
|
||||
||idx_from >= ObCharset::VALID_COLLATION_TYPES
|
||||
||idx_to >= ObCharset::VALID_COLLATION_TYPES
|
||||
||idx_res >= ObCharset::VALID_COLLATION_TYPES)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected collation type", K(ret), K(from), K(to));
|
||||
} else {
|
||||
if (idx_from >= 0 && idx_from < VALID_OC_COLLATION_TYPES &&
|
||||
idx_to >= 0 && idx_to < VALID_OC_COLLATION_TYPES &&
|
||||
idx_res >= 0 && idx_res < VALID_OC_COLLATION_TYPES) {
|
||||
result = ORDER_CONSISTENT_WITH_BOTH_STRING[idx_from][idx_to][idx_res];
|
||||
} else {
|
||||
result = (from_cs_type == to_cs_type) && (from_cs_type == res_cs_type);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -12860,54 +12858,6 @@ int ObObjCaster::is_order_consistent(const ObObjMeta &from,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* make sure that you have read the doc before you call these functions !
|
||||
*
|
||||
* doc:
|
||||
*/
|
||||
|
||||
int ObObjCaster::is_injection(const ObObjMeta &from,
|
||||
const ObObjMeta &to,
|
||||
bool &result)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
result = false;
|
||||
ObObjTypeClass tc1 = from.get_type_class();
|
||||
ObObjTypeClass tc2 = to.get_type_class();
|
||||
if (OB_UNLIKELY(ob_is_invalid_obj_tc(tc1) || ob_is_invalid_obj_tc(tc2))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected obj type class", K(ret), K(from), K(to));
|
||||
} else if (from.is_string_or_lob_locator_type() && to.is_string_or_lob_locator_type()) {
|
||||
ObCollationType res_cs_type = CS_TYPE_INVALID;
|
||||
ObCollationLevel res_cs_level = CS_LEVEL_INVALID;
|
||||
ObCollationType from_cs_type = from.get_collation_type();
|
||||
ObCollationType to_cs_type = to.get_collation_type();
|
||||
if (OB_FAIL(ObCharset::aggregate_collation(from.get_collation_level(),
|
||||
from_cs_type,
|
||||
to.get_collation_level(),
|
||||
to_cs_type,
|
||||
res_cs_level,
|
||||
res_cs_type))) {
|
||||
LOG_WARN("fail to aggregate collation", K(ret), K(from), K(to));
|
||||
} else {
|
||||
int64_t idx_from = get_idx_of_collate(from_cs_type);
|
||||
int64_t idx_to = get_idx_of_collate(to_cs_type);
|
||||
int64_t idx_res = get_idx_of_collate(res_cs_type);
|
||||
if (OB_UNLIKELY(idx_from < 0 || idx_to < 0 || idx_res < 0
|
||||
||idx_from >= ObCharset::VALID_COLLATION_TYPES
|
||||
||idx_to >= ObCharset::VALID_COLLATION_TYPES
|
||||
||idx_res >= ObCharset::VALID_COLLATION_TYPES)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected collation type", K(ret), K(from), K(to));
|
||||
} else {
|
||||
result = INJECTION_WITH_BOTH_STRING[idx_from][idx_to][idx_res];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result = INJECTION[tc1][tc2];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief ObObjCaster::oracle_number_to_char
|
||||
* 将number类型转为兼容oracle的字符串
|
||||
@ -14019,658 +13969,29 @@ const bool ObObjCaster::ORDER_CONSISTENT[ObMaxTC][ObMaxTC] =
|
||||
},
|
||||
};
|
||||
|
||||
const bool ObObjCaster::ORDER_CONSISTENT_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES] =
|
||||
const bool ObObjCaster::ORDER_CONSISTENT_WITH_BOTH_STRING[ObObjCaster::VALID_OC_COLLATION_TYPES][ObObjCaster::VALID_OC_COLLATION_TYPES][ObObjCaster::VALID_OC_COLLATION_TYPES] =
|
||||
{
|
||||
//CS_TYPE_UTF8MB4_GENERAL_CI
|
||||
{
|
||||
//ci //utf8bin //bin
|
||||
{true, true, true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{true, true, true},
|
||||
{false, false, false},
|
||||
{false, false, false},
|
||||
},
|
||||
//CS_TYPE_UTF8MB4_BIN
|
||||
{
|
||||
//ci //utf8bin //bin
|
||||
{true, true , true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, true , true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, true , true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{true, true , true},
|
||||
{false, true , true},
|
||||
{false, true , true},
|
||||
},
|
||||
//CS_TYPE_BINARY
|
||||
{
|
||||
//ci //utf8bin //bin
|
||||
{true, true , true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, true , true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, true , true, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{true, true , true},
|
||||
{false, true , true},
|
||||
{false, true , true},
|
||||
},
|
||||
//CS_TYPE_GBKBIN
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
//CS_TYPE_CHINESE_CI
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
{
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
{false, false , false, false, false, false, false, false, false, false, false, false, false},
|
||||
},
|
||||
};
|
||||
|
||||
// INJECTION: ObMaxTC x ObMaxTC table of flags, one row and one column per
// type class. Rows and columns are listed in the same order: null, int, uint,
// float, double, number, datetime, date, time, year, string, extend, unknown,
// lob, bit, enumset, enumsetInner, OTimestamp, raw.
// The null, extend, unknown and enumsetInner rows are all false, and so are
// the null and string columns.
// NOTE(review): judging by the name and the inline examples (e.g. one time
// value that equals two different strings), an entry presumably says whether
// the cast between the two type classes is injective. Which index is the
// source and which is the target is not visible in this table -- confirm
// against the accessor that reads it.
const bool ObObjCaster::INJECTION[ObMaxTC][ObMaxTC] =
|
||||
{
|
||||
// null
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number
|
||||
false, // datetime
|
||||
false, // date
|
||||
false, // time
|
||||
false, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
false, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// int
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// uint
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// float
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
false, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// double
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
false, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// number
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// datetime
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number // 2010-01-01 12:34:56.12345 = 20100101123456.1234520 and 2010-01-01 12:34:56.12345 = 20100101123456.1234530
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// date
|
||||
{
|
||||
false, // null
|
||||
false, // int // think about 0000-00-00
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// time
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number // think about time(5) = decimal(40,7)
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string // 00:12:34 = "00:12:34" and 00:12:34 = "00:12:34.000"
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// year // 0000-9999
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime // 1999 = 1999-00-00 00:00:00
|
||||
true, // date // 1999 = 1999-00-00
|
||||
true, // time
|
||||
true , // year
|
||||
false, // string // 1999 = "99" and 1999 = "1999"
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// string
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// extend
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number
|
||||
false, // datetime
|
||||
false, // date
|
||||
false, // time
|
||||
false, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
false, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// unknown
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number
|
||||
false, // datetime
|
||||
false, // date
|
||||
false, // time
|
||||
false, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
false, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// lob
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// bit
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// enumset
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
false, // datetime
|
||||
false, // date
|
||||
false, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
false, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// enumsetInner
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number
|
||||
false, // datetime
|
||||
false, // date
|
||||
false, // time
|
||||
false, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
false, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// OTimestamp
|
||||
{
|
||||
false, // null
|
||||
false, // int
|
||||
false, // uint
|
||||
false, // float
|
||||
false, // double
|
||||
false, // number // 2010-01-01 12:34:56.12345 = 20100101123456.1234520 and 2010-01-01 12:34:56.12345 = 20100101123456.1234530
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
false, // bit
|
||||
false, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
// raw
|
||||
{
|
||||
false, // null
|
||||
true, // int
|
||||
true, // uint
|
||||
true, // float
|
||||
true, // double
|
||||
true, // number
|
||||
true, // datetime
|
||||
true, // date
|
||||
true, // time
|
||||
true, // year
|
||||
false, // string
|
||||
false, // extend
|
||||
false, // unknown
|
||||
false, // lob
|
||||
true, // bit
|
||||
true, // enumset
|
||||
false, // enumsetInner
|
||||
true, // OTimestamp
|
||||
false, // raw
|
||||
},
|
||||
};
|
||||
|
||||
// INJECTION_WITH_BOTH_STRING: three-dimensional table of flags whose extents
// are all ObCharset::VALID_COLLATION_TYPES. Only a 3 x 3 x 3 corner is spelled
// out, labelled CS_TYPE_UTF8MB4_GENERAL_CI, CS_TYPE_UTF8MB4_BIN and
// CS_TYPE_BINARY; the three outer planes carry identical values. Elements
// without an initializer are value-initialized by aggregate initialization,
// i.e. they are false.
// NOTE(review): presumably this is the string-to-string counterpart of
// INJECTION, indexed by collation; which role each of the three indexes plays
// is not visible in this table -- confirm against the code that reads it.
const bool ObObjCaster::INJECTION_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES] =
|
||||
{
|
||||
// outer index: CS_TYPE_UTF8MB4_GENERAL_CI
|
||||
{
|
||||
// columns: ci, utf8bin, bin
|
||||
{true, true, true},// CS_TYPE_UTF8MB4_GENERAL_CI
|
||||
{false, true , true},// CS_TYPE_UTF8MB4_BIN
|
||||
{false, true , true},// CS_TYPE_BINARY
|
||||
},
|
||||
// outer index: CS_TYPE_UTF8MB4_BIN
|
||||
{
|
||||
// columns: ci, utf8bin, bin
|
||||
{true, true , true},// CS_TYPE_UTF8MB4_GENERAL_CI
|
||||
{false, true , true},// CS_TYPE_UTF8MB4_BIN
|
||||
{false, true , true},// CS_TYPE_BINARY
|
||||
},
|
||||
// outer index: CS_TYPE_BINARY
|
||||
{
|
||||
// columns: ci, utf8bin, bin
|
||||
{true, true , true},// CS_TYPE_UTF8MB4_GENERAL_CI
|
||||
{false, true , true},// CS_TYPE_UTF8MB4_BIN
|
||||
{false, true , true},// CS_TYPE_BINARY
|
||||
}
|
||||
};
|
||||
|
||||
int ObObjEvaluator::is_true(const ObObj &obj, ObCastMode cast_mode, bool &result)
|
||||
|
@ -431,47 +431,16 @@ private:
|
||||
case CS_TYPE_BINARY:
|
||||
idx = 2;
|
||||
break;
|
||||
case CS_TYPE_GBK_BIN:
|
||||
idx = 3;
|
||||
break;
|
||||
case CS_TYPE_GBK_CHINESE_CI:
|
||||
idx = 4;
|
||||
break;
|
||||
case CS_TYPE_UTF16_GENERAL_CI:
|
||||
idx = 5;
|
||||
break;
|
||||
case CS_TYPE_UTF16_BIN:
|
||||
idx = 6;
|
||||
break;
|
||||
case CS_TYPE_UTF8MB4_UNICODE_CI:
|
||||
idx = 7;
|
||||
break;
|
||||
case CS_TYPE_UTF16_UNICODE_CI:
|
||||
idx = 8;
|
||||
break;
|
||||
case CS_TYPE_GB18030_BIN:
|
||||
idx = 9;
|
||||
break;
|
||||
case CS_TYPE_GB18030_CHINESE_CI:
|
||||
idx = 10;
|
||||
break;
|
||||
case CS_TYPE_LATIN1_BIN:
|
||||
idx = 11;
|
||||
break;
|
||||
case CS_TYPE_LATIN1_SWEDISH_CI:
|
||||
idx = 12;
|
||||
break;
|
||||
default:
|
||||
idx = -1;
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
private:
|
||||
static const int64_t VALID_OC_COLLATION_TYPES = 3;
|
||||
static const bool CAST_MONOTONIC[ObMaxTC][ObMaxTC];
|
||||
static const bool ORDER_CONSISTENT[ObMaxTC][ObMaxTC];
|
||||
static const bool ORDER_CONSISTENT_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES];
|
||||
static const bool INJECTION[ObMaxTC][ObMaxTC];
|
||||
static const bool INJECTION_WITH_BOTH_STRING[ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES][ObCharset::VALID_COLLATION_TYPES];
|
||||
static const bool ORDER_CONSISTENT_WITH_BOTH_STRING[VALID_OC_COLLATION_TYPES][VALID_OC_COLLATION_TYPES][VALID_OC_COLLATION_TYPES];
|
||||
};
|
||||
|
||||
class ObObjEvaluator
|
||||
|
@ -114,6 +114,7 @@ public:
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
case common::CHARSET_GB18030:
|
||||
case common::CHARSET_GB18030_2022:
|
||||
ret = scan_proto<common::CHARSET_GB18030, handle_func, DO_ESCAPE>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
|
@ -11757,12 +11757,19 @@ int ObDatumCast::is_trivial_cast(const ObObjType in_type,
|
||||
is_trivial_cast = false;
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
ObCharsetType in_cs = ObCharset::charset_type_by_coll(in_cs_type);
|
||||
ObCharsetType out_cs = ObCharset::charset_type_by_coll(out_cs_type);
|
||||
|
||||
ObObjTypeClass in_tc = ob_obj_type_class(in_type);
|
||||
ObObjTypeClass out_tc = ob_obj_type_class(out_type);
|
||||
const bool is_same_charset = (ob_is_string_type(in_type) &&
|
||||
ob_is_string_type(out_type) &&
|
||||
ObCharset::charset_type_by_coll(in_cs_type) ==
|
||||
ObCharset::charset_type_by_coll(out_cs_type));
|
||||
(in_cs == out_cs ||
|
||||
/** GB18030 and GB18030_2022 have the same code points,
|
||||
* but they have different mapping to unicode.
|
||||
* So, we do not do charset_convert for them in cast*/
|
||||
(in_cs == CHARSET_GB18030 && out_cs == CHARSET_GB18030_2022) ||
|
||||
(in_cs == CHARSET_GB18030_2022 && out_cs == CHARSET_GB18030)));
|
||||
const bool is_clob_to_nonclob = (ob_is_clob(in_type, in_cs_type)
|
||||
&& !ob_is_clob(out_type, out_cs_type));
|
||||
const bool is_nonblob_to_blob = ((false == ob_is_blob(in_type, in_cs_type)) &&
|
||||
|
@ -235,10 +235,17 @@ int ObExprConvertOracle::calc_convert_oracle_expr(const ObExpr &expr,
|
||||
ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum);
|
||||
int64_t src_byte_len = 0;
|
||||
int64_t buf_size = 0;
|
||||
ObCharsetType src_cs = ObCharset::charset_type_by_coll(src_cs_type);
|
||||
ObCharsetType dst_cs = ObCharset::charset_type_by_coll(dst_cs_type);
|
||||
if (ob_is_string_tc(expr.datum_meta_.type_)
|
||||
&& (src.length() == 0
|
||||
|| ObCharset::charset_type_by_coll(src_cs_type) == ObCharset::charset_type_by_coll(dst_cs_type)
|
||||
|| ObCharset::charset_type_by_coll(dst_cs_type) == CHARSET_BINARY)) {
|
||||
|| src_cs == dst_cs
|
||||
|| dst_cs == CHARSET_BINARY
|
||||
/** GB18030 and GB18030_2022 have the same code points,
|
||||
* but they have different mapping to unicode.
|
||||
* So, we do not do charset_convert for them in convert*/
|
||||
|| (src_cs == CHARSET_GB18030 && dst_cs == CHARSET_GB18030_2022)
|
||||
|| (src_cs == CHARSET_GB18030_2022 && dst_cs == CHARSET_GB18030))) {
|
||||
dst = src; // no need convert
|
||||
} else if (OB_FAIL(src_iter.init(0, NULL, &calc_alloc))) {
|
||||
LOG_WARN("init src_iter failed ", K(ret), K(src_iter));
|
||||
|
@ -83,7 +83,10 @@ int ObExprNLSSort::convert_to_coll_code(ObEvalCtx &ctx,
|
||||
ObString &to_str)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (to_type == CS_TYPE_GB18030_CHINESE_CS) {
|
||||
if (to_type == CS_TYPE_GB18030_CHINESE_CS ||
|
||||
to_type == CS_TYPE_GB18030_2022_PINYIN_CS ||
|
||||
to_type == CS_TYPE_GB18030_2022_RADICAL_CS ||
|
||||
to_type == CS_TYPE_GB18030_2022_STROKE_CS) {
|
||||
char *conv_buf = NULL;
|
||||
const int32_t MostBytes = 4; //most 4 bytes
|
||||
size_t conv_buf_len = from_str.length() * MostBytes;
|
||||
|
@ -549,6 +549,10 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "latin1 not supported when data_version < 4_1_0_0", K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.1, charset latin1 is");
|
||||
} else if (CHARSET_GB18030_2022 == charset_type && data_version < DATA_VERSION_4_2_0_0 ) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "GB18030_2022 not supported when data_version < 4_2_0_0", K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2, charset GB18030_2022 is");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
|
||||
// added here to avoid the next judgment whether it is utf8 char or gbk char
|
||||
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
|
||||
idf_pos = is_utf8_char(pos);
|
||||
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
|
||||
} else if (ObCharset::is_gb_charset(charset_type_)) {
|
||||
idf_pos = is_gbk_char(pos);
|
||||
} else if (CHARSET_LATIN1 == charset_type_) {
|
||||
idf_pos = is_latin1_char(pos);
|
||||
@ -943,7 +943,7 @@ char *ObFastParserBase::parse_strdup_with_replace_multi_byte_char(
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
|
||||
} else if (ObCharset::is_gb_charset(charset_type_)) {
|
||||
if (i + 1 < dup_len) {
|
||||
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
|
||||
out_str[len++] = ' ';
|
||||
@ -1162,7 +1162,7 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
|
||||
// added here to avoid the next judgment whether it is utf8 char or gbk char
|
||||
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
|
||||
idf_pos = is_utf8_char(pos);
|
||||
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
|
||||
} else if (ObCharset::is_gb_charset(charset_type_)) {
|
||||
idf_pos = is_gbk_char(pos);
|
||||
} else if (CHARSET_LATIN1 == charset_type_) {
|
||||
idf_pos = is_latin1_char(pos);
|
||||
|
@ -214,7 +214,7 @@ protected:
|
||||
byte_len = 3; \
|
||||
} \
|
||||
} else if (is_oracle_mode_ \
|
||||
&& (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_)) { \
|
||||
&& (ObCharset::is_gb_charset(charset_type_))) { \
|
||||
if (pos + 2 < len && -1 != is_gbk_multi_byte_##CHARACTER_NAME(str, pos)) { \
|
||||
bool_ret = true; \
|
||||
byte_len = 2; \
|
||||
|
@ -207,6 +207,13 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
|
||||
switch (*connection_collation_) {
|
||||
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
|
||||
case 87/*CS_TYPE_GBK_BIN*/:
|
||||
case 216/*CS_TYPE_GB18030_2022_BIN*/:
|
||||
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
|
||||
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
|
||||
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
|
||||
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
|
||||
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
|
||||
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
|
||||
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
|
||||
case 249/*CS_TYPE_GB18030_BIN*/: {
|
||||
if (i + 1 < dup_len) {
|
||||
|
@ -112,6 +112,7 @@ _UTF8 { REPUT_TOKEN_NEG_SIGN(_UTF8); }
|
||||
_UTF8MB4 { REPUT_TOKEN_NEG_SIGN(_UTF8MB4); }
|
||||
_GBK { REPUT_TOKEN_NEG_SIGN(_GBK); }
|
||||
_GB18030 { REPUT_TOKEN_NEG_SIGN(_GB18030); }
|
||||
_GB18030_2022 { REPUT_TOKEN_NEG_SIGN(_GB18030_2022); }
|
||||
_LATIN1 { REPUT_TOKEN_NEG_SIGN(_LATIN1); }
|
||||
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
|
||||
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }
|
||||
|
@ -189,7 +189,7 @@ APPEND NO_GATHER_OPTIMIZER_STATISTICS GATHER_OPTIMIZER_STATISTICS DBMS_STATS
|
||||
NEG_SIGN
|
||||
|
||||
%token /*can not be relation name*/
|
||||
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _LATIN1 CNNOP
|
||||
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 CNNOP
|
||||
SELECT_HINT_BEGIN UPDATE_HINT_BEGIN DELETE_HINT_BEGIN INSERT_HINT_BEGIN REPLACE_HINT_BEGIN HINT_HINT_BEGIN HINT_END
|
||||
LOAD_DATA_HINT_BEGIN CREATE_HINT_BEGIN
|
||||
END_P SET_VAR DELIMITER
|
||||
@ -976,6 +976,15 @@ _UTF8
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _GB18030_2022
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("gb18030_2022", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "No more space for mallocing string");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _UTF16
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
|
@ -7,6 +7,7 @@ gbk gbk_chinese_ci GBK charset 2
|
||||
utf16 utf16_general_ci UTF-16 Unicode 2
|
||||
gb18030 gb18030_chinese_ci GB18030 charset 4
|
||||
latin1 latin1_swedish_ci cp1252 West European 1
|
||||
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
|
||||
select character_set_name, default_collate_name, description, maxlen from character_sets;
|
||||
character_set_name default_collate_name description maxlen
|
||||
binary binary Binary pseudo charset 1
|
||||
@ -15,6 +16,7 @@ gbk gbk_chinese_ci GBK charset 2
|
||||
utf16 utf16_general_ci UTF-16 Unicode 2
|
||||
gb18030 gb18030_chinese_ci GB18030 charset 4
|
||||
latin1 latin1_swedish_ci cp1252 West European 1
|
||||
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
|
||||
select maxlen from character_sets;
|
||||
maxlen
|
||||
1
|
||||
@ -23,6 +25,7 @@ maxlen
|
||||
2
|
||||
4
|
||||
1
|
||||
4
|
||||
select * from character_sets where character_set_name like '%binary%';
|
||||
CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN
|
||||
binary binary Binary pseudo charset 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user