[FEAT MERGE] Support gb18030_2022

This commit is contained in:
xianyu-w
2023-04-26 15:13:03 +00:00
committed by ob-robot
parent ef51ca80f8
commit 3efcefc29e
27 changed files with 32100 additions and 855 deletions

View File

@ -11757,12 +11757,19 @@ int ObDatumCast::is_trivial_cast(const ObObjType in_type,
is_trivial_cast = false;
int ret = OB_SUCCESS;
ObCharsetType in_cs = ObCharset::charset_type_by_coll(in_cs_type);
ObCharsetType out_cs = ObCharset::charset_type_by_coll(out_cs_type);
ObObjTypeClass in_tc = ob_obj_type_class(in_type);
ObObjTypeClass out_tc = ob_obj_type_class(out_type);
const bool is_same_charset = (ob_is_string_type(in_type) &&
ob_is_string_type(out_type) &&
ObCharset::charset_type_by_coll(in_cs_type) ==
ObCharset::charset_type_by_coll(out_cs_type));
(in_cs == out_cs ||
/** GB18030 and GB18030_2022 share the same code points,
* but map them to Unicode differently.
* So cast treats them as the same charset and skips charset_convert. */
(in_cs == CHARSET_GB18030 && out_cs == CHARSET_GB18030_2022) ||
(in_cs == CHARSET_GB18030_2022 && out_cs == CHARSET_GB18030)));
const bool is_clob_to_nonclob = (ob_is_clob(in_type, in_cs_type)
&& !ob_is_clob(out_type, out_cs_type));
const bool is_nonblob_to_blob = ((false == ob_is_blob(in_type, in_cs_type)) &&

View File

@ -235,10 +235,17 @@ int ObExprConvertOracle::calc_convert_oracle_expr(const ObExpr &expr,
ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum);
int64_t src_byte_len = 0;
int64_t buf_size = 0;
ObCharsetType src_cs = ObCharset::charset_type_by_coll(src_cs_type);
ObCharsetType dst_cs = ObCharset::charset_type_by_coll(dst_cs_type);
if (ob_is_string_tc(expr.datum_meta_.type_)
&& (src.length() == 0
|| ObCharset::charset_type_by_coll(src_cs_type) == ObCharset::charset_type_by_coll(dst_cs_type)
|| ObCharset::charset_type_by_coll(dst_cs_type) == CHARSET_BINARY)) {
|| src_cs == dst_cs
|| dst_cs == CHARSET_BINARY
/** GB18030 and GB18030_2022 share the same code points,
* but map them to Unicode differently.
* So convert passes the input through and skips charset_convert. */
|| (src_cs == CHARSET_GB18030 && dst_cs == CHARSET_GB18030_2022)
|| (src_cs == CHARSET_GB18030_2022 && dst_cs == CHARSET_GB18030))) {
dst = src; // no need convert
} else if (OB_FAIL(src_iter.init(0, NULL, &calc_alloc))) {
LOG_WARN("init src_iter failed ", K(ret), K(src_iter));

View File

@ -83,7 +83,10 @@ int ObExprNLSSort::convert_to_coll_code(ObEvalCtx &ctx,
ObString &to_str)
{
int ret = OB_SUCCESS;
if (to_type == CS_TYPE_GB18030_CHINESE_CS) {
if (to_type == CS_TYPE_GB18030_CHINESE_CS ||
to_type == CS_TYPE_GB18030_2022_PINYIN_CS ||
to_type == CS_TYPE_GB18030_2022_RADICAL_CS ||
to_type == CS_TYPE_GB18030_2022_STROKE_CS) {
char *conv_buf = NULL;
const int32_t MostBytes = 4; //most 4 bytes
size_t conv_buf_len = from_str.length() * MostBytes;