[FEAT MERGE] Support gb18030_2022
This commit is contained in:
@ -11757,12 +11757,19 @@ int ObDatumCast::is_trivial_cast(const ObObjType in_type,
|
||||
is_trivial_cast = false;
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
ObCharsetType in_cs = ObCharset::charset_type_by_coll(in_cs_type);
|
||||
ObCharsetType out_cs = ObCharset::charset_type_by_coll(out_cs_type);
|
||||
|
||||
ObObjTypeClass in_tc = ob_obj_type_class(in_type);
|
||||
ObObjTypeClass out_tc = ob_obj_type_class(out_type);
|
||||
const bool is_same_charset = (ob_is_string_type(in_type) &&
|
||||
ob_is_string_type(out_type) &&
|
||||
ObCharset::charset_type_by_coll(in_cs_type) ==
|
||||
ObCharset::charset_type_by_coll(out_cs_type));
|
||||
(in_cs == out_cs ||
|
||||
/** GB18030 and GB18030_2022 have the same code points,
|
||||
* but they have different mappings to Unicode.
|
||||
* So, we do not do charset_convert for them in cast*/
|
||||
(in_cs == CHARSET_GB18030 && out_cs == CHARSET_GB18030_2022) ||
|
||||
(in_cs == CHARSET_GB18030_2022 && out_cs == CHARSET_GB18030)));
|
||||
const bool is_clob_to_nonclob = (ob_is_clob(in_type, in_cs_type)
|
||||
&& !ob_is_clob(out_type, out_cs_type));
|
||||
const bool is_nonblob_to_blob = ((false == ob_is_blob(in_type, in_cs_type)) &&
|
||||
|
||||
@ -235,10 +235,17 @@ int ObExprConvertOracle::calc_convert_oracle_expr(const ObExpr &expr,
|
||||
ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum);
|
||||
int64_t src_byte_len = 0;
|
||||
int64_t buf_size = 0;
|
||||
ObCharsetType src_cs = ObCharset::charset_type_by_coll(src_cs_type);
|
||||
ObCharsetType dst_cs = ObCharset::charset_type_by_coll(dst_cs_type);
|
||||
if (ob_is_string_tc(expr.datum_meta_.type_)
|
||||
&& (src.length() == 0
|
||||
|| ObCharset::charset_type_by_coll(src_cs_type) == ObCharset::charset_type_by_coll(dst_cs_type)
|
||||
|| ObCharset::charset_type_by_coll(dst_cs_type) == CHARSET_BINARY)) {
|
||||
|| src_cs == dst_cs
|
||||
|| dst_cs == CHARSET_BINARY
|
||||
/** GB18030 and GB18030_2022 have the same code points,
|
||||
* but they have different mappings to Unicode.
|
||||
* So, we do not do charset_convert for them in convert*/
|
||||
|| (src_cs == CHARSET_GB18030 && dst_cs == CHARSET_GB18030_2022)
|
||||
|| (src_cs == CHARSET_GB18030_2022 && dst_cs == CHARSET_GB18030))) {
|
||||
dst = src; // no need convert
|
||||
} else if (OB_FAIL(src_iter.init(0, NULL, &calc_alloc))) {
|
||||
LOG_WARN("init src_iter failed ", K(ret), K(src_iter));
|
||||
|
||||
@ -83,7 +83,10 @@ int ObExprNLSSort::convert_to_coll_code(ObEvalCtx &ctx,
|
||||
ObString &to_str)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (to_type == CS_TYPE_GB18030_CHINESE_CS) {
|
||||
if (to_type == CS_TYPE_GB18030_CHINESE_CS ||
|
||||
to_type == CS_TYPE_GB18030_2022_PINYIN_CS ||
|
||||
to_type == CS_TYPE_GB18030_2022_RADICAL_CS ||
|
||||
to_type == CS_TYPE_GB18030_2022_STROKE_CS) {
|
||||
char *conv_buf = NULL;
|
||||
const int32_t MostBytes = 4; //most 4 bytes
|
||||
size_t conv_buf_len = from_str.length() * MostBytes;
|
||||
|
||||
Reference in New Issue
Block a user