/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_convert.h" #include "lib/charset/ob_charset.h" #include "sql/engine/expr/ob_expr_cast.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/expr/ob_expr_result_type_util.h" #include "sql/engine/expr/ob_expr_lob_utils.h" using namespace oceanbase::common; using namespace oceanbase::sql; namespace oceanbase { namespace sql { ObExprConvert::ObExprConvert(ObIAllocator &alloc) : ObFuncExprOperator(alloc, T_FUN_SYS_CONVERT, N_CONVERT, 2, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) { } ObExprConvert::~ObExprConvert() { } int ObExprConvert::calc_result_type2(ObExprResType &type, ObExprResType &type1, ObExprResType &type2, ObExprTypeCtx &type_ctx) const { UNUSED(type_ctx); int ret = OB_SUCCESS; type.set_type(ObVarcharType); // Only convert (xx using collation) will reach here now. It must be a varchar result. type.set_scale(type1.get_scale()); type.set_precision(type1.get_precision()); if (ob_is_string_type(type.get_type())) { type.set_length(type1.get_length()); } const ObObj &dest_collation = type2.get_param(); TYPE_CHECK(dest_collation, ObVarcharType); if (OB_SUCC(ret)) { ObString cs_name = dest_collation.get_string(); ObCharsetType charset_type = CHARSET_INVALID; if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(cs_name.trim()))) { ret = OB_ERR_UNKNOWN_CHARSET; LOG_WARN("unknown charset", K(ret), K(cs_name)); } else { type.set_collation_level(CS_LEVEL_EXPLICIT); type.set_collation_type(ObCharset::get_default_collation(charset_type)); //set calc type //only set type2 here. type2.set_calc_type(ObVarcharType); // cast表达式会对convert表达式的第一个子节点cast为type1,计算时convert的结果就是第一个 // 子节点的结果 type1.set_calc_meta(type.get_obj_meta()); type1.set_calc_collation_type(type.get_collation_type()); type1.set_calc_collation_level(type.get_collation_level()); type_ctx.set_cast_mode(type_ctx.get_cast_mode() | CM_CHARSET_CONVERT_IGNORE_ERR); LOG_DEBUG("in calc result type", K(ret), K(type1), K(type2), K(type)); } } return ret; } int calc_convert_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum) { int ret = OB_SUCCESS; ObDatum *child_res = NULL; if (OB_FAIL(expr.args_[0]->eval(ctx, child_res))) { LOG_WARN("eval arg 0 failed", K(ret)); } else { ObCollationType cs_type = expr.args_[0]->datum_meta_.cs_type_; int64_t mbmaxlen = 1; if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) { LOG_WARN("fail to get mbmaxlen", K(cs_type), K(ret)); } else if (mbmaxlen > 1 && !child_res->is_null()) { ObString checked_res; bool is_null = false; const ObSQLSessionInfo *session = ctx.exec_ctx_.get_my_session(); if (OB_ISNULL(session)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("session is null", K(ret)); } else if (OB_FAIL(ObSQLUtils::check_well_formed_str(child_res->get_string(), cs_type, checked_res, is_null, is_strict_mode(session->get_sql_mode()), false))) { LOG_WARN("check_well_formed_str failed", K(ret), K(child_res->get_string()), K(expr.datum_meta_)); } else if (is_null) { res_datum.set_null(); } else { res_datum.set_string(checked_res); } } else { res_datum.set_datum(*child_res); } } return ret; } int ObExprConvert::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; UNUSED(expr_cg_ctx); UNUSED(raw_expr); rt_expr.eval_func_ = calc_convert_expr; return ret; } ObExprConvertOracle::ObExprConvertOracle(ObIAllocator &alloc) : ObStringExprOperator(alloc, T_FUN_SYS_CONVERT, N_CONVERT, TWO_OR_THREE, VALID_FOR_GENERATED_COL) { } ObExprConvertOracle::~ObExprConvertOracle() { } int ObExprConvertOracle::calc_result_typeN(ObExprResType &type, ObExprResType *types_array, int64_t param_num, ObExprTypeCtx &type_ctx) const { int ret = OB_SUCCESS; if (!(param_num >= 2 && param_num <= 3)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("param num should be 2 or 3", K(ret)); } //result meta deduce if (OB_SUCC(ret)) { ObLength length; auto str_params = make_const_carray(&types_array[0]); OZ (aggregate_string_type_and_charset_oracle(*type_ctx.get_session(), str_params, type, PREFER_VAR_LEN_CHAR)); OZ (deduce_string_param_calc_type_and_charset(*type_ctx.get_session(), type, str_params)); OX (length = types_array[0].get_calc_length()); OX (type.set_length(length * ObCharset::MAX_MB_LEN)); } //param calc type deduce if (OB_SUCC(ret)) { types_array[1].set_calc_type_default_varchar(); if (3 == param_num) { types_array[2].set_calc_type_default_varchar(); } } return ret; } int ObExprConvertOracle::calc_convert_oracle_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum) { int ret = OB_SUCCESS; ObDatum *src_param = NULL; ObCollationType src_cs_type = CS_TYPE_INVALID; ObCollationType dst_cs_type = CS_TYPE_INVALID; ObValueChecker charset_checker(CHARSET_INVALID + 1, CHARSET_MAX - 1, OB_ERR_UNSUPPORTED_CHARACTER_SET); //param1 if (OB_FAIL(expr.args_[0]->eval(ctx, src_param))) { LOG_WARN("eval arg failed", K(ret)); } //param2 if (OB_SUCC(ret)) { ObString dst_character_set; ObDatum *dst_cs_type_param = NULL; if (OB_FAIL(expr.args_[1]->eval(ctx, dst_cs_type_param))) { LOG_WARN("eval arg failed", K(ret)); } else { dst_character_set = dst_cs_type_param->get_string(); dst_cs_type = ObCharset::get_default_collation_by_mode( ObCharset::charset_type_by_name_oracle(dst_character_set), lib::is_oracle_mode()); if (OB_FAIL(charset_checker.validate(ObCharset::charset_type_by_coll(dst_cs_type)))) { LOG_WARN("invalid charset value", K(ret), K(dst_character_set)); } } } //param3 if (OB_SUCC(ret)) { if (3 == expr.arg_cnt_) { ObString src_character_set; ObDatum *src_cs_type_param = NULL; if (OB_FAIL(expr.args_[2]->eval(ctx, src_cs_type_param))) { LOG_WARN("eval arg failed", K(ret)); } else { src_character_set = src_cs_type_param->get_string(); src_cs_type = ObCharset::get_default_collation_by_mode( ObCharset::charset_type_by_name_oracle(src_character_set), lib::is_oracle_mode()); if (OB_FAIL(charset_checker.validate(ObCharset::charset_type_by_coll(src_cs_type)))) { LOG_WARN("invalid charset value", K(ret), K(src_character_set)); } } } else { src_cs_type = expr.args_[0]->datum_meta_.cs_type_; } } //convert result if (OB_SUCC(ret)) { if (src_param->is_null()) { res_datum.set_null(); } else { ObString src = src_param->get_string(); ObString dst; char *res_buf = NULL; const int64_t res_buf_len = src.length() * ObCharset::MAX_MB_LEN; if (OB_ISNULL(res_buf = expr.get_str_res_mem(ctx, res_buf_len))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate memory failed", K(ret), K(res_buf_len)); } else if (!ob_is_text_tc(expr.args_[0]->datum_meta_.type_)) { ObDataBuffer data_buf(res_buf, res_buf_len); if (OB_FAIL(ObCharset::charset_convert(data_buf, src, src_cs_type, dst_cs_type, dst, ObCharset::REPLACE_UNKNOWN_CHARACTER))) { LOG_WARN("fail to convert input string", K(src), K(src_cs_type), K(dst_cs_type), KPHEX(src.ptr(), src.length()), K(res_buf_len)); } else { if (dst.empty()) { res_datum.set_null(); } else { res_datum.set_string(dst); } } } else { // text tc ObEvalCtx::TempAllocGuard alloc_guard(ctx); ObIAllocator &calc_alloc = alloc_guard.get_allocator(); ObTextStringIter src_iter(expr.args_[0]->datum_meta_.type_, src_cs_type, src_param->get_string(), expr.args_[0]->obj_meta_.has_lob_header()); ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum); int64_t src_byte_len = 0; int64_t buf_size = 0; ObCharsetType src_cs = ObCharset::charset_type_by_coll(src_cs_type); ObCharsetType dst_cs = ObCharset::charset_type_by_coll(dst_cs_type); if (ob_is_string_tc(expr.datum_meta_.type_) && (src.length() == 0 || src_cs == dst_cs || dst_cs == CHARSET_BINARY /** GB18030 and GB18030_2022 have the same code points, * but they have different mapping to unicode. * So, we do not do charset_convert for them in convert*/ || (src_cs == CHARSET_GB18030 && dst_cs == CHARSET_GB18030_2022) || (src_cs == CHARSET_GB18030_2022 && dst_cs == CHARSET_GB18030))) { dst = src; // no need convert } else if (OB_FAIL(src_iter.init(0, NULL, &calc_alloc))) { LOG_WARN("init src_iter failed ", K(ret), K(src_iter)); } else if (OB_FAIL(src_iter.get_byte_len(src_byte_len))) { LOG_WARN("get input byte len failed"); } else if (OB_FAIL(output_result.init(src_byte_len * ObCharset::MAX_MB_LEN))) { LOG_WARN("init stringtext result failed"); } else if (src_byte_len == 0) { output_result.set_result(); } else if (OB_FAIL(output_result.get_reserved_buffer(res_buf, buf_size))) { LOG_WARN("stringtext result reserve buffer failed"); } else { ObTextStringIterState state; ObString src_block_data; while (OB_SUCC(ret) && buf_size > 0 && (state = src_iter.get_next_block(src_block_data)) == TEXTSTRING_ITER_NEXT) { ObDataBuffer data_buf(res_buf, buf_size); if (OB_FAIL(ObCharset::charset_convert(data_buf, src_block_data, src_cs_type, dst_cs_type, dst, (ObCharset::REPLACE_UNKNOWN_CHARACTER | ObCharset::COPY_STRING_ON_SAME_CHARSET)))) { LOG_WARN("fail to convert input string", K(src_block_data), K(src_cs_type), K(dst_cs_type), KPHEX(src_block_data.ptr(), src_block_data.length()), K(buf_size)); } else if (OB_FAIL(output_result.lseek(dst.length(), 0))) { LOG_WARN("result lseek failed", K(ret)); } else { res_buf += dst.length(); buf_size -= dst.length(); } } if (OB_SUCC(ret)) { output_result.get_result_buffer(dst); } } if (OB_FAIL(ret)) { } else if (dst.empty()) { // initcap is only for oracle mode. set res be null when string length is 0. res_datum.set_null(); } else { res_datum.set_string(dst); } } } } return ret; } int ObExprConvertOracle::cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; UNUSED(op_cg_ctx); UNUSED(raw_expr); rt_expr.eval_func_ = calc_convert_oracle_expr; return ret; } } //namespace sql } //namespace oceanbase