/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include "sql/engine/expr/ob_expr_unistr.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/expr/ob_expr_result_type_util.h" #include "lib/utility/ob_print_utils.h" namespace oceanbase { using namespace common; namespace sql { ObExprUnistr::ObExprUnistr(common::ObIAllocator &alloc) : ObStringExprOperator(alloc, T_FUN_UNISTR, N_UNISTR, 1) { } ObExprUnistr::~ObExprUnistr() { } int ObExprUnistr::calc_result_type1(ObExprResType &type, ObExprResType &type1, common::ObExprTypeCtx &type_ctx) const { int ret = OB_SUCCESS; ObLength length = 0; type.set_nvarchar2(); type.set_collation_type(type_ctx.get_session()->get_nls_collation_nation()); type.set_collation_level(CS_LEVEL_IMPLICIT); type.set_length_semantics(LS_CHAR); if (!type1.is_character_type()) { type1.set_calc_type(ObVarcharType); type1.set_calc_collation_type(type_ctx.get_session()->get_dtc_params().nls_collation_); type1.set_calc_length_semantics(LS_CHAR); } OZ (ObExprResultTypeUtil::deduce_max_string_length_oracle(type_ctx.get_session()->get_dtc_params(), type1, type, length)); type1.set_calc_length(length); type.set_length(length); return ret; } int calc_unistr(const ObString &src, const ObCollationType src_cs_type, const ObCollationType dst_cs_type, char* buf, const int64_t buf_len, int32_t &pos) { int ret = OB_SUCCESS; ObStringScanner scanner(src, src_cs_type); ObString encoding; int32_t wchar = 0; int32_t written_bytes = 0; while (OB_SUCC(ret) && scanner.next_character(encoding, wchar, ret)) { if ('\\' != wchar) { if (OB_FAIL(ObCharset::wc_mb(dst_cs_type, wchar, buf + pos, buf_len - pos, written_bytes))) { LOG_WARN("fail to convert unicode to multi-byte", K(ret), K(wchar)); } else { pos += written_bytes; } } else { int64_t unicode_encoding_value = 0; ObString encoding_inner; int32_t wchar_inner = 0; bool is_ucs2_format = true; for (int i = 0; OB_SUCC(ret) && i < 4; ++i) { if (!scanner.next_character(encoding_inner, wchar_inner, ret)) { if (OB_SUCC(ret)) { ret = OB_ERR_MUST_BE_FOLLOWED_BY_FOUR_HEXDECIMAL_CHARACTERS_OR_ANOTHER; } LOG_WARN("fail to get next character", K(ret)); } else if (0 == i && '\\' == wchar_inner) { //found "\\" if (OB_FAIL(ObCharset::wc_mb(dst_cs_type, wchar_inner, buf + pos, buf_len - pos, written_bytes))) { LOG_WARN("fail to convert unicode to multi-byte", K(ret), K(wchar_inner)); } else { pos += written_bytes; } is_ucs2_format = false; break; } else { int64_t value = 0; if ('0' <= wchar_inner && wchar_inner <= '9') { value = wchar_inner - '0'; } else if ('A' <= wchar_inner && wchar_inner <= 'F') { value = wchar_inner - 'A' + 10; } else if ('a' <= wchar_inner && wchar_inner <= 'f') { value = wchar_inner - 'a' + 10; } else { ret = OB_ERR_MUST_BE_FOLLOWED_BY_FOUR_HEXDECIMAL_CHARACTERS_OR_ANOTHER; LOG_WARN("fail to get next character", K(ret)); } if (OB_SUCC(ret)) { unicode_encoding_value *= 16; unicode_encoding_value += value; } } } //end for if (OB_SUCC(ret) && is_ucs2_format) { if (OB_UNLIKELY(pos + 2 > buf_len)) { ret = OB_SIZE_OVERFLOW; LOG_WARN("size overflow", K(ret)); } else { buf[pos++] = (unicode_encoding_value >> 8) & 0xFF; buf[pos++] = unicode_encoding_value & 0xFF; } } } } return ret; } int ObExprUnistr::calc_unistr_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum) { int ret = OB_SUCCESS; ObDatum *src_param = NULL; if (expr.args_[0]->eval(ctx, src_param)) { LOG_WARN("eval arg failed", K(ret)); } else { if (src_param->is_null()) { res_datum.set_null(); } else { ObString src = src_param->get_string(); char *buf = NULL; int64_t buf_len = src.length() * ObCharset::MAX_MB_LEN; int32_t length = 0; if (OB_ISNULL(buf = static_cast(expr.get_str_res_mem(ctx, buf_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate memory", K(ret), K(src)); } else if (OB_FAIL(calc_unistr(src, expr.args_[0]->datum_meta_.cs_type_, expr.datum_meta_.cs_type_, buf, buf_len, length))) { LOG_WARN("fail to calc unistr", K(ret)); } else { res_datum.set_string(buf, length); } } } return ret; } int ObExprUnistr::cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; UNUSED(op_cg_ctx); UNUSED(raw_expr); rt_expr.eval_func_ = calc_unistr_expr; return ret; } ObExprAsciistr::ObExprAsciistr(common::ObIAllocator &alloc) : ObStringExprOperator(alloc, T_FUN_ASCIISTR, N_ASCIISTR, 1) { } ObExprAsciistr::~ObExprAsciistr() { } int ObExprAsciistr::calc_result_type1(ObExprResType &type, ObExprResType &type1, common::ObExprTypeCtx &type_ctx) const { int ret = OB_SUCCESS; //deduce result type type.set_varchar(); type.set_collation_type(type_ctx.get_session()->get_nls_collation()); type.set_collation_level(CS_LEVEL_IMPLICIT); if (type1.is_character_type() && (type1.get_length_semantics() == LS_CHAR || type1.get_length_semantics() == LS_BYTE)) { type.set_length_semantics(type1.get_length_semantics()); } else { type.set_length_semantics(type_ctx.get_session()->get_actual_nls_length_semantics()); } //deduce calc type if (!type1.is_character_type()) { type1.set_calc_type(ObVarcharType); type1.set_calc_collation_type(type_ctx.get_session()->get_nls_collation()); } type1.set_calc_length_semantics(type.get_length_semantics()); //deduce length ObLength length = 0; ObExprResType temp_type; temp_type.set_meta(type1.get_calc_meta()); temp_type.set_length_semantics(type.get_length_semantics()); OZ (ObExprResultTypeUtil::deduce_max_string_length_oracle(type_ctx.get_session()->get_dtc_params(), type1, temp_type, length)); type1.set_calc_length(length); type.set_length(length * 10); return ret; } int calc_asciistr(const ObString &src, const ObCollationType src_cs_type, const ObCollationType dst_cs_type, char* buf, const int64_t buf_len, int32_t &pos) { int ret = OB_SUCCESS; ObStringScanner scanner(src, src_cs_type); ObString encoding; int32_t wchar = 0; while (OB_SUCC(ret) && scanner.next_character(encoding, wchar, ret)) { if (ob_isascii(wchar) && '\\' != wchar) { int32_t written_bytes = 0; if (OB_FAIL(ObCharset::wc_mb(dst_cs_type, wchar, buf + pos, buf_len - pos, written_bytes))) { LOG_WARN("fail to convert unicode to multi-byte", K(ret), K(wchar)); } else { pos += written_bytes; } } else { const int64_t temp_buf_len = 4; char temp_buf[temp_buf_len]; int32_t temp_written_bytes = 0; if (OB_FAIL(ObCharset::wc_mb(CS_TYPE_UTF16_BIN, wchar, temp_buf, temp_buf_len, temp_written_bytes))) { LOG_WARN("fail to convert unicode to multi-byte", K(ret), K(wchar)); } else { const int utf16_minmb_len = 2; if (OB_UNLIKELY(ObCharset::is_cs_nonascii(dst_cs_type))) { // not support non-ascii database charset for now ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "charset except ascii"); LOG_WARN("not support charset", K(ret), K(dst_cs_type)); /* const int64_t hex_buf_len = temp_buf_len * 2; char hex_buf[hex_buf_len]; int32_t hex_written_bytes = 0; for (int i = 0; OB_SUCC(ret) && i < temp_written_bytes/utf16_minmb_len; ++i) { if (OB_FAIL(ObCharset::wc_mb(dst_cs_type, '\\', buf + pos, buf_len - pos, written_bytes))) { LOG_WARN("fail to convert unicode to multi-byte", K(ret), K(wchar)); } else { pos += written_bytes; } if (OB_SUCC(ret)) { if (OB_FAIL(hex_print(temp_buf, utf16_minmb_len, hex_buf, hex_buf_len, hex_written_bytes))) { LOG_WARN("fail to convert to hex", K(ret), K(temp_written_bytes), K(pos), K(buf_len)); } else if (OB_FAIL(ObCharset::charset_convert(CS_TYPE_UTF8MB4_BIN, hex_buf, hex_written_bytes, dst_cs_type, buf + pos, buf_len - pos, written_bytes))) { LOG_WARN("fail to convert charset", K(ret)); } else { pos += written_bytes; } } } */ } else { for (int i = 0; OB_SUCC(ret) && i < temp_written_bytes/utf16_minmb_len; ++i) { if (OB_UNLIKELY(pos >= buf_len)) { ret = OB_SIZE_OVERFLOW; LOG_WARN("size overflow", K(ret), K(pos), K(buf_len)); } else { buf[pos++] = '\\'; } if (OB_SUCC(ret)) { int64_t hex_writtern_bytes = 0; if (OB_FAIL(hex_print(temp_buf + i*utf16_minmb_len, utf16_minmb_len, buf + pos, buf_len - pos, hex_writtern_bytes))) { LOG_WARN("fail to convert to hex", K(ret), K(temp_written_bytes), K(pos), K(buf_len)); } else { pos += hex_writtern_bytes; } } } } } } } return ret; } int ObExprAsciistr::calc_asciistr_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum) { int ret = OB_SUCCESS; ObDatum *src_param = NULL; if (expr.args_[0]->eval(ctx, src_param)) { LOG_WARN("eval arg failed", K(ret)); } else { if (src_param->is_null()) { res_datum.set_null(); } else { ObString src = src_param->get_string(); char *buf = NULL; int64_t buf_len = src.length() * ObCharset::MAX_MB_LEN * 2; int32_t length = 0; if (OB_ISNULL(buf = static_cast(expr.get_str_res_mem(ctx, buf_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate memory", K(ret), K(src)); } else if (OB_FAIL(calc_asciistr(src, expr.args_[0]->datum_meta_.cs_type_, expr.datum_meta_.cs_type_, buf, buf_len, length))) { LOG_WARN("fail to calc unistr", K(ret)); } else { res_datum.set_string(buf, length); } } } return ret; } int ObExprAsciistr::cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; UNUSED(op_cg_ctx); UNUSED(raw_expr); rt_expr.eval_func_ = calc_asciistr_expr; return ret; } } }