patch 4.0

This commit is contained in:
wangzelin.wzl
2022-10-24 10:34:53 +08:00
parent 4ad6e00ec3
commit 93a1074b0c
10533 changed files with 2588271 additions and 2299373 deletions

View File

@ -13,36 +13,35 @@
#define USING_LOG_PREFIX SQL_ENG
#include "sql/engine/expr/ob_expr_translate.h"
#include "sql/engine/expr/ob_expr_util.h"
#include "sql/parser/ob_item_type.h"
#include "objit/common/ob_item_type.h"
#include "lib/oblog/ob_log.h"
#include "sql/session/ob_sql_session_info.h"
#include "sql/engine/expr/ob_expr_oracle_to_char.h"
#include "sql/engine/expr/ob_expr_result_type_util.h"
namespace oceanbase {
namespace oceanbase
{
using namespace common;
namespace sql {
namespace sql
{
// File-level size constants. The MAX_*_BUFFER_SIZE values are the result lengths
// chosen by the type-class switch visible in calc_result_typeN for
// translate(c1 USING char_cs/nchar_cs) when c1 is not a string/raw value.
const int64_t MAX_NUMBER_BUFFER_SIZE = 200; // int / uint / float / double / number arguments
const int64_t MAX_DATE_BUFFER_SIZE = 95; // date / datetime arguments
const int64_t MAX_TIMESTAMP_BUFFER_SIZE = 200; // oracle timestamp arguments
const int64_t MAX_CLOB_BUFFER_SIZE = 4000; // text / lob arguments that are not blobs
const int64_t MAX_INTERVAL_BUFFER_SIZE = 35; // interval arguments
// NOTE(review): the two multipliers below are not referenced in the visible part of this file.
const int64_t VARCHAR_SIZE_TIMES = 5;
const int64_t RAW_SIZE_TIMES = 10;
// Constructor: forwards the allocator to ObStringExprOperator together with
// T_FUN_SYS_TRANSLATE, N_TRANSLATE and MORE_THAN_ONE. The body itself is empty.
ObExprTranslate::ObExprTranslate(ObIAllocator& alloc)
ObExprTranslate::ObExprTranslate(ObIAllocator &alloc)
: ObStringExprOperator(alloc, T_FUN_SYS_TRANSLATE, N_TRANSLATE, MORE_THAN_ONE)
{}
{
}
// Destructor: the body is empty.
ObExprTranslate::~ObExprTranslate()
{}
{
}
// Deduces the result type of TRANSLATE from the types of its arguments.
//
//   param_num == 2 : translate(c1 USING char_cs/nchar_cs)
//   param_num == 3 : translate(c1, c2, c3)
//   anything else  : OB_INVALID_ARGUMENT
//
// @param type      [out] result type (type, collation, length semantics, length)
// @param types     [in/out] argument types; their calc type / calc collation are set here
// @param param_num number of entries in `types`
// @param type_ctx  type-deduction context; its session must be non-NULL
// @return OB_SUCCESS, or the first error raised while deducing the type
int ObExprTranslate::calc_result_typeN(
ObExprResType& type, ObExprResType* types, int64_t param_num, ObExprTypeCtx& type_ctx) const
int ObExprTranslate::calc_result_typeN(ObExprResType &type,
ObExprResType *types,
int64_t param_num,
ObExprTypeCtx &type_ctx) const
{
int ret = OB_SUCCESS;
int64_t mbmaxlen = 0;
const ObBasicSessionInfo* session = type_ctx.get_session();
const ObBasicSessionInfo *session = type_ctx.get_session();
CK(OB_NOT_NULL(type_ctx.get_session()));
if (OB_FAIL(ret)) {
} else if (2 == param_num) { // translate(c1 using char_cs/nchar_cs)
@ -55,9 +54,21 @@ int ObExprTranslate::calc_result_typeN(
} else {
ObSessionNLSParams nls_param = type_ctx.get_session()->get_session_nls_params();
// char_or_nchar_cs is assigned in lines not shown here; 0 selects the CHAR_CS branch.
bool is_char_cs = 0 == char_or_nchar_cs;
auto params = make_const_carray(&types[0]);
ObLength result_len = 0;
if (is_char_cs) {
type.set_type(ObVarcharType);
type.set_length_semantics(nls_param.nls_length_semantics_);
// Length semantics: literals and non-string arguments take the session NLS setting,
// CLOBs are forced to byte semantics, other string arguments keep their own semantics.
if (types[0].is_literal()) {
type.set_length_semantics(nls_param.nls_length_semantics_);
} else if (types[0].is_string_or_lob_locator_type()) {
if (types[0].is_clob() || types[0].is_clob_locator()) {
type.set_length_semantics(LS_BYTE);
} else {
type.set_length_semantics(types[0].get_length_semantics());
}
} else {
type.set_length_semantics(nls_param.nls_length_semantics_);
}
type.set_collation_level(CS_LEVEL_IMPLICIT);
type.set_collation_type(nls_param.nls_collation_);
} else {
@ -66,170 +77,96 @@ int ObExprTranslate::calc_result_typeN(
type.set_collation_level(CS_LEVEL_IMPLICIT);
type.set_collation_type(nls_param.nls_nation_collation_);
}
// The argument is converted to the result type and collation before evaluation.
types[0].set_calc_type(type.get_type());
types[0].set_calc_collation_type(type.get_collation_type());
const ObObjTypeClass type_class = types[0].get_type_class();
if (ObStringTC == type_class || ObRawTC == type_class) {
if (OB_FAIL(ObExprToCharCommon::calc_result_length_for_string_param(type, types[0]))) {
LOG_WARN("calc result length for string param failed", K(ret));
}
} else {
// Non-string argument: pick a fixed result length by type class.
int64_t type_length = 0;
switch (type_class) {
case ObNullTC: {
break;
}
case ObIntTC:
case ObUIntTC:
case ObFloatTC:
case ObDoubleTC:
case ObNumberTC: {
type_length = MAX_NUMBER_BUFFER_SIZE;
break;
}
case ObDateTC:
case ObDateTimeTC: {
type_length = MAX_DATE_BUFFER_SIZE;
break;
}
case ObOTimestampTC: {
type_length = MAX_TIMESTAMP_BUFFER_SIZE;
break;
}
case ObIntervalTC: {
type_length = MAX_INTERVAL_BUFFER_SIZE;
break;
}
case ObTextTC:
case ObLobTC: {
// BLOB arguments are rejected; other text/lob arguments get the CLOB bound.
if (types[0].is_blob() || types[0].is_blob_locator()) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "translate blob using char_cs/nchar_cs");
} else {
type_length = MAX_CLOB_BUFFER_SIZE;
}
break;
}
default: {
ret = OB_NOT_SUPPORTED;
LOG_WARN("invalid argument type for translate", K(types[0]));
}
}
if (OB_SUCC(ret)) {
if (!is_char_cs) {
// NCHAR_CS branch: divide the length by the collation's mbminlen, rounding up.
int64_t mbminlen = 1;
if (OB_FAIL(ObCharset::get_mbminlen_by_coll(type.get_collation_type(), mbminlen))) {
// NOTE(review): the message below says "mbmaxlen" although the failing call is get_mbminlen_by_coll.
LOG_WARN("get mbmaxlen by coll failed", K(ret), K(type));
} else {
type_length = (type_length + mbminlen - 1) / mbminlen;
}
}
type.set_length(type_length);
}
}
OZ(deduce_string_param_calc_type_and_charset(*session, type, params, LS_BYTE));
// the following code does not make sense, only to make result length identical with oracle,
// but it make the calc_result_type not reentrant
// if (OB_SUCC(ret)) {
// if (type.is_nvarchar2() && types[0].is_varchar_or_char()) {
// result_len = types[0].get_calc_length();
// } else {
// result_len = MIN(types[0].get_calc_length() * ObCharset::MAX_MB_LEN,
// OB_MAX_ORACLE_VARCHAR_LENGTH);
// }
// if (type.is_nvarchar2() && !ob_is_string_tc(types[0].get_type())) {
// const ObCharsetInfo *cs = ObCharset::get_charset(type.get_collation_type());
// result_len = result_len / cs->mbminlen;
// }
// type.set_length(result_len);
// }
// NOTE(review): unlike the commented-out variant above, this length is not clamped to
// OB_MAX_ORACLE_VARCHAR_LENGTH -- confirm that is intended.
OX(type.set_length(types[0].get_calc_length() * ObCharset::MAX_MB_LEN));
}
} else if (OB_LIKELY(3 == param_num)) { // translate(c1, c2, c3)
ObExprResType& ori_str_type = types[0];
ObExprResType& from_str_type = types[1];
ObExprResType& to_str_type = types[2];
} else if (OB_LIKELY(3 == param_num)) { //translate(c1, c2, c3)
// Only the first argument is used to deduce the result type here; experiments against
// Oracle show that the result type follows the first argument.
ObExprResType &ori_str_type = types[0];
ObExprResType &from_str_type = types[1];
ObExprResType &to_str_type = types[2];
ObSEArray<ObExprResType*, 3, ObNullAllocator> params;
OZ(params.push_back(&ori_str_type));
OZ(aggregate_string_type_and_charset_oracle(*session, params, type, true));
OZ(aggregate_string_type_and_charset_oracle(*session, params, type, PREFER_VAR_LEN_CHAR));
// A CLOB result is replaced by VARCHAR with byte length semantics.
if (OB_SUCC(ret) && type.is_clob()) {
type.set_type(ObVarcharType);
type.set_length_semantics(LS_BYTE);
}
// After the result type has been deduced from the first argument, use the result type
// to deduce the calc type of all the arguments.
OZ(params.push_back(&from_str_type));
OZ(params.push_back(&to_str_type));
OZ(deduce_string_param_calc_type_and_charset(*session, type, params));
// All three arguments must end up with the same calc meta.
CK((ori_str_type.get_calc_meta() == to_str_type.get_calc_meta()));
CK((ori_str_type.get_calc_meta() == from_str_type.get_calc_meta()));
// deduce result length
if (OB_SUCC(ret)) {
if (type.is_nstring() || LS_CHAR == type.get_length_semantics()) {
type.set_length(ori_str_type.get_length());
} else if (type.is_varchar_or_char()) {
if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(type.get_collation_type(), mbmaxlen))) {
LOG_WARN("get_mbmaxlen_by_coll failed", K(type.get_collation_type()), K(ret));
} else if (type.is_varchar()) {
int64_t len = mbmaxlen * ori_str_type.get_length();
type.set_length(len > OB_MAX_ORACLE_VARCHAR_LENGTH ? OB_MAX_ORACLE_VARCHAR_LENGTH : len);
} else if (type.is_char()) {
int64_t len = mbmaxlen * ori_str_type.get_length();
type.set_length(len > OB_MAX_ORACLE_CHAR_LENGTH_BYTE ? OB_MAX_ORACLE_CHAR_LENGTH_BYTE : len);
}
} else {
// do nothing
ObLength result_len = types[0].get_calc_length();
OZ(ObExprResultTypeUtil::deduce_max_string_length_oracle(session->get_dtc_params(),
types[0], type, result_len, LS_CHAR));
// With byte length semantics the length is multiplied by the collation's mbmaxlen.
if (OB_SUCC(ret) && LS_BYTE == type.get_length_semantics()) {
OZ(ObCharset::get_mbmaxlen_by_coll(type.get_collation_type(), mbmaxlen));
OX(result_len *= mbmaxlen);
}
OX(type.set_length(result_len));
}
} else {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument number", K(ret), K(param_num));
}
LOG_DEBUG("check type info", K(type.get_collation_type()), K(type.get_length()), K(type.get_length_semantics()));
LOG_DEBUG("check type info", K(type.get_collation_type()), K(type.get_length()),
K(type.get_length_semantics()));
return ret;
}
// ObObj-based evaluation of TRANSLATE.
//
//   2 params: translate(c1 USING ...) -- the first argument is copied to `result` as is.
//   3 params: translate(ori, from, to) -- characters are mapped by translate_hashmap.
//             An Oracle-NULL input, a null flag reported by translate_hashmap, or an
//             empty output string all yield a NULL result.
//   otherwise: OB_ERR_UNEXPECTED.
//
// @param result     [out] evaluated value
// @param objs_stack argument values, `param_num` entries
// @param param_num  number of arguments
// @param expr_ctx   evaluation context; its calc_buf_ is passed to translate_hashmap as
//                   both the temporary and the result allocator
// @return OB_SUCCESS or the error reported by translate_hashmap
int ObExprTranslate::calc_resultN(
    ObObj& result, const common::ObObj* objs_stack, int64_t param_num, ObExprCtx& expr_ctx) const
{
  int ret = OB_SUCCESS;
  if (2 == param_num) {
    result = objs_stack[0];
  } else if (OB_LIKELY(3 == param_num)) {
    const bool has_null_input = objs_stack[0].is_null_oracle()
                                || objs_stack[2].is_null_oracle()
                                || objs_stack[1].is_null_oracle();
    if (has_null_input) {
      result.set_null();
    } else {
      // The three string locals keep their names because they are printed through K().
      ObString ori_str = objs_stack[0].get_string();
      ObString from_str = objs_stack[1].get_string();
      ObString to_str = objs_stack[2].get_string();
      ObString translated;
      bool null_from_translate = false;
      ObCollationType coll_type = result_type_.get_collation_type();
      if (OB_FAIL(translate_hashmap(ori_str,
                                    from_str,
                                    to_str,
                                    coll_type,
                                    *expr_ctx.calc_buf_,
                                    *expr_ctx.calc_buf_,
                                    translated,
                                    null_from_translate))) {
        LOG_WARN("translate_hashmap failed", K(ret), K(ori_str), K(from_str), K(to_str));
      } else if (null_from_translate || 0 == translated.length()) {
        result.set_null();
      } else {
        result.set_string(result_type_.get_type(), translated);
        result.set_collation_type(coll_type);
      }
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("expect 2 or 3 params", K(ret), K(param_num));
  }
  return ret;
}
// Character-wise translation of ori_str using a from->to hash map.
//
// @param ori_str   string to translate
// @param from_str  characters to look for
// @param to_str    replacement characters, matched up with from_str by insert_map
// @param cs_type   collation used to split the strings into (possibly multi-byte) characters
// @param tmp_alloc allocator for the per-character length/offset arrays and the map entries
// @param res_alloc allocator passed in for the result (its use is in lines not shown here)
// @param res_str   [out] translated string; reset on entry
// @param is_null   [out] set to true when any of the three inputs has length 0
// @return OB_ERR_UNEXPECTED when any input pointer is NULL, OB_ALLOCATE_MEMORY_FAILED when a
//         buffer cannot be allocated, or the error of a failing helper; otherwise OB_SUCCESS
//
// Use hashmap to store each character of from_str and to_str, key is the character in from_str, and val is the
// character in to_str Loop through each character of ori_str, find the key from the map, and replace it with val if it
// exists Support multi-byte characters
int ObExprTranslate::translate_hashmap(const ObString& ori_str, const ObString& from_str, const ObString& to_str,
ObCollationType cs_type, ObIAllocator& tmp_alloc, ObIAllocator& res_alloc, ObString& res_str, bool& is_null)
// Store every character of from_str and to_str in a hash map: the key is a character of
// from_str and the value is the character of to_str.
// Loop over each character of ori_str and look it up in the map; if it exists, replace it with the value.
// Multi-byte characters are supported.
int ObExprTranslate::translate_hashmap(const ObString &ori_str,
const ObString &from_str,
const ObString &to_str,
ObCollationType cs_type,
ObIAllocator &tmp_alloc,
ObIAllocator &res_alloc,
ObString &res_str,
bool &is_null)
{
int ret = OB_SUCCESS;
is_null = false;
res_str.reset();
ObIAllocator* allocator = &tmp_alloc;
ObIAllocator *allocator = &tmp_alloc;
if (OB_ISNULL(ori_str.ptr()) || OB_ISNULL(from_str.ptr()) || OB_ISNULL(to_str.ptr())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("string pointer is null", KP(ori_str.ptr()), KP(from_str.ptr()), KP(to_str.ptr()));
} else if ((0 == ori_str.length()) || (0 == from_str.length()) || (0 == to_str.length())) {
// Any empty input: report NULL to the caller instead of producing a string.
is_null = true;
} else {
size_t ori_str_len_in_char = ObCharset::strlen_char(cs_type, ori_str.ptr(), ori_str.length());
size_t ori_str_len_in_char = ObCharset::strlen_char(cs_type, ori_str.ptr(), ori_str.length());
size_t from_str_len_in_char = ObCharset::strlen_char(cs_type, from_str.ptr(), from_str.length());
size_t to_str_len_in_char = ObCharset::strlen_char(cs_type, to_str.ptr(), to_str.length());
ObFixedArray<size_t, ObIAllocator> ori_str_byte_num(allocator, ori_str_len_in_char);
size_t to_str_len_in_char = ObCharset::strlen_char(cs_type, to_str.ptr(), to_str.length());
// Record the byte length of every character in ori_str / from_str / to_str.
ObFixedArray<size_t, ObIAllocator> ori_str_byte_num(allocator, ori_str_len_in_char);
ObFixedArray<size_t, ObIAllocator> from_str_byte_num(allocator, from_str_len_in_char);
ObFixedArray<size_t, ObIAllocator> to_str_byte_num(allocator, to_str_len_in_char);
ObFixedArray<size_t, ObIAllocator> ori_str_byte_offset(allocator, ori_str_len_in_char + 1);
ObFixedArray<size_t, ObIAllocator> from_str_byte_offset(allocator, from_str_len_in_char + 1);
ObFixedArray<size_t, ObIAllocator> to_str_byte_offset(allocator, to_str_len_in_char + 1);
char* ret_buf = NULL;
ObFixedArray<size_t, ObIAllocator> to_str_byte_num(allocator, to_str_len_in_char);
ObFixedArray<size_t, ObIAllocator> ori_str_byte_offset(allocator, ori_str_len_in_char+1);
ObFixedArray<size_t, ObIAllocator> from_str_byte_offset(allocator, from_str_len_in_char+1);
ObFixedArray<size_t, ObIAllocator> to_str_byte_offset(allocator, to_str_len_in_char+1);
char *ret_buf = NULL;
size_t ret_buf_size = 0;
int64_t mbmaxlen = 0;
if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
@ -241,22 +178,25 @@ int ObExprTranslate::translate_hashmap(const ObString& ori_str, const ObString&
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(ori_str.length()));
} else {
if (OB_FAIL(ObExprUtil::get_mb_str_info(ori_str, cs_type, ori_str_byte_num, ori_str_byte_offset)) ||
OB_FAIL(ObExprUtil::get_mb_str_info(from_str, cs_type, from_str_byte_num, from_str_byte_offset)) ||
OB_FAIL(ObExprUtil::get_mb_str_info(to_str, cs_type, to_str_byte_num, to_str_byte_offset))) {
if (OB_FAIL(ObExprUtil::get_mb_str_info(ori_str, cs_type,
ori_str_byte_num, ori_str_byte_offset)) ||
OB_FAIL(ObExprUtil::get_mb_str_info(from_str, cs_type,
from_str_byte_num, from_str_byte_offset)) ||
OB_FAIL(ObExprUtil::get_mb_str_info(to_str, cs_type,
to_str_byte_num, to_str_byte_offset))) {
LOG_WARN("get_mb_str_info fail", K(cs_type), K(ori_str), K(from_str), K(to_str));
} else {
StringHashMap from_to_str_map;
// TODO: PostgreSQL's implementation uses two loops: the outer loop iterates over ori_str and
// the inner loop iterates over from_str. That is simpler, but for long strings it is not as
// good as the hash approach used here. The hash approach, however, pays for the extra space
// and for the time needed to build the table.
// Later we can analyse which method is better in which situation.
// TODO: when ori_str is a column and from_str / to_str are constants, the hash table only
// needs to be built once instead of on every call.
if (OB_FAIL(from_to_str_map.create(from_str_byte_num.count(), ObModIds::OB_SQL_EXPR))) {
LOG_WARN("from_to_str_map init failed", K(ret));
} else if (OB_FAIL(insert_map(
from_str, to_str, *(allocator), from_str_byte_num, to_str_byte_num, from_to_str_map))) {
LOG_WARN("construct StringHashMap failed",
K(ret),
K(from_str),
K(to_str),
K(from_str_byte_num),
K(to_str_byte_num));
} else if (OB_FAIL(insert_map(from_str, to_str, *(allocator),
from_str_byte_num, to_str_byte_num, from_to_str_map))) {
LOG_WARN("construct StringHashMap failed", K(ret), K(from_str), K(to_str),
K(from_str_byte_num), K(to_str_byte_num));
} else {
ObString key_iter_str;
ObString val_iter_str;
@ -277,7 +217,7 @@ int ObExprTranslate::translate_hashmap(const ObString& ori_str, const ObString&
} else if (OB_FAIL(ret)) {
LOG_WARN("get item from hash map failed", K(ret), K(key_iter_str));
}
} // for
} // for
// Expose the bytes written to ret_buf as the result string.
res_str.assign_ptr(ret_buf, ret_buf_byte_pos);
}
}
@ -287,9 +227,10 @@ int ObExprTranslate::translate_hashmap(const ObString& ori_str, const ObString&
return ret;
}
// Fills ret_map from key_str / val_str; translate_hashmap calls it with from_str / to_str
// and the per-character byte counts of each.
//
// @param key_str          characters used as keys
// @param val_str          characters used as values
// @param allocator        allocator handed in by the caller
// @param key_str_byte_num byte length of every character of key_str
// @param val_str_byte_num byte length of every character of val_str
// @param ret_map          [out] map to fill; it is cleared first
// @return OB_INVALID_ARGUMENT on the branch that logs a NULL input pointer (its condition
//         is in lines not shown here); otherwise the status of the elided body
int ObExprTranslate::insert_map(const ObString& key_str, const ObString& val_str, ObIAllocator& allocator,
const ObFixedArray<size_t, ObIAllocator>& key_str_byte_num,
const ObFixedArray<size_t, ObIAllocator>& val_str_byte_num, StringHashMap& ret_map)
int ObExprTranslate::insert_map(const ObString &key_str, const ObString &val_str, ObIAllocator &allocator,
const ObFixedArray<size_t, ObIAllocator> &key_str_byte_num,
const ObFixedArray<size_t, ObIAllocator> &val_str_byte_num,
StringHashMap &ret_map)
{
int ret = OB_SUCCESS;
// Drop any entries left over from a previous use of the map.
ret_map.clear();
@ -299,7 +240,7 @@ int ObExprTranslate::insert_map(const ObString& key_str, const ObString& val_str
ret = OB_INVALID_ARGUMENT;
LOG_WARN("input_str.ptr() is NULL", K(ret), KP(key_str.ptr()), KP(val_str.ptr()));
} else {
char* buf = NULL;
char *buf = NULL;
size_t buf_len = 0;
ObString key_iter_str;
ObString val_iter_str;
@ -357,14 +298,15 @@ int ObExprTranslate::insert_map(const ObString& key_str, const ObString& val_str
return ret;
}
// Datum-based evaluation of translate(ori_str, from_str, to_str).
//
// Evaluates the three arguments, then runs ObExprTranslate::translate_hashmap with the
// collation taken from expr.datum_meta_. A null flag from translate_hashmap or an empty
// output string produces a NULL datum; otherwise the translated string is stored.
//
// @param expr      expression being evaluated; args_[0..2] are the three arguments
// @param ctx       evaluation context
// @param res_datum [out] result datum
// @return OB_SUCCESS, or the error from argument evaluation / translate_hashmap
int calc_translate_expr(const ObExpr& expr, ObEvalCtx& ctx, ObDatum& res_datum)
int calc_translate_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
{
int ret = OB_SUCCESS;
// translate(ori_str, from_str, to_str);
ObDatum* ori_datum = NULL;
ObDatum* from_datum = NULL;
ObDatum* to_datum = NULL;
if (OB_FAIL(expr.args_[0]->eval(ctx, ori_datum)) || OB_FAIL(expr.args_[1]->eval(ctx, from_datum)) ||
ObDatum *ori_datum = NULL;
ObDatum *from_datum = NULL;
ObDatum *to_datum = NULL;
if (OB_FAIL(expr.args_[0]->eval(ctx, ori_datum)) ||
OB_FAIL(expr.args_[1]->eval(ctx, from_datum)) ||
OB_FAIL(expr.args_[2]->eval(ctx, to_datum))) {
LOG_WARN("eval arg failed", K(ret), KP(ori_datum), KP(from_datum), KP(to_datum), K(expr));
} else if (ori_datum->is_null() || from_datum->is_null() || to_datum->is_null()) {
@ -377,11 +319,12 @@ int calc_translate_expr(const ObExpr& expr, ObEvalCtx& ctx, ObDatum& res_datum)
bool is_null = false;
// res_alloc is passed to translate_hashmap as the result allocator.
ObExprStrResAlloc res_alloc(expr, ctx);
ObCollationType cs_type = expr.datum_meta_.cs_type_;
if (OB_FAIL(ObExprTranslate::translate_hashmap(
ori_str, from_str, to_str, cs_type, ctx.get_reset_tmp_alloc(), res_alloc, res_str, is_null))) {
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
if (OB_FAIL(ObExprTranslate::translate_hashmap(ori_str, from_str, to_str, cs_type,
alloc_guard.get_allocator(), res_alloc, res_str, is_null))) {
LOG_WARN("translate_hashmap failed", K(ret), K(ori_str), K(from_str), K(to_str));
} else if (is_null || 0 == res_str.length()) {
// must set to null in oracle mode if length is 0
// In Oracle mode a result of length 0 must be set to NULL.
res_datum.set_null();
} else {
res_datum.set_string(res_str);
@ -390,10 +333,10 @@ int calc_translate_expr(const ObExpr& expr, ObEvalCtx& ctx, ObDatum& res_datum)
return ret;
}
// Datum-based evaluation of the single-argument form handled here (only args_[0] is evaluated).
// The handling of a NULL and of a non-NULL argument is in lines not shown in this view.
//
// @param expr      expression being evaluated
// @param ctx       evaluation context
// @param res_datum [out] result datum
// @return OB_SUCCESS, or the error from evaluating the argument
int calc_translate_using_expr(const ObExpr& expr, ObEvalCtx& ctx, ObDatum& res_datum)
int calc_translate_using_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
{
int ret = OB_SUCCESS;
ObDatum* ori_datum = NULL;
ObDatum *ori_datum = NULL;
if (OB_FAIL(expr.args_[0]->eval(ctx, ori_datum))) {
LOG_WARN("eval arg failed", K(ret), KP(ori_datum), K(expr));
} else if (ori_datum->is_null()) {
@ -404,7 +347,8 @@ int calc_translate_using_expr(const ObExpr& expr, ObEvalCtx& ctx, ObDatum& res_d
return ret;
}
// Code-generation hook for this expression. expr_cg_ctx is not used; what is done with
// raw_expr and rt_expr is in lines not shown in this view.
//
// @param expr_cg_ctx code-generation context (unused)
// @param raw_expr    raw expression being compiled
// @param rt_expr     [out] runtime expression
// @return status of the elided body
int ObExprTranslate::cg_expr(ObExprCGCtx& expr_cg_ctx, const ObRawExpr& raw_expr, ObExpr& rt_expr) const
int ObExprTranslate::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
ObExpr &rt_expr) const
{
int ret = OB_SUCCESS;
UNUSED(expr_cg_ctx);
@ -422,5 +366,5 @@ int ObExprTranslate::cg_expr(ObExprCGCtx& expr_cg_ctx, const ObRawExpr& raw_expr
return ret;
}
} // namespace sql
} // namespace oceanbase
} // namespace sql
} // namespace oceanbase