[FEAT MERGE] Lob SQL refactoring (Mem-LobLocator, expressions and dbms_lob adaptions)

Co-authored-by: chaser-ch <chaser.ch@antgroup.com>
This commit is contained in:
obdev
2023-01-28 20:40:15 +08:00
committed by ob-robot
parent 4bb1033505
commit 3d4f554258
350 changed files with 19091 additions and 3918 deletions

View File

@ -18,6 +18,7 @@
#include "sql/resolver/expr/ob_raw_expr_util.h"
#include "sql/engine/expr/ob_expr_type_to_str.h"
#include "sql/engine/px/ob_dfo.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
namespace oceanbase
{
@ -344,6 +345,72 @@ int ObExprValuesOp::get_real_batch_obj_type(ObDatumMeta &src_meta,
return ret;
}
// Evaluate real_src_expr and store its value, converted to the LOB type of
// dst_expr, into dst_expr's datum for the current eval context.
//
// @param real_src_expr  expression producing the source value
// @param src_obj_meta   object meta (type / charset) of the source value
// @param dst_expr       destination expression whose datum receives the result
// @return OB_SUCCESS on success, otherwise the error code of the failing step
//
// Two paths are used:
//  1. generic path: cast through datum_caster_, adjust the lob header when the
//     source is lob storage, then deep copy the result into dst_expr's datum;
//  2. fast path (string tc -> text tc with identical charset): build the temp
//     lob directly from the source string, skipping the intermediate cast.
int ObExprValuesOp::eval_values_op_dynamic_cast_to_lob(ObExpr &real_src_expr,
                                                       ObObjMeta &src_obj_meta,
                                                       ObExpr *dst_expr)
{
  int ret = OB_SUCCESS;
  ObDatum *datum = NULL;
  // large string types to temp lob needs lots of memory,
  // for example char type from send long/piece data which is 40M, cast to longtext
  // 1. char to longtext 40M (only used to add lob header if cs type is the same)
  // 2. deep copy use another 40M
  // if cast only used to build temp lob header, memory allocation in step 1 can be avoid.
  bool string_to_lob_withsame_cs_type = false;
  if (ob_is_string_tc(src_obj_meta.get_type())
      && ob_is_text_tc(dst_expr->obj_meta_.get_type())
      && (src_obj_meta.get_charset_type() == dst_expr->obj_meta_.get_charset_type())) {
    string_to_lob_withsame_cs_type = true;
  }
  ObDatum &dst_datum = dst_expr->locate_datum_for_write(eval_ctx_);
  if (!string_to_lob_withsame_cs_type) {
    if (OB_FAIL(datum_caster_.to_type(dst_expr->datum_meta_, real_src_expr,
                                      cm_, datum))) {
      LOG_WARN("fail to dynamic cast", K(dst_expr->datum_meta_),
               K(real_src_expr), K(cm_), K(ret));
    } else if (lib::is_oracle_mode() && dst_expr->datum_meta_.type_ == common::ObLongTextType) {
      if (ob_is_text_tc(real_src_expr.datum_meta_.type_) && dst_expr->obj_meta_.has_lob_header()) {
        if (datum->get_string().ptr() == NULL || datum->get_string().length() == 0) {
          datum->set_null(); // compat 4.0, empty text to ObLobType, result is NULL
        }
      }
    }
    if (OB_SUCC(ret)) {
      ObExprStrResAlloc res_alloc(*dst_expr, eval_ctx_);
      // need adjust lob header, since lob to lob may not handle headers
      if (is_lob_storage(src_obj_meta.get_type()) &&
          OB_FAIL(ob_adjust_lob_datum(*datum,
                                      src_obj_meta,
                                      dst_expr->obj_meta_,
                                      eval_ctx_.exec_ctx_.get_eval_tmp_allocator()))) {
        LOG_WARN("adjust lob datum failed",
                 K(ret), K(*datum), K(src_obj_meta), K(dst_expr->obj_meta_));
      } else if (OB_FAIL(dst_datum.deep_copy(*datum, res_alloc))) {
        LOG_WARN("fail to deep copy datum from cast res datum", K(ret), K(*datum));
      }
    }
  } else {
    ObDatum *src_datum = NULL;
    if (OB_FAIL(real_src_expr.eval(eval_ctx_, src_datum))) {
      LOG_WARN("fail to eval src", K(real_src_expr), K(cm_), K(ret));
    } else if (src_datum->is_null()) {
      // A NULL source must stay NULL; without this check it would be turned
      // into an empty (non-NULL) temp lob outside of oracle mode.
      dst_datum.set_null();
    } else if (src_datum->get_string().empty()
               && lib::is_oracle_mode()
               && dst_expr->datum_meta_.type_ == common::ObLongTextType) {
      // oracle mode: empty string to longtext results in NULL
      dst_datum.set_null();
    } else {
      ObString src_string = src_datum->get_string();
      ObTextStringDatumResult lob_result(dst_expr->obj_meta_.get_type(),
                                         dst_expr, &eval_ctx_, &dst_datum);
      if (OB_FAIL(lob_result.init(src_string.length()))) {
        LOG_WARN("fail to init lob result", K(ret), K(src_string.length()));
      } else if (OB_FAIL(lob_result.append(src_string))) {
        LOG_WARN("fail to append string to lob result", K(ret), K(src_string.length()));
      } else {
        lob_result.set_result();
      }
    }
  }
  return ret;
}
OB_INLINE int ObExprValuesOp::calc_next_row()
{
int ret = OB_SUCCESS;
@ -432,7 +499,8 @@ OB_INLINE int ObExprValuesOp::calc_next_row()
// T_QUESTIONMARK的表达式, 该表达式是没有reserve内存的,因此会导致ptr指向非预期
// 内存, 可能出现结果不对
} else if (src_meta.type_ == dst_expr->datum_meta_.type_
&& src_meta.cs_type_ == dst_expr->datum_meta_.cs_type_) {
&& src_meta.cs_type_ == dst_expr->datum_meta_.cs_type_
&& src_obj_meta.has_lob_header() == dst_expr->obj_meta_.has_lob_header()) {
// 将values中数据copy到output中
if (OB_FAIL(src_expr->eval(eval_ctx_, datum))) {
// catch err and print log later
@ -477,7 +545,7 @@ OB_INLINE int ObExprValuesOp::calc_next_row()
LOG_WARN("fail to do to_type", K(ret), K(*dst_expr), K(real_src_expr));
}
}
} else {
} else if (!dst_expr->obj_meta_.is_lob_storage()) {
if (OB_FAIL(datum_caster_.to_type(dst_expr->datum_meta_, real_src_expr,
cm_, datum))) {
LOG_WARN("fail to dynamic cast", K(dst_expr->datum_meta_),
@ -487,9 +555,16 @@ OB_INLINE int ObExprValuesOp::calc_next_row()
LOG_USER_WARN(OB_ERR_CANT_CREATE_GEOMETRY_OBJECT);
}
}
} else { // dst type is lob
if (OB_FAIL(eval_values_op_dynamic_cast_to_lob(real_src_expr, src_obj_meta, dst_expr))) {
LOG_WARN("fail to dynamic cast to lob types", K(dst_expr->datum_meta_),
K(real_src_expr), K(cm_), K(ret));
} else {
dst_expr->set_evaluated_projected(eval_ctx_);
}
}
if (OB_SUCC(ret)) {
if (OB_SUCC(ret) && !dst_expr->obj_meta_.is_lob_storage()) {
ObDatum &dst_datum = dst_expr->locate_datum_for_write(eval_ctx_);
if (ObObjDatumMapType::OBJ_DATUM_STRING == dst_expr->obj_datum_map_) {
ObExprStrResAlloc res_alloc(*dst_expr, eval_ctx_);

View File

@ -78,6 +78,9 @@ private:
ObObjMeta &src_obj_meta,
ObExpr *src_expr,
int64_t group_idx);
int eval_values_op_dynamic_cast_to_lob(ObExpr &real_src_expr,
ObObjMeta &src_obj_meta,
ObExpr *dst_expr);
private:
DISALLOW_COPY_AND_ASSIGN(ObExprValuesOp);

View File

@ -21,6 +21,7 @@
#include "sql/engine/ob_exec_context.h"
#include "pl/ob_pl_user_type.h"
#include "sql/engine/expr/ob_expr.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
namespace oceanbase
@ -185,9 +186,14 @@ int ObFunctionTableOp::inner_get_next_row()
MY_SPEC.column_exprs_.at(i)->locate_datum_for_write(eval_ctx_).set_null();
} else {
const ObObjDatumMapType &datum_map = MY_SPEC.column_exprs_.at(i)->obj_datum_map_;
if (OB_FAIL(MY_SPEC.column_exprs_.at(i)->locate_datum_for_write(eval_ctx_)
.from_obj(obj_stack[i], datum_map))) {
ObExpr * const &expr = MY_SPEC.column_exprs_.at(i);
ObDatum &datum = expr->locate_datum_for_write(eval_ctx_);
if (OB_FAIL(datum.from_obj(obj_stack[i], datum_map))) {
LOG_WARN("failed to convert datum", K(ret));
} else if (is_lob_storage(obj_stack[i].get_type()) &&
OB_FAIL(ob_adjust_lob_datum(obj_stack[i], expr->obj_meta_, datum_map,
get_exec_ctx().get_allocator(), datum))) {
LOG_WARN("adjust lob datum failed", K(ret), K(obj_stack[i].get_meta()), K(expr->obj_meta_));
}
}
if (OB_SUCC(ret)) {

View File

@ -934,13 +934,16 @@ int JtFuncHelpler::cast_to_res(JtScanCtx* ctx, ObIJsonBase* js_val, JtColNode& c
default: {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected dst_type", K(dst_type));
int tmp_ret = set_error_val(ctx, col_node, ret);
if (tmp_ret != OB_SUCCESS) {
LOG_WARN("failed to set error val.", K(tmp_ret));
}
break;
}
}
if (OB_SUCC(ret) && is_lob_storage(dst_type)) {
ObString val = res.get_string();
if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(*expr, *ctx->eval_ctx_, res, val, &ctx->row_alloc_))) {
LOG_WARN("fail to pack res result.", K(ret));
}
}
LOG_DEBUG("finish cast_to_res.", K(ret), K(dst_type));
return ret;
@ -982,7 +985,8 @@ int JtFuncHelpler::set_error_val(JtScanCtx* ctx, JtColNode& col_node, int& ret)
default_expr->datum_meta_.type_,
&ctx->row_alloc_,
default_expr->datum_meta_.cs_type_,
col_node.err_val_, false))) {
col_node.err_val_, false,
default_expr->obj_meta_.has_lob_header()))) {
LOG_WARN("failed: parse value to jsonBase", K(tmp_ret));
}
} else if (OB_SUCCESS != (tmp_ret = ObJsonExprHelper::transform_scalar_2jsonBase(datum,
@ -1420,7 +1424,8 @@ int JtColNode::set_val_on_empty(JtScanCtx* ctx, bool& need_cast_res)
default_expr->datum_meta_.type_,
&ctx->row_alloc_,
default_expr->datum_meta_.cs_type_,
err_val_, false))) {
err_val_, false,
default_expr->obj_meta_.has_lob_header()))) {
LOG_WARN("failed: parse value to jsonBase", K(ret));
} else {
curr_ = iter_ = err_val_;
@ -1463,7 +1468,8 @@ int JtColNode::set_val_on_empty(JtScanCtx* ctx, bool& need_cast_res)
default_expr->datum_meta_.type_,
&ctx->row_alloc_,
default_expr->datum_meta_.cs_type_,
emp_val_, false))) {
emp_val_, false,
default_expr->obj_meta_.has_lob_header()))) {
LOG_WARN("failed: parse value to jsonBase", K(ret));
}
} else if (OB_FAIL(ObJsonExprHelper::transform_scalar_2jsonBase(datum,
@ -2650,38 +2656,41 @@ int ObJsonTableOp::inner_get_next_row()
clear_evaluated_flag();
reset_columns();
if (OB_FAIL(jt_root_->get_next_row(in_, &jt_ctx_, is_root_null))) {
LOG_WARN("failed to open get next row.", K(ret));
if (ret != OB_ITER_END) {
LOG_WARN("failed to open get next row.", K(ret));
}
}
} else {
clear_evaluated_flag();
ObDatum *value = nullptr;
if (OB_FAIL(MY_SPEC.value_expr_->eval(eval_ctx_, value))) {
LOG_WARN("failed to eval value expr", K(ret));
} else if (value->is_null()) {
common::ObObjMeta& doc_obj_datum = MY_SPEC.value_expr_->obj_meta_;
ObDatumMeta& doc_datum = MY_SPEC.value_expr_->datum_meta_;
ObObjType doc_type = doc_datum.type_;
ObCollationType doc_cs_type = doc_datum.cs_type_;
ObString j_str;
bool is_null = false;
if (doc_type == ObNullType) {
ret = OB_ITER_END;
} else if (doc_type == ObNCharType || !(doc_type == ObJsonType || ob_is_string_type(doc_type))) {
ret = OB_ERR_INPUT_JSON_TABLE;
LOG_WARN("fail to get json base", K(ret), K(doc_type));
} else {
reset_columns();
ObObjType doc_type = MY_SPEC.value_expr_->datum_meta_.type_;
ObCollationType doc_cs_type = MY_SPEC.value_expr_->datum_meta_.cs_type_;
ObString j_str;
if (doc_type == ObLobType) {
const ObLobLocator& lob_locator = value->get_lob_locator();
j_str.assign_ptr(lob_locator.get_payload_ptr(), lob_locator.get_payload_length());
} else {
j_str = value->get_string();
}
if ((ob_is_string_type(doc_type))
&& (doc_cs_type != CS_TYPE_BINARY)
&& (ObCharset::charset_type_by_coll(doc_cs_type) != CHARSET_UTF8MB4)) {
if (OB_FAIL(ObJsonExprHelper::get_json_or_str_data(MY_SPEC.value_expr_,eval_ctx_,
jt_ctx_.row_alloc_, j_str, is_null))) {
LOG_WARN("get real data failed", K(ret));
} else if (is_null) {
ret = OB_ITER_END;
} else if ((ob_is_string_type(doc_type) || doc_type == ObLobType)
&& (doc_cs_type != CS_TYPE_BINARY)
&& (ObCharset::charset_type_by_coll(doc_cs_type) != CHARSET_UTF8MB4)) {
// need convert to utf8 first, we are using GenericInsituStringStream<UTF8<> >
char *buf = NULL;
char *buf = nullptr;
const int64_t factor = 2;
int64_t buf_len = value->get_string().length() * factor;
int64_t buf_len = j_str.length() * factor;
uint32_t result_len = 0;
buf = reinterpret_cast<char*>(jt_ctx_.row_alloc_.alloc(buf_len));
if (OB_ISNULL(buf)) {
if (OB_ISNULL(buf = static_cast<char*>(jt_ctx_.row_alloc_.alloc(buf_len)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(ret));
} else if (OB_FAIL(ObCharset::charset_convert(doc_cs_type, j_str.ptr(),
@ -2702,10 +2711,6 @@ int ObJsonTableOp::inner_get_next_row()
bool is_ensure_json = (doc_type == ObJsonType);
if (OB_FAIL(ret)) {
} else if (!((doc_type == ObLobType || doc_type == ObJsonType || ob_is_string_type(doc_type))
&& doc_type != ObNCharType)) {
ret = OB_ERR_INPUT_JSON_TABLE;
LOG_WARN("fail to get json base", K(ret), K(j_in_type));
} else if (OB_FAIL(ObJsonBaseFactory::get_json_base(&jt_ctx_.row_alloc_, j_str, j_in_type, expect_type, in_, parse_flag))
|| (in_->json_type() != ObJsonNodeType::J_ARRAY && in_->json_type() != ObJsonNodeType::J_OBJECT)) {
if (OB_FAIL(ret) || (!is_ensure_json)) {

View File

@ -18,6 +18,7 @@
#include "sql/code_generator/ob_static_engine_cg.h"
#include "storage/blocksstable/encoding/ob_encoding_query_util.h"
#include "storage/blocksstable/ob_datum_row.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
namespace oceanbase
{
@ -1078,9 +1079,13 @@ int ObBlackFilterExecutor::filter(ObObj *objs, int64_t col_cnt, bool &filtered)
ObEvalCtx &eval_ctx = op_.get_eval_ctx();
for (int64_t i = 0; OB_SUCC(ret) && i < filter_.column_exprs_.count(); ++i) {
filter_.column_exprs_.at(i)->get_eval_info(eval_ctx).projected_ = true;
ObDatum &expr_datum = filter_.column_exprs_.at(i)->locate_datum_for_write(eval_ctx);
ObExpr * const &expr = filter_.column_exprs_.at(i);
ObDatum &expr_datum = expr->locate_datum_for_write(eval_ctx);
if (OB_FAIL(expr_datum.from_obj(objs[i]))) {
LOG_WARN("Failed to convert object from datum", K(ret), K(objs[i]));
} else if (is_lob_storage(objs[i].get_type()) &&
OB_FAIL(ob_adjust_lob_datum(objs[i], expr->obj_meta_, allocator_, expr_datum))) {
LOG_WARN("adjust lob datum failed", K(ret), K(objs[i]), K(expr->obj_meta_));
}
}
if (OB_SUCC(ret) && OB_FAIL(filter(eval_ctx, filtered))) {

View File

@ -14,6 +14,7 @@
#include "ob_values_op.h"
#include "sql/engine/ob_exec_context.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
namespace oceanbase
{
@ -80,6 +81,10 @@ int ObValuesOp::inner_get_next_row()
LOG_WARN("type mismatch", K(ret), K(i), K(cell.get_type()), K(*expr));
} else if (OB_FAIL(datum.from_obj(cell, expr->obj_datum_map_))) {
LOG_WARN("convert obj to datum failed", K(ret));
} else if (is_lob_storage(cell.get_type()) &&
OB_FAIL(ob_adjust_lob_datum(cell, expr->obj_meta_, expr->obj_datum_map_,
get_exec_ctx().get_allocator(), datum))) {
LOG_WARN("adjust lob datum failed", K(ret), K(cell.get_meta()), K(expr->obj_meta_));
} else {
expr->set_evaluated_projected(eval_ctx_);
}