// File: oceanbase/src/sql/engine/expr/ob_expr_json_extract.cpp
// (329 lines, 12 KiB, C++)

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
// This file contains implementation for json_extract.
#define USING_LOG_PREFIX SQL_ENG
#include "ob_expr_json_extract.h"
#include "ob_expr_json_func_helper.h"
#include "lib/json_type/ob_json_tree.h"
using namespace oceanbase::common;
using namespace oceanbase::sql;
namespace oceanbase
{
namespace sql
{
// Constructs the JSON_EXTRACT expression operator: hands the allocator and the
// T_FUN_SYS_JSON_EXTRACT / N_JSON_EXTRACT identifiers to the ObFuncExprOperator base.
ObExprJsonExtract::ObExprJsonExtract(ObIAllocator &alloc)
    : ObFuncExprOperator(alloc,
                         T_FUN_SYS_JSON_EXTRACT,
                         N_JSON_EXTRACT,
                         MORE_THAN_ONE,
                         NOT_ROW_DIMENSION)
{}
// The destructor body is empty: nothing is released here.
ObExprJsonExtract::~ObExprJsonExtract() {}
/**
 * Deduces the result type of JSON_EXTRACT and the calculation types of its arguments.
 *
 * @param type         [out] result type; set to JSON (with the default JSON length) on success.
 * @param types_stack  [in/out] argument types; index 0 is the JSON document, the rest are paths.
 * @param param_num    number of arguments, must be at least 2.
 * @param type_ctx     unused.
 * @return OB_SUCCESS, OB_ERR_PARAM_SIZE when fewer than two arguments are supplied, or the
 *         error reported by ObJsonExprHelper::is_valid_for_json for the document argument.
 *
 * The deduced result type is always JSON on success, even when an argument is typed NULL;
 * NULL results are produced when the expression is evaluated.
 */
int ObExprJsonExtract::calc_result_typeN(ObExprResType& type,
                                         ObExprResType* types_stack,
                                         int64_t param_num,
                                         ObExprTypeCtx& type_ctx) const
{
  UNUSED(type_ctx);
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(param_num < 2)) {
    ret = OB_ERR_PARAM_SIZE;
    LOG_WARN("invalid argument number", K(ret), K(param_num));
  } else {
    // 1st param is json doc
    ObObjType in_type = types_stack[0].get_type();
    if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(types_stack, 0, N_JSON_EXTRACT))) {
      LOG_WARN("wrong type for json doc.", K(ret), K(in_type));
    } else if (in_type == ObNullType || in_type == ObJsonType) {
      // NULL and JSON documents need no calculation-type adjustment
    } else if (ob_is_string_type(in_type) && types_stack[0].get_collation_type() != CS_TYPE_BINARY) {
      // non-binary string document: have it converted to utf8mb4 before evaluation
      if (types_stack[0].get_charset_type() != CHARSET_UTF8MB4) {
        types_stack[0].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN);
      }
    }

    // following params are path strings
    for (int64_t i = 1; i < param_num && OB_SUCC(ret); i++) {
      if (types_stack[i].get_type() == ObNullType) {
        // NULL path: left untouched
      } else if (ob_is_string_type(types_stack[i].get_type())) {
        if (types_stack[i].get_charset_type() != CHARSET_UTF8MB4) {
          types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN);
        }
      } else {
        // non-string path: cast it to a utf8mb4 long text so it can be parsed as a path
        types_stack[i].set_calc_type(ObLongTextType);
        types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN);
      }
    }

    if (OB_SUCC(ret)) {
      type.set_json();
      type.set_length((ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]).get_length());
    }
  }
  return ret;
}
/**
 * Evaluates JSON_EXTRACT(json_doc, path[, path] ...) on ObObj arguments.
 *
 * @param result     [out] the extracted JSON value (raw binary, ObJsonType) or NULL.
 * @param objs       argument values; objs[0] is the JSON document, objs[1..] are paths.
 * @param param_num  number of entries in objs.
 * @param expr_ctx   evaluation context; its calc_buf_ is used for all allocations.
 * @return OB_SUCCESS, or an error code. Failures while handling the document are reported as
 *         OB_ERR_INVALID_TYPE_FOR_JSON, OB_ERR_INVALID_JSON_CHARSET or
 *         OB_ERR_INVALID_JSON_TEXT_IN_PARAM; path parsing / seek / serialization errors are
 *         returned as produced by the callee.
 *
 * The result is NULL when the document or any path is NULL, or when no path matches.
 * A single match is returned as-is unless more than one path was supplied or a path can
 * match many values; in those cases the matches are wrapped in a JSON array.
 */
int ObExprJsonExtract::calc_resultN(ObObj &result, const ObObj *objs,
                                    int64_t param_num, ObExprCtx &expr_ctx) const
{
  int ret = OB_SUCCESS;
  ObIAllocator *allocator = expr_ctx.calc_buf_;
  ObIJsonBase *j_base = NULL;
  bool is_null_result = (get_result_type().get_type() == ObNullType);
  // more than one path argument always produces an array-wrapped result
  bool may_match_many = (param_num > 2);

  // Stage 1: validate the document argument and parse it into j_base.
  if (is_null_result) {
    // do nothing;
  } else if (result_type_.get_collation_type() != CS_TYPE_UTF8MB4_BIN) {
    ret = OB_ERR_INVALID_JSON_CHARSET;
    LOG_WARN("invalid out put charset", K(ret), K(result_type_));
  } else if (objs[0].is_null()) {
    is_null_result = true; // mysql return NULL result
  } else if (objs[0].get_type() != ObJsonType && ob_is_string_type(objs[0].get_type()) == false) {
    ret = OB_ERR_INVALID_TYPE_FOR_JSON;
    LOG_WARN("input type error", K(objs[0].get_type()));
  } else if (OB_FAIL(ObJsonExprHelper::ensure_collation(objs[0].get_type(),
                                                        objs[0].get_collation_type()))) {
    LOG_WARN("fail to ensure collation", K(ret), K(objs[0].get_type()), K(objs[0].get_collation_type()));
  } else {
    ObString j_str = objs[0].get_string();
    ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(objs[0].get_type());
    if (OB_FAIL(ObJsonBaseFactory::get_json_base(allocator, j_str, j_in_type, j_in_type, j_base))) {
      LOG_WARN("fail to get json base", K(ret), K(j_in_type));
      ret = OB_ERR_INVALID_JSON_TEXT;
    }
  }

  // Stage 2: publish NULL, map document errors, or run the path lookups.
  if (is_null_result) {
    // Set the NULL explicitly instead of relying on the caller having pre-initialized
    // `result`; this mirrors what eval_json_extract does for the same case.
    result.set_null();
  } else if (OB_UNLIKELY(OB_FAIL(ret))) {
    if (ret == OB_ERR_INVALID_TYPE_FOR_JSON) {
      LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_JSON, 1, "json_extract");
    } else if (ret == OB_ERR_INVALID_JSON_CHARSET) {
      // charset error is returned unchanged, without a user error
    } else {
      // every other document failure is reported as invalid JSON text in the argument
      ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM;
      LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM);
    }
    LOG_WARN("fail to handle json param 0 in json extract in old sql engine", K(ret));
  } else {
    ObJsonBaseVector hit;
    ObJsonPathCache ctx_cache(allocator);
    ObJsonPathCache *path_cache = &ctx_cache;
    // collect the matches of every path into `hit`; stop at the first NULL path or error
    for (int64_t i = 1; OB_SUCC(ret) && (!is_null_result) && i < param_num; i++) {
      if (objs[i].get_type() == ObNullType) {
        is_null_result = true;
      } else {
        ObString path_text = objs[i].get_string();
        ObJsonPath *j_path = NULL;
        if (OB_FAIL(ObJsonExprHelper::find_and_add_cache(path_cache, j_path, path_text, i, true))) {
          LOG_WARN("parse text to path failed", K(path_text), K(ret));
        } else if (OB_FAIL(j_base->seek(*j_path, j_path->path_node_cnt(), true, false, hit))) {
          LOG_WARN("json seek failed", K(path_text), K(ret));
        } else {
          if (j_path->can_match_many()) {
            may_match_many = true;
          }
        }
      }
    }

    int32_t hit_size = hit.size();
    ObJsonArray j_arr_res(allocator);
    ObIJsonBase *jb_res = NULL;
    if (OB_UNLIKELY(OB_FAIL(ret))) {
      LOG_WARN("json seek failed", K(ret));
    } else if (hit_size == 0 || is_null_result) {
      result.set_null();
    } else {
      if (hit_size == 1 && (may_match_many == false)) {
        jb_res = hit[0];
      } else {
        jb_res = &j_arr_res;
        ObJsonNode *j_node = NULL;
        ObIJsonBase *jb_node = NULL;
        for (int32_t i = 0; OB_SUCC(ret) && i < hit_size; i++) {
          if (OB_FAIL(ObJsonBaseFactory::transform(allocator, hit[i],
                                                   ObJsonInType::JSON_TREE, jb_node))) { // to tree
            LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i])));
          } else { // is_tree, need deep copy, cause array append will change parent of value.
            j_node = static_cast<ObJsonNode *>(jb_node);
            if (OB_FAIL(jb_res->array_append(j_node->clone(allocator)))) {
              LOG_WARN("result array append failed", K(ret), K(i), K(*j_node));
            }
          }
        }
      }

      // serialize the chosen value to raw JSON binary and hand it to the caller
      ObString raw_bin;
      if (OB_FAIL(ret)) {
        LOG_WARN("json extarct get results failed", K(ret));
      } else if (OB_FAIL(jb_res->get_raw_binary(raw_bin, allocator))) {
        LOG_WARN("json extarct get result binary failed", K(ret));
      } else {
        result.set_collation_type(CS_TYPE_UTF8MB4_BIN);
        result.set_string(ObJsonType, raw_bin.ptr(), raw_bin.length());
      }
    }
  }
  return ret;
}
// Evaluation function that ignores its inputs and always writes NULL to `res`.
// cg_expr installs it when the runtime expression's datum type is ObNullType.
int ObExprJsonExtract::eval_json_extract_null(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res)
{
  UNUSED(ctx);
  UNUSED(expr);
  int ret = OB_SUCCESS;
  res.set_null();
  return ret;
}
int ObExprJsonExtract::eval_json_extract(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res)
{
int ret = OB_SUCCESS;
ObDatum *json_datum = NULL;
ObExpr *json_arg = expr.args_[0];
ObObjType val_type = json_arg->datum_meta_.type_;
ObCollationType cs_type = json_arg->datum_meta_.cs_type_;
ObIJsonBase *j_base = NULL;
bool is_null_result = false;
bool may_match_many = (expr.arg_cnt_ > 2);
common::ObArenaAllocator &allocator = ctx.get_reset_tmp_alloc();
if (expr.datum_meta_.cs_type_ != CS_TYPE_UTF8MB4_BIN) {
ret = OB_ERR_INVALID_JSON_CHARSET;
LOG_WARN("invalid out put charset", K(ret), K(expr.datum_meta_.cs_type_));
} else if (OB_UNLIKELY(OB_FAIL(json_arg->eval(ctx, json_datum)))) {
LOG_WARN("eval json arg failed", K(ret));
} else if (json_datum->is_null()) {
is_null_result = true; // mysql return NULL result
} else if (val_type != ObJsonType && ob_is_string_type(val_type) == false) {
ret = OB_ERR_INVALID_TYPE_FOR_JSON;
LOG_WARN("input type error", K(val_type));
} else if (OB_FAIL(ObJsonExprHelper::ensure_collation(val_type, cs_type))) {
LOG_WARN("fail to ensure collation", K(ret), K(val_type), K(cs_type));
} else {
ObString j_str = json_datum->get_string();
ObJsonInType j_in_type = ObJsonExprHelper::get_json_internal_type(val_type);
if (OB_FAIL(ObJsonBaseFactory::get_json_base(&allocator, j_str, j_in_type, j_in_type, j_base))) {
LOG_WARN("fail to get json base", K(ret), K(j_in_type));
ret = OB_ERR_INVALID_JSON_TEXT;
}
}
if (OB_UNLIKELY(OB_FAIL(ret))) {
if (ret == OB_ERR_INVALID_TYPE_FOR_JSON) {
LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_JSON, 1, "json_extract");
} else if (ret == OB_ERR_INVALID_JSON_CHARSET) {
} else {
ret = OB_ERR_INVALID_JSON_TEXT_IN_PARAM;
LOG_USER_ERROR(OB_ERR_INVALID_JSON_TEXT_IN_PARAM);
}
LOG_WARN("fail to handle json param 0 in json extract in new sql engine", K(ret));
} else if (is_null_result == false) {
ObJsonBaseVector hit;
ObJsonPathCache ctx_cache(&allocator);
ObJsonPathCache* path_cache = ObJsonExprHelper::get_path_cache_ctx(expr.expr_ctx_id_, &ctx.exec_ctx_);
path_cache = ((path_cache != NULL) ? path_cache : &ctx_cache);
for (int64_t i = 1; OB_SUCC(ret) && (!is_null_result) && i < expr.arg_cnt_; i++) {
ObDatum *path_data = NULL;
if (OB_FAIL(expr.args_[i]->eval(ctx, path_data))) {
LOG_WARN("eval json path datum failed", K(ret));
} else if (path_data->is_null()) {
is_null_result = true;
} else {
ObString path_text = path_data->get_string();
ObJsonPath *j_path = NULL;
if (OB_FAIL(ObJsonExprHelper::find_and_add_cache(path_cache, j_path, path_text, i, true))) {
LOG_WARN("parse text to path failed", K(path_data->get_string()), K(ret));
} else if (OB_FAIL(j_base->seek(*j_path, j_path->path_node_cnt(), true, false, hit))) {
LOG_WARN("json seek failed", K(path_data->get_string()), K(ret));
} else {
if (j_path->can_match_many()) {
may_match_many = true;
}
}
}
}
int32_t hit_size = hit.size();
ObJsonArray j_arr_res(&allocator);
ObIJsonBase *jb_res = NULL;
if (OB_UNLIKELY(OB_FAIL(ret))) {
LOG_WARN("json seek failed", K(ret));
} else if (hit_size == 0 || is_null_result) {
res.set_null();
} else {
if (hit_size == 1 && (may_match_many == false)) {
jb_res = hit[0];
} else {
jb_res = &j_arr_res;
ObJsonNode *j_node = NULL;
ObIJsonBase *jb_node = NULL;
for (int32_t i = 0; OB_SUCC(ret) && i < hit_size; i++) {
if (OB_FAIL(ObJsonBaseFactory::transform(&allocator, hit[i],
ObJsonInType::JSON_TREE, jb_node))) { // to tree
LOG_WARN("fail to transform to tree", K(ret), K(i), K(*(hit[i])));
} else { // is_tree, need deep copy, cause array append will change parent of value.
j_node = static_cast<ObJsonNode *>(jb_node);
if (OB_FAIL(jb_res->array_append(j_node->clone(&allocator)))) {
LOG_WARN("result array append failed", K(ret), K(i), K(*j_node));
}
}
}
}
ObString raw_str;
if (OB_FAIL(ret)) {
LOG_WARN("json extarct get results failed", K(ret));
} else if (OB_FAIL(jb_res->get_raw_binary(raw_str, &allocator))) {
LOG_WARN("json extarct get result binary failed", K(ret));
} else {
char *buf = expr.get_str_res_mem(ctx, raw_str.length());
if (OB_UNLIKELY(buf == NULL)){
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory for result failed", K(raw_str.length()), K(ret));
} else {
MEMCPY(buf, raw_str.ptr(), raw_str.length());
res.set_string(buf, raw_str.length());
}
}
}
} else if (OB_SUCC(ret) && is_null_result) {
res.set_null();
}
return ret;
}
// Code generation hook: picks the evaluation function for the runtime expression.
// A runtime expression whose datum type is ObNullType gets the constant-NULL evaluator;
// every other type gets the full JSON_EXTRACT evaluator. Always returns OB_SUCCESS.
int ObExprJsonExtract::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
                               ObExpr &rt_expr) const
{
  UNUSED(raw_expr);
  UNUSED(expr_cg_ctx);
  if (ObNullType != rt_expr.datum_meta_.type_) {
    rt_expr.eval_func_ = eval_json_extract;
  } else {
    rt_expr.eval_func_ = eval_json_extract_null;
  }
  return OB_SUCCESS;
}
}
}